In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import (
    RandomForestClassifier,
    BaggingClassifier,
    AdaBoostClassifier,
    GradientBoostingClassifier,
)
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

# Iris serves as a stand-in here; swap in your own dataset as needed.
data = load_iris()
X = data.data
y = data.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# One seed shared by every estimator so that a fresh Restart & Run All yields
# the same tuned parameters and test scores (0 matches the split's seed).
RANDOM_STATE = 0

# Candidate models, keyed by the display name used when reporting results.
# All of them involve randomness (bootstrapping, feature sub-sampling or
# tie-breaking between equally good splits), hence the explicit seed.
classifiers = {
    "DecisionTree": DecisionTreeClassifier(random_state=RANDOM_STATE),
    "RandomForest": RandomForestClassifier(random_state=RANDOM_STATE),
    "Bagging": BaggingClassifier(random_state=RANDOM_STATE),
    "AdaBoost": AdaBoostClassifier(random_state=RANDOM_STATE),
    "GradientBoosting": GradientBoostingClassifier(random_state=RANDOM_STATE),
    "XGBoost": XGBClassifier(random_state=RANDOM_STATE),
}

# Hyperparameter grid to search for each model; the keys must match those of
# `classifiers` because the grid is looked up by classifier name.
param_grid = {
    "DecisionTree": {"max_depth": [3, 5, 7]},
    "RandomForest": {"n_estimators": [50, 100, 200]},
    "Bagging": {"n_estimators": [50, 100, 200], "max_samples": [0.5, 0.7, 1.0]},
    "AdaBoost": {"n_estimators": [50, 100, 200], "learning_rate": [0.1, 0.5, 1.0]},
    "GradientBoosting": {"n_estimators": [50, 100, 200], "learning_rate": [0.01, 0.1, 0.2]},
    "XGBoost": {"n_estimators": [50, 100, 200], "learning_rate": [0.01, 0.1, 0.2]},
}


# Tune every classifier with a 5-fold grid search, then score the winning
# model on the held-out test set.
# Iterate over a snapshot because the tuned model is written back into
# `classifiers` inside the loop.
for classifier_name, classifier in list(classifiers.items()):
    print(f"\n{classifier_name} Classifier:")

    # Exhaustive search over this model's grid, scored by cross-validated accuracy.
    grid_search = GridSearchCV(
        classifier,
        param_grid[classifier_name],
        cv=5,
        scoring="accuracy",
        n_jobs=-1,
    )
    grid_search.fit(X_train, y_train)

    best_params = grid_search.best_params_
    print("Best Parameters:", best_params)

    # With the default refit=True, GridSearchCV has already retrained a clone
    # with the best parameters on all of X_train. Reusing it avoids a second,
    # redundant fit and guarantees the model scored below is the very one the
    # search produced (a fresh fit of a stochastic estimator could differ).
    best_classifier = grid_search.best_estimator_

    # Keep the tuned, fitted model under its name so later code can reuse it
    # from `classifiers`.
    classifiers[classifier_name] = best_classifier

    # Held-out accuracy of the tuned model.
    y_pred = best_classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)



DecisionTree Classifier:
Best Parameters: {'max_depth': 3}
Accuracy: 0.9666666666666667

RandomForest Classifier:
Best Parameters: {'n_estimators': 50}
Accuracy: 1.0

Bagging Classifier:
Best Parameters: {'max_samples': 0.5, 'n_estimators': 200}
Accuracy: 1.0

AdaBoost Classifier:
Best Parameters: {'learning_rate': 1.0, 'n_estimators': 100}
Accuracy: 0.9

GradientBoosting Classifier:
Best Parameters: {'learning_rate': 0.01, 'n_estimators': 50}
Accuracy: 1.0

XGBoost Classifier:
Best Parameters: {'learning_rate': 0.01, 'n_estimators': 50}
Accuracy: 1.0
