In [5]:
%%time
import numpy as np
import pandas as pd
import time
import sklearn
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

# Import various classifiers
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis


# Load dataset
data = load_iris()
X, y = data.data, data.target

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) #random_state=42

# Define classifiers to test
classifiers = {
    "Logistic Regression": LogisticRegression(),
    "Random Forest": RandomForestClassifier(),
    "Support Vector Machine": SVC(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Naive Bayes": GaussianNB(),
    "Decision Tree": DecisionTreeClassifier(),
    "Gradient Boosting": GradientBoostingClassifier(),
    "AdaBoost": AdaBoostClassifier(),
#   "Neural Network (MLP)": MLPClassifier(),
    "Linear Discriminant Analysis": LinearDiscriminantAnalysis(),
    "Quadratic Discriminant Analysis": QuadraticDiscriminantAnalysis()
}



# Evaluate classifiers using cross-validation
best_model = None
best_score = 0
results = {}

for name, clf in classifiers.items():
    pipeline = make_pipeline(StandardScaler(), clf)
    start_time = time.time()
    scores = cross_val_score(pipeline, X_train, y_train, cv=10, scoring="accuracy")
    end_time = time.time()
    mean_score = np.mean(scores)
    elapsed_time = end_time - start_time
    results[name] = {"Accuracy": mean_score, "Time (seconds)": elapsed_time}

    if mean_score > best_score:
        best_score = mean_score
        best_model = pipeline

# Fit the best model and evaluate on test data
best_model.fit(X_train, y_train)
test_accuracy = accuracy_score(y_test, best_model.predict(X_test))

# Display results
results_df = pd.DataFrame.from_dict(results, orient="index")
results_df = results_df.sort_values(by="Accuracy", ascending=False)

print("Model Performance:")
print(results_df)
print(f"\nBest Model: {best_model.named_steps}\nTest Accuracy: {test_accuracy:.4f}")

Model Performance:
                                 Accuracy  Time (seconds)
Gradient Boosting                1.000000        0.578098
Linear Discriminant Analysis     1.000000        0.010002
Random Forest                    0.990000        0.388557
Logistic Regression              0.990000        0.031420
Support Vector Machine           0.990000        0.009514
K-Nearest Neighbors              0.990000        0.014000
AdaBoost                         0.990000        0.230031
Decision Tree                    0.990000        0.009001
Naive Bayes                      0.980909        0.007999
Quadratic Discriminant Analysis  0.980000        0.007999

Best Model: {'standardscaler': StandardScaler(), 'gradientboostingclassifier': GradientBoostingClassifier()}
Test Accuracy: 0.9111
CPU times: total: 1.33 s
Wall time: 1.35 s


In [None]:
#! pip install scikit-learn