In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
import pandas as pd
import time

In [None]:
# Fetch version 1 of the 'mnist_784' dataset from OpenML.
mnist = fetch_openml('mnist_784', version=1)

# Feature matrix, and the targets cast to integers.
X = mnist.data
y = mnist.target.astype(int)

In [None]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

results = []


In [None]:
# Comparison-table metadata keyed by estimator class:
# (time complexity, outlier sensitivity, observations).
_CLASSIFIER_PROFILES = {
    LogisticRegression: (
        "O(n*d)",
        "High",
        "Linear decision boundary; struggles with non-linear separable data.",
    ),
    SVC: (
        "O(n^2*d)",
        "Moderate",
        "Performs well on non-linear separable data with kernel methods.",
    ),
    DecisionTreeClassifier: (
        "O(n*log(n))",
        "High",
        "Prone to overfitting; sensitive to outliers.",
    ),
    RandomForestClassifier: (
        "O(trees*n*log(n))",
        "Moderate",
        "Handles overfitting better; reduces variance.",
    ),
    GradientBoostingClassifier: (
        "O(trees*n*log(n))",
        "Moderate",
        "Focuses on reducing errors sequentially; better for small datasets.",
    ),
    GaussianNB: (
        "O(n*d)",
        "High",
        "Assumes features are independent; quick to train.",
    ),
}

# Used when an estimator matches none of the classes above, so evaluating a
# new model type still yields a complete table row instead of failing with
# UnboundLocalError.
_DEFAULT_PROFILE = ("N/A", "N/A", "No notes recorded for this classifier type.")


def _lookup_profile(clf):
    """Return (time complexity, outlier sensitivity, notes) for ``clf``."""
    for clf_type, profile in _CLASSIFIER_PROFILES.items():
        if isinstance(clf, clf_type):
            return profile
    return _DEFAULT_PROFILE


# Function to evaluate a classifier
def evaluate_classifier(name, clf):
    """Fit ``clf``, score it on the held-out split and record a table row.

    Reads the module-level ``X_train``, ``y_train``, ``X_test`` and ``y_test``
    and writes to the module-level ``results`` list.

    Parameters
    ----------
    name : str
        Label printed in the header and stored under "Classifier".
    clf : estimator
        Unfitted object exposing ``fit(X, y)`` and ``predict(X)``.

    Returns
    -------
    None
        Output is printed; the summary row is stored in ``results``. Any
        earlier row with the same ``name`` is replaced, so re-running a cell
        does not duplicate entries.
    """
    print(f"### {name} ###")

    # perf_counter is monotonic, so the measured interval cannot be distorted
    # by system clock adjustments during a long fit.
    start_time = time.perf_counter()
    clf.fit(X_train, y_train)
    train_time = time.perf_counter() - start_time

    y_pred = clf.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"Accuracy: {acc:.2f}")
    print(f"Training Time: {train_time:.4f} seconds")

    # Save results for comparison table
    time_complexity, outlier_effect, notes = _lookup_profile(clf)

    # Mutate in place (rather than rebinding) so the module-level list keeps
    # its identity while any stale row for this classifier is dropped.
    results[:] = [row for row in results if row["Classifier"] != name]
    results.append({
        "Classifier": name,
        "Accuracy": f"{acc:.2f}",
        "Training Time (s)": f"{train_time:.4f}",
        "Time Complexity": time_complexity,
        "Outlier Sensitivity": outlier_effect,
        "Observations": notes
    })

    print("\n")


In [None]:

# Linear baseline: logistic regression allowed up to 1000 solver iterations.
evaluate_classifier(
    name="Logistic Regression (Linear)",
    clf=LogisticRegression(max_iter=1000),
)


### Logistic Regression (Linear) ###
Accuracy: 0.92
Training Time: 58.7928 seconds




In [None]:

# Only predict() is used during evaluation, so probability estimates are left
# disabled: enabling them makes fit() run an additional internal
# cross-validated calibration, which inflates the measured training time.
evaluate_classifier("SVM with RBF Kernel", SVC(kernel="rbf"))


### SVM with RBF Kernel ###
Accuracy: 0.96
Training Time: 2125.9112 seconds




In [None]:
# Tree construction is randomized; pin the seed so the reported accuracy is
# reproducible across kernel restarts.
evaluate_classifier("Decision Tree", DecisionTreeClassifier(random_state=42))

### Decision Tree ###
Accuracy: 0.87
Training Time: 21.7060 seconds




In [None]:
# Bootstrap sampling and feature sub-sampling are random; pin the seed so the
# reported accuracy is reproducible across kernel restarts.
evaluate_classifier("Random Forest", RandomForestClassifier(random_state=42))

In [None]:
# Pin the seed so this long-running fit gives the same result on every run.
evaluate_classifier("Gradient Boosting", GradientBoostingClassifier(random_state=42))

### Gradient Boosting ###
Accuracy: 0.95
Training Time: 3528.2132 seconds




In [None]:
# Gaussian Naive Bayes with its default settings.
evaluate_classifier(
    name="Naive Bayes",
    clf=GaussianNB(),
)