<a href="https://colab.research.google.com/github/nazimulrahmann/machine_learning/blob/main/classification_algorithms.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Suppress every Python warning raised through the `warnings` module in this
# kernel process. NOTE(review): this also hides library diagnostics such as
# convergence, deprecation and failed-fit warnings, so problems in the grid
# searches further down will not be reported.
warnings.filterwarnings('ignore')

**Dummy Dataset**

In [None]:
from sklearn.model_selection import train_test_split

# Fix the global NumPy seed so the synthetic data is identical on every run
np.random.seed(42)

# Dataset dimensions
n_samples = 1000  # rows in total
n_features = 5    # columns per row
n_classes = 3     # distinct labels

# Pre-allocate the feature matrix and the label vector
X = np.zeros((n_samples, n_features))
y = np.zeros(n_samples, dtype=int)

# Row boundaries of the contiguous per-class slices; the final slice runs to
# n_samples so it absorbs the remainder when n_samples % n_classes != 0.
block_size = n_samples // n_classes
edges = [label * block_size for label in range(n_classes)] + [n_samples]

for label, (lo, hi) in enumerate(zip(edges[:-1], edges[1:])):
    # Each class is a Gaussian blob whose centre (2 * label) and spread
    # (1.0 + 0.5 * label) both grow with the label.
    X[lo:hi] = np.random.normal(
        loc=label * 2,
        scale=1.0 + label * 0.5,
        size=(hi - lo, n_features),
    )
    y[lo:hi] = label

# Blur the blobs with extra zero-mean Gaussian noise (std 0.5)
X += np.random.normal(scale=0.5, size=X.shape)

# Hold out 20% of the rows for testing; stratifying on y keeps the class
# proportions the same in both parts, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

**Evaluation**

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

def evaluate_model(model, X_test, y_test):
    """
    Evaluate a fitted classifier on held-out data and print its metrics.

    Parameters
    ----------
    model : fitted estimator
        Any object exposing ``predict(X)``.
    X_test : array-like
        Feature matrix to predict on.
    y_test : array-like
        True labels for ``X_test``.

    Returns
    -------
    dict
        ``'model'`` (``str(model)``), ``'accuracy'`` (float),
        ``'classification_report'`` (dict, as produced with
        ``output_dict=True``) and ``'confusion_matrix'`` (array).
    """
    # Only hard predictions are needed for the metrics below. An earlier
    # revision also called predict_proba(X_test)[:, 1] here: the result was
    # never used and, on a multi-class problem, kept a single class column.
    y_pred = model.predict(X_test)

    # Calculate metrics
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, output_dict=True)
    cm = confusion_matrix(y_test, y_pred)

    # Print results
    print(f"Accuracy: {accuracy:.4f}")
    print("\nClassification Report:")
    print(pd.DataFrame(report).transpose())
    print("\nConfusion Matrix:")
    print(cm)

    return {
        'model': str(model),
        'accuracy': accuracy,
        'classification_report': report,
        'confusion_matrix': cm
    }

**Logistic Regression | Classification**

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

# Model setup
log_reg = LogisticRegression(random_state=42, max_iter=1000)

# Parameter grid.
# Not every solver supports every penalty, so the search space is a list of
# sub-grids that only contain compatible combinations. A single flat grid
# would spend most of its fits on combinations that raise inside
# GridSearchCV and are scored as NaN.
c_values = [0.001, 0.01, 0.1, 1, 10, 100]
class_weights = [None, 'balanced']
param_grid = [
    {   # L2 is supported by every solver
        'penalty': ['l2'],
        'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
        'C': c_values,
        'class_weight': class_weights,
    },
    {   # L1 is only available with liblinear and saga
        'penalty': ['l1'],
        'solver': ['liblinear', 'saga'],
        'C': c_values,
        'class_weight': class_weights,
    },
    {   # Elastic-net needs saga and an explicit l1_ratio
        'penalty': ['elasticnet'],
        'solver': ['saga'],
        'l1_ratio': [0.25, 0.5, 0.75],
        'C': c_values,
        'class_weight': class_weights,
    },
    {   # Without a penalty C has no effect, so it is not searched here
        'penalty': [None],
        'solver': ['newton-cg', 'lbfgs', 'sag', 'saga'],
        'class_weight': class_weights,
    },
]

# Grid search
print("Training Logistic Regression...")
log_reg_grid = GridSearchCV(log_reg, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
log_reg_grid.fit(X_train, y_train)

# Best model
best_log_reg = log_reg_grid.best_estimator_
print(f"Best parameters: {log_reg_grid.best_params_}")

# Evaluation
print("\nEvaluating Logistic Regression:")
log_reg_metrics = evaluate_model(best_log_reg, X_test, y_test)

**Ridge Classifier**

In [None]:
from sklearn.linear_model import RidgeClassifier

# Ridge classifier: search over the regularisation strength `alpha`.
ridge = RidgeClassifier(random_state=42)
param_grid = dict(alpha=[0.1, 0.5, 1.0, 2.0, 5.0])

print("Training Ridge Classifier...")
ridge_grid = GridSearchCV(
    estimator=ridge,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Report the winning configuration, then score it on the held-out split.
print(f"Best parameters: {ridge_grid.best_params_}")
best_ridge = ridge_grid.best_estimator_

print("\nEvaluating Ridge Classifier:")
ridge_metrics = evaluate_model(best_ridge, X_test, y_test)

**SGD Classifier**

In [None]:
from sklearn.linear_model import SGDClassifier

# The 'constant', 'invscaling' and 'adaptive' schedules searched below need a
# strictly positive initial learning rate. In scikit-learn releases where
# eta0 defaults to 0.0 those candidates fail to fit, so eta0 is set
# explicitly. The 'optimal' schedule does not use eta0.
sgd = SGDClassifier(random_state=42, eta0=0.01)
param_grid = {
    'loss': ['hinge', 'log_loss', 'modified_huber', 'squared_hinge', 'perceptron'],
    'penalty': ['l1', 'l2', 'elasticnet'],
    'alpha': [0.0001, 0.001, 0.01, 0.1],
    'learning_rate': ['constant', 'optimal', 'invscaling', 'adaptive'],
    'class_weight': [None, 'balanced']
}

print("Training SGD Classifier...")
sgd_grid = GridSearchCV(sgd, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
sgd_grid.fit(X_train, y_train)

best_sgd = sgd_grid.best_estimator_
print(f"Best parameters: {sgd_grid.best_params_}")

print("\nEvaluating SGD Classifier:")
sgd_metrics = evaluate_model(best_sgd, X_test, y_test)

**Decision Tree Classifier**

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Single decision tree: search split criterion, depth/leaf limits,
# per-split feature subsampling and class weighting.
dt = DecisionTreeClassifier(random_state=42)
param_grid = dict(
    criterion=['gini', 'entropy', 'log_loss'],
    max_depth=[None, 5, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    max_features=['sqrt', 'log2', None],
    class_weight=[None, 'balanced'],
)

print("Training Decision Tree...")
dt_grid = GridSearchCV(
    estimator=dt,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Report the winning configuration, then score it on the held-out split.
print(f"Best parameters: {dt_grid.best_params_}")
best_dt = dt_grid.best_estimator_

print("\nEvaluating Decision Tree:")
dt_metrics = evaluate_model(best_dt, X_test, y_test)

**Random Forest Classifier**

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest: exhaustive search over forest size, tree shape, bootstrap
# mode and class weighting (1944 candidates, each fitted 5 times).
rf = RandomForestClassifier(random_state=42)
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    max_features=['sqrt', 'log2', None],
    bootstrap=[True, False],
    class_weight=[None, 'balanced', 'balanced_subsample'],
)

print("Training Random Forest...")
rf_grid = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Report the winning configuration, then score it on the held-out split.
print(f"Best parameters: {rf_grid.best_params_}")
best_rf = rf_grid.best_estimator_

print("\nEvaluating Random Forest:")
rf_metrics = evaluate_model(best_rf, X_test, y_test)

**Extra Trees Classifier**

In [None]:
from sklearn.ensemble import ExtraTreesClassifier

# Extra-trees ensemble: search ensemble size, split criterion,
# per-split feature subsampling and class weighting.
et = ExtraTreesClassifier(random_state=42)
param_grid = dict(
    n_estimators=[100, 200],
    criterion=['gini', 'entropy'],
    max_features=['sqrt', 'log2', None],
    class_weight=[None, 'balanced', 'balanced_subsample'],
)

print("Training Extra Trees...")
et_grid = GridSearchCV(
    estimator=et,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Report the winning configuration, then score it on the held-out split.
print(f"Best parameters: {et_grid.best_params_}")
best_et = et_grid.best_estimator_

print("\nEvaluating Extra Trees:")
et_metrics = evaluate_model(best_et, X_test, y_test)

**Gradient Boosting Classifier**

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient boosting: search number of stages, shrinkage, tree depth,
# row subsampling and the minimum split size.
gb = GradientBoostingClassifier(random_state=42)
param_grid = dict(
    n_estimators=[50, 100, 200],
    learning_rate=[0.01, 0.1, 0.2],
    max_depth=[3, 5, 7],
    subsample=[0.8, 1.0],
    min_samples_split=[2, 5],
)

print("Training Gradient Boosting...")
gb_grid = GridSearchCV(
    estimator=gb,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Report the winning configuration, then score it on the held-out split.
print(f"Best parameters: {gb_grid.best_params_}")
best_gb = gb_grid.best_estimator_

print("\nEvaluating Gradient Boosting:")
gb_metrics = evaluate_model(best_gb, X_test, y_test)

**XGBoost Classifier**

In [None]:
from xgboost import XGBClassifier

# The target has three classes, so the multi-class log-loss ('mlogloss') is
# the matching evaluation metric; 'logloss' is the binary one.
# `use_label_encoder` is deprecated in XGBoost and is no longer passed.
xgb = XGBClassifier(random_state=42, eval_metric='mlogloss')

# `scale_pos_weight` re-weights the positive class of a *binary* problem. It
# is left out of the grid because it does not apply to this multi-class task
# and would only triple the number of candidates.
param_grid = {
    'n_estimators': [100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 6, 9],
    'subsample': [0.8, 1.0],
    'colsample_bytree': [0.8, 1.0],
    'gamma': [0, 0.1, 0.2],
    'reg_alpha': [0, 0.1, 1],
    'reg_lambda': [0, 0.1, 1]
}

print("Training XGBoost...")
xgb_grid = GridSearchCV(xgb, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
xgb_grid.fit(X_train, y_train)

best_xgb = xgb_grid.best_estimator_
print(f"Best parameters: {xgb_grid.best_params_}")

print("\nEvaluating XGBoost:")
xgb_metrics = evaluate_model(best_xgb, X_test, y_test)

**LGBM Classifier**

In [None]:
from lightgbm import LGBMClassifier

# LightGBM: exhaustive search over boosting rounds, shrinkage, tree size,
# leaf population, L1/L2 regularisation and class weighting.
lgbm = LGBMClassifier(random_state=42)
param_grid = dict(
    n_estimators=[100, 200],
    learning_rate=[0.01, 0.1, 0.2],
    num_leaves=[31, 63, 127],
    max_depth=[-1, 10, 20],
    min_child_samples=[20, 50],
    reg_alpha=[0, 0.1, 1],
    reg_lambda=[0, 0.1, 1],
    class_weight=[None, 'balanced'],
)

print("Training LightGBM...")
lgbm_grid = GridSearchCV(
    estimator=lgbm,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Report the winning configuration, then score it on the held-out split.
print(f"Best parameters: {lgbm_grid.best_params_}")
best_lgbm = lgbm_grid.best_estimator_

print("\nEvaluating LightGBM:")
lgbm_metrics = evaluate_model(best_lgbm, X_test, y_test)

**CatBoost Classifier**

In [None]:
!pip install catboost

In [None]:
from catboost import CatBoostClassifier

# CatBoost (training log silenced): search boosting rounds, shrinkage,
# tree depth, L2 leaf regularisation and automatic class weighting.
cb = CatBoostClassifier(random_state=42, verbose=0)
param_grid = dict(
    iterations=[100, 200],
    learning_rate=[0.01, 0.1, 0.2],
    depth=[4, 6, 8],
    l2_leaf_reg=[1, 3, 5],
    auto_class_weights=[None, 'Balanced'],
)

print("Training CatBoost...")
cb_grid = GridSearchCV(
    estimator=cb,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Report the winning configuration, then score it on the held-out split.
print(f"Best parameters: {cb_grid.best_params_}")
best_cb = cb_grid.best_estimator_

print("\nEvaluating CatBoost:")
cb_metrics = evaluate_model(best_cb, X_test, y_test)

**AdaBoost Classifier**

In [None]:
from sklearn.ensemble import AdaBoostClassifier

ada = AdaBoostClassifier(random_state=42)

# The `algorithm` option is intentionally not searched: 'SAMME.R' has been
# deprecated and then removed in recent scikit-learn releases, and the
# `algorithm` parameter itself is deprecated as well, so listing it makes the
# search fail or warn depending on the installed version. The estimator's
# default algorithm is used instead.
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 1.0]
}

print("Training AdaBoost...")
ada_grid = GridSearchCV(ada, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
ada_grid.fit(X_train, y_train)

best_ada = ada_grid.best_estimator_
print(f"Best parameters: {ada_grid.best_params_}")

print("\nEvaluating AdaBoost:")
ada_metrics = evaluate_model(best_ada, X_test, y_test)

**Hist Gradient Boosting Classifier**

In [None]:
from sklearn.ensemble import HistGradientBoostingClassifier

# Histogram-based gradient boosting: search shrinkage, boosting iterations,
# depth limit, minimum leaf size and class weighting.
hgb = HistGradientBoostingClassifier(random_state=42)
param_grid = dict(
    learning_rate=[0.01, 0.1, 0.2],
    max_iter=[100, 200],
    max_depth=[None, 10, 20],
    min_samples_leaf=[20, 50, 100],
    class_weight=[None, 'balanced'],
)

print("Training Hist Gradient Boosting...")
hgb_grid = GridSearchCV(
    estimator=hgb,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Report the winning configuration, then score it on the held-out split.
print(f"Best parameters: {hgb_grid.best_params_}")
best_hgb = hgb_grid.best_estimator_

print("\nEvaluating Hist Gradient Boosting:")
hgb_metrics = evaluate_model(best_hgb, X_test, y_test)

**Support Vector Classifier**

In [None]:
from sklearn.svm import SVC

# Kernel SVM with probability estimates enabled: search C, kernel type,
# kernel coefficient, polynomial degree and class weighting.
svc = SVC(random_state=42, probability=True)
param_grid = dict(
    C=[0.1, 1, 10, 100],
    kernel=['linear', 'rbf', 'poly', 'sigmoid'],
    gamma=['scale', 'auto', 0.1, 1],
    degree=[2, 3, 4],
    class_weight=[None, 'balanced'],
)

print("Training SVM...")
svc_grid = GridSearchCV(
    estimator=svc,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Report the winning configuration, then score it on the held-out split.
print(f"Best parameters: {svc_grid.best_params_}")
best_svc = svc_grid.best_estimator_

print("\nEvaluating SVM:")
svc_metrics = evaluate_model(best_svc, X_test, y_test)

**Linear Support Vector Classifier**

In [None]:
from sklearn.svm import LinearSVC

linear_svc = LinearSVC(random_state=42)

# LinearSVC only supports some penalty/loss/dual combinations, so the search
# space lists the valid ones explicitly instead of crossing all options
# (the unsupported combinations raise at fit time and are scored as NaN).
c_values = [0.1, 1, 10]
class_weights = [None, 'balanced']
param_grid = [
    {   # L2 penalty + hinge loss: dual formulation only
        'penalty': ['l2'], 'loss': ['hinge'], 'dual': [True],
        'C': c_values, 'class_weight': class_weights,
    },
    {   # L2 penalty + squared hinge: primal or dual
        'penalty': ['l2'], 'loss': ['squared_hinge'], 'dual': [True, False],
        'C': c_values, 'class_weight': class_weights,
    },
    {   # L1 penalty: squared hinge in the primal formulation only
        'penalty': ['l1'], 'loss': ['squared_hinge'], 'dual': [False],
        'C': c_values, 'class_weight': class_weights,
    },
]

print("Training Linear SVM...")
linear_svc_grid = GridSearchCV(linear_svc, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
linear_svc_grid.fit(X_train, y_train)

best_linear_svc = linear_svc_grid.best_estimator_
print(f"Best parameters: {linear_svc_grid.best_params_}")

print("\nEvaluating Linear SVM:")
linear_svc_metrics = evaluate_model(best_linear_svc, X_test, y_test)

**Nu Support Vector Classifier**

In [None]:
from sklearn.svm import NuSVC

# Nu-SVM with probability estimates enabled: search nu, kernel type,
# kernel coefficient and class weighting.
nu_svc = NuSVC(random_state=42, probability=True)
param_grid = dict(
    nu=[0.1, 0.5, 0.8],
    kernel=['linear', 'rbf', 'poly', 'sigmoid'],
    gamma=['scale', 'auto', 0.1, 1],
    class_weight=[None, 'balanced'],
)

print("Training NuSVC...")
nu_svc_grid = GridSearchCV(
    estimator=nu_svc,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Report the winning configuration, then score it on the held-out split.
print(f"Best parameters: {nu_svc_grid.best_params_}")
best_nu_svc = nu_svc_grid.best_estimator_

print("\nEvaluating NuSVC:")
nu_svc_metrics = evaluate_model(best_nu_svc, X_test, y_test)

**Gaussian Naive Bayes Classifier**

In [None]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes: the only parameter searched is `var_smoothing`.
gnb = GaussianNB()
param_grid = dict(var_smoothing=[1e-9, 1e-8, 1e-7, 1e-6])

print("Training Gaussian Naive Bayes...")
gnb_grid = GridSearchCV(
    estimator=gnb,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Report the winning configuration, then score it on the held-out split.
print(f"Best parameters: {gnb_grid.best_params_}")
best_gnb = gnb_grid.best_estimator_

print("\nEvaluating Gaussian Naive Bayes:")
gnb_metrics = evaluate_model(best_gnb, X_test, y_test)

**Bernoulli Naive Bayes**

In [None]:
from sklearn.naive_bayes import BernoulliNB

# Bernoulli naive Bayes: search the smoothing strength and the threshold
# used to binarize the features (None applies no threshold).
bnb = BernoulliNB()
param_grid = dict(
    alpha=[0.1, 0.5, 1.0, 2.0],
    binarize=[None, 0.0, 0.5],
)

print("Training Bernoulli Naive Bayes...")
bnb_grid = GridSearchCV(
    estimator=bnb,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Report the winning configuration, then score it on the held-out split.
print(f"Best parameters: {bnb_grid.best_params_}")
best_bnb = bnb_grid.best_estimator_

print("\nEvaluating Bernoulli Naive Bayes:")
bnb_metrics = evaluate_model(best_bnb, X_test, y_test)

**Multinomial Naive Bayes**

In [None]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler

# MultinomialNB rejects negative feature values, and the synthetic features
# are Gaussian and therefore contain negatives. Fitting it directly makes
# every candidate fail, which aborts the grid search. The features are
# rescaled to [0, 1] inside a pipeline, so the scaler is re-fitted on each
# training fold; clip=True keeps unseen data inside that range as well.
mnb = make_pipeline(MinMaxScaler(clip=True), MultinomialNB())

# Parameters of a pipeline step are addressed as '<step name>__<parameter>'
param_grid = {
    'multinomialnb__alpha': [0.1, 0.5, 1.0, 2.0],
    'multinomialnb__fit_prior': [True, False]
}

print("Training Multinomial Naive Bayes...")
mnb_grid = GridSearchCV(mnb, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
mnb_grid.fit(X_train, y_train)

best_mnb = mnb_grid.best_estimator_
print(f"Best parameters: {mnb_grid.best_params_}")

print("\nEvaluating Multinomial Naive Bayes:")
mnb_metrics = evaluate_model(best_mnb, X_test, y_test)

**Complement Naive Bayes Classification**

In [None]:
from sklearn.naive_bayes import ComplementNB
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler

# ComplementNB rejects negative feature values, and the synthetic features
# are Gaussian and therefore contain negatives. Fitting it directly makes
# every candidate fail, which aborts the grid search. The features are
# rescaled to [0, 1] inside a pipeline, so the scaler is re-fitted on each
# training fold; clip=True keeps unseen data inside that range as well.
cnb = make_pipeline(MinMaxScaler(clip=True), ComplementNB())

# Parameters of a pipeline step are addressed as '<step name>__<parameter>'
param_grid = {
    'complementnb__alpha': [0.1, 0.5, 1.0, 2.0],
    'complementnb__fit_prior': [True, False],
    'complementnb__norm': [True, False]
}

print("Training Complement Naive Bayes...")
cnb_grid = GridSearchCV(cnb, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
cnb_grid.fit(X_train, y_train)

best_cnb = cnb_grid.best_estimator_
print(f"Best parameters: {cnb_grid.best_params_}")

print("\nEvaluating Complement Naive Bayes:")
cnb_metrics = evaluate_model(best_cnb, X_test, y_test)

**Linear Discriminant Analysis**

In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

lda = LinearDiscriminantAnalysis()

# Shrinkage is only implemented for the 'lsqr' and 'eigen' solvers; with
# 'svd' any non-None shrinkage raises at fit time. The grid is therefore
# split so that 'svd' is tried once without shrinkage.
param_grid = [
    {'solver': ['svd']},
    {
        'solver': ['lsqr', 'eigen'],
        'shrinkage': [None, 'auto', 0.1, 0.5, 0.9]
    },
]

print("Training LDA...")
lda_grid = GridSearchCV(lda, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
lda_grid.fit(X_train, y_train)

best_lda = lda_grid.best_estimator_
print(f"Best parameters: {lda_grid.best_params_}")

print("\nEvaluating LDA:")
lda_metrics = evaluate_model(best_lda, X_test, y_test)

**Quadratic Discriminant Analysis**

In [None]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# Quadratic discriminant analysis: search the regularisation parameter.
qda = QuadraticDiscriminantAnalysis()
param_grid = dict(reg_param=[0.0, 0.1, 0.5, 1.0])

print("Training QDA...")
qda_grid = GridSearchCV(
    estimator=qda,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Report the winning configuration, then score it on the held-out split.
print(f"Best parameters: {qda_grid.best_params_}")
best_qda = qda_grid.best_estimator_

print("\nEvaluating QDA:")
qda_metrics = evaluate_model(best_qda, X_test, y_test)

**KNN Classifier**

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# k-nearest neighbours: search neighbourhood size, vote weighting,
# neighbour-search algorithm and the Minkowski power `p`.
knn = KNeighborsClassifier()
param_grid = dict(
    n_neighbors=[3, 5, 7, 9, 11],
    weights=['uniform', 'distance'],
    algorithm=['auto', 'ball_tree', 'kd_tree', 'brute'],
    p=[1, 2],
)

print("Training KNN...")
knn_grid = GridSearchCV(
    estimator=knn,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Report the winning configuration, then score it on the held-out split.
print(f"Best parameters: {knn_grid.best_params_}")
best_knn = knn_grid.best_estimator_

print("\nEvaluating KNN:")
knn_metrics = evaluate_model(best_knn, X_test, y_test)

**Radius Neighbors Classifier**

In [None]:
from sklearn.neighbors import RadiusNeighborsClassifier

# With the default outlier_label=None, predict() raises as soon as one sample
# has no training neighbour inside the radius, which can happen with the
# small radii searched below. Such samples are assigned the most frequent
# training label instead, so scoring and the final evaluation cannot crash.
rnc = RadiusNeighborsClassifier(outlier_label='most_frequent')
param_grid = {
    'radius': [1.0, 2.0, 5.0],
    'weights': ['uniform', 'distance'],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']
}

print("Training Radius Neighbors...")
rnc_grid = GridSearchCV(rnc, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
rnc_grid.fit(X_train, y_train)

best_rnc = rnc_grid.best_estimator_
print(f"Best parameters: {rnc_grid.best_params_}")

print("\nEvaluating Radius Neighbors:")
rnc_metrics = evaluate_model(best_rnc, X_test, y_test)

**Nearest Centroid**

In [None]:
from sklearn.neighbors import NearestCentroid

nc = NearestCentroid()

# Only the 'euclidean' and 'manhattan' metrics are searched: current
# scikit-learn releases reject every other metric for NearestCentroid
# (including 'cosine'), so those candidates would always fail.
param_grid = {
    'metric': ['euclidean', 'manhattan'],
    'shrink_threshold': [None, 0.1, 0.5, 1.0]
}

print("Training Nearest Centroid...")
nc_grid = GridSearchCV(nc, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
nc_grid.fit(X_train, y_train)

best_nc = nc_grid.best_estimator_
print(f"Best parameters: {nc_grid.best_params_}")

print("\nEvaluating Nearest Centroid:")
nc_metrics = evaluate_model(best_nc, X_test, y_test)

**MLP Classifier**

In [None]:
from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron with early stopping: search architecture,
# activation, L2 penalty and the learning-rate schedule / initial value.
mlp = MLPClassifier(random_state=42, early_stopping=True)
param_grid = dict(
    hidden_layer_sizes=[(50,), (100,), (50, 50), (100, 50)],
    activation=['relu', 'tanh', 'logistic'],
    alpha=[0.0001, 0.001, 0.01],
    learning_rate=['constant', 'invscaling', 'adaptive'],
    learning_rate_init=[0.001, 0.01],
)

print("Training MLP...")
mlp_grid = GridSearchCV(
    estimator=mlp,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Report the winning configuration, then score it on the held-out split.
print(f"Best parameters: {mlp_grid.best_params_}")
best_mlp = mlp_grid.best_estimator_

print("\nEvaluating MLP:")
mlp_metrics = evaluate_model(best_mlp, X_test, y_test)

**Gaussian Process Classifier**

In [None]:
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF, DotProduct

gpc = GaussianProcessClassifier(random_state=42)

# `kernel` must be a kernel object (or None for the estimator's built-in
# default); the strings 'RBF' / 'DotProduct' are not accepted and made those
# candidates fail on every fold. Kernel instances are passed instead.
param_grid = {
    'kernel': [None, RBF(), DotProduct()],
    'max_iter_predict': [100, 200]
}

print("Training Gaussian Process...")
gpc_grid = GridSearchCV(gpc, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
gpc_grid.fit(X_train, y_train)

best_gpc = gpc_grid.best_estimator_
print(f"Best parameters: {gpc_grid.best_params_}")

print("\nEvaluating Gaussian Process:")
gpc_metrics = evaluate_model(best_gpc, X_test, y_test)

**Label Propagation**

In [None]:
from sklearn.semi_supervised import LabelPropagation

# Label propagation (used here with fully labelled training data): search
# the kernel type together with its `gamma` and `n_neighbors` settings.
lp = LabelPropagation()
param_grid = dict(
    kernel=['rbf', 'knn'],
    gamma=[0.1, 0.5, 1.0],
    n_neighbors=[3, 5, 7],
)

print("Training Label Propagation...")
lp_grid = GridSearchCV(
    estimator=lp,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Report the winning configuration, then score it on the held-out split.
print(f"Best parameters: {lp_grid.best_params_}")
best_lp = lp_grid.best_estimator_

print("\nEvaluating Label Propagation:")
lp_metrics = evaluate_model(best_lp, X_test, y_test)

**Label Spreading**

In [None]:
from sklearn.semi_supervised import LabelSpreading

# Label spreading (used here with fully labelled training data): search
# the kernel type, the `alpha` setting and the neighbour count.
ls = LabelSpreading()
param_grid = dict(
    kernel=['rbf', 'knn'],
    alpha=[0.1, 0.5, 0.9],
    n_neighbors=[3, 5, 7],
)

print("Training Label Spreading...")
ls_grid = GridSearchCV(
    estimator=ls,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
).fit(X_train, y_train)

# Report the winning configuration, then score it on the held-out split.
print(f"Best parameters: {ls_grid.best_params_}")
best_ls = ls_grid.best_estimator_

print("\nEvaluating Label Spreading:")
ls_metrics = evaluate_model(best_ls, X_test, y_test)

**Model Comparison**

In [None]:
# Gather the metric dictionaries returned by evaluate_model, grouped by
# model family (linear, trees/ensembles, SVMs, naive Bayes, discriminant
# analysis, neighbours, neural network, Gaussian process, semi-supervised).
all_metrics = [
    log_reg_metrics, ridge_metrics, sgd_metrics,
    dt_metrics, rf_metrics, et_metrics,
    gb_metrics, xgb_metrics, lgbm_metrics,
    cb_metrics, ada_metrics, hgb_metrics,
    svc_metrics, linear_svc_metrics, nu_svc_metrics,
    gnb_metrics, bnb_metrics, mnb_metrics, cnb_metrics,
    lda_metrics, qda_metrics,
    knn_metrics, rnc_metrics, nc_metrics,
    mlp_metrics,
    gpc_metrics,
    lp_metrics, ls_metrics,
]

# One row per model: accuracy plus the support-weighted averages taken from
# the classification report, ranked from most to least accurate.
results_df = (
    pd.DataFrame([
        {
            'Model': m['model'],
            'Accuracy': m['accuracy'],
            'Precision': m['classification_report']['weighted avg']['precision'],
            'Recall': m['classification_report']['weighted avg']['recall'],
            'F1-Score': m['classification_report']['weighted avg']['f1-score'],
        }
        for m in all_metrics
    ])
    .sort_values('Accuracy', ascending=False)
    .reset_index(drop=True)
)

print("\nModel Comparison by Accuracy:")
print(results_df)