<a href="https://colab.research.google.com/github/pranavsrinivas29/hyperparameter_opt/blob/main/Classification_SVC%2CXG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Classifiers - SVC, XG

Hyp-opt - Raandom,Grid Based, Bayesian,Gradient-Based(L-BFGS-B, CG, POWELL)

In [1]:
import numpy as np
import pandas as pd
import warnings  # Import the warnings module
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import accuracy_score
warnings.filterwarnings('ignore')

In [2]:


# Load the "digits" dataset
data = load_digits()
X = data.data
y = data.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Create a dictionary of classifiers
classifiers = {
    'SVM': SVC(),

    'XGBoost': XGBClassifier()
}

# Create a dictionary of pipelines
pipelines = {}

for name, classifier in classifiers.items():
    pipelines[name] = Pipeline([
        ('scaler', StandardScaler()),
        ('bagging', BaggingClassifier(base_estimator=classifier, random_state=42))
    ])

# Train and evaluate each pipeline
results = {}

for name, pipeline in pipelines.items():
    # Ignore warnings within this block
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        pipeline.fit(X_train, y_train)
        accuracy = pipeline.score(X_test, y_test)
    results[name] = accuracy
    print(f'{name}: Accuracy = {accuracy:.4f}')




SVM: Accuracy = 0.9796
XGBoost: Accuracy = 0.9630


Random Search

In [None]:
from sklearn.model_selection import RandomizedSearchCV


In [None]:
# Create a dictionary of classifiers with hyperparameter grids
classifiers = {
    'SVM': {
        'classifier': SVC(),
        'param_grid': {
            'C': [0.1, 1, 10],
            'kernel': ['linear', 'rbf','sigmoid'],
            #'gamma': ['scale', 'auto'] + list(np.logspace(-3, 3, 7))
            'gamma': (1e-6, 1e+1, 'log-uniform')
        }
    },

    'XGBoost': {
        'classifier': XGBClassifier(),
        'param_grid': {
            'learning_rate': [0.01, 0.1, 0.25],
            'n_estimators': [50, 100, 250],
            'max_depth': [3, 4, 5],
            'min_child_weight': [1, 2, 3],
            'subsample': [0.7, 0.8, 1.0],
            'colsample_bytree': [0.7, 0.8, 1.0]
        }
    }
}

In [None]:


# Create a dictionary of pipelines with RandomizedSearchCV
pipelines = {}

best_params = {}

for name, classifier_info in classifiers.items():
    classifier = classifier_info['classifier']
    param_grid = classifier_info['param_grid']
    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('random_search', RandomizedSearchCV(classifier, param_distributions=param_grid, n_iter=20, cv=5, n_jobs=-1))
    ])
    pipeline.fit(X_train, y_train)
    best_accuracy = pipeline.named_steps['random_search'].best_score_
    best_params[name] = pipeline.named_steps['random_search'].best_params_
    print(f'{name}: Best Accuracy = {best_accuracy:.4f}')
    print(f'{name}: Best Hyperparameters = {best_params[name]}')

SVM: Best Accuracy = 0.9674
SVM: Best Hyperparameters = {'kernel': 'linear', 'gamma': 1e-06, 'C': 10}
XGBoost: Best Accuracy = 0.9594
XGBoost: Best Hyperparameters = {'subsample': 0.7, 'n_estimators': 250, 'min_child_weight': 1, 'max_depth': 5, 'learning_rate': 0.1, 'colsample_bytree': 0.7}


Grid-Based

In [None]:
from sklearn.model_selection import GridSearchCV


In [None]:
# Create a dictionary of pipelines with GridSearchCV
pipelines = {}
best_params = {}

for name, classifier_info in classifiers.items():
    classifier = classifier_info['classifier']
    param_grid = classifier_info['param_grid']
    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('grid_search', GridSearchCV(classifier, param_grid=param_grid, cv=5, n_jobs=-1))
    ])
    pipeline.fit(X_train, y_train)
    best_accuracy = pipeline.named_steps['grid_search'].best_score_
    best_params[name] = pipeline.named_steps['grid_search'].best_params_
    print(f'{name}: Best Accuracy = {best_accuracy:.4f}')
    print(f'{name}: Best Hyperparameters = {best_params[name]}')

SVM: Best Accuracy = 0.9801
SVM: Best Hyperparameters = {'C': 10, 'gamma': 0.01, 'kernel': 'rbf'}
XGBoost: Best Accuracy = 0.9642
XGBoost: Best Hyperparameters = {'colsample_bytree': 0.7, 'learning_rate': 0.25, 'max_depth': 3, 'min_child_weight': 1, 'n_estimators': 100, 'subsample': 0.8}


Bayesian

In [None]:
!pip install scikit-optimize


Collecting scikit-optimize
  Downloading scikit_optimize-0.9.0-py2.py3-none-any.whl (100 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/100.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━[0m [32m92.2/100.3 kB[0m [31m3.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.3/100.3 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Collecting pyaml>=16.9 (from scikit-optimize)
  Downloading pyaml-23.9.7-py3-none-any.whl (23 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-23.9.7 scikit-optimize-0.9.0


In [None]:
from skopt import BayesSearchCV

In [None]:
pipelines = {}
best_params = {}

for name, classifier_info in classifiers.items():
    classifier = classifier_info['classifier']
    param_grid = classifier_info['param_grid']

    # Use Bayesian optimization (BayesSearchCV)
    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('bayes_search', BayesSearchCV(
            classifier, param_grid, n_iter=20, cv=5, n_jobs=-1, random_state=42)
        )
    ])

    pipeline.fit(X_train, y_train)
    best_accuracy = pipeline.named_steps['bayes_search'].best_score_
    best_params[name] = pipeline.named_steps['bayes_search'].best_params_
    print(f'{name}: Best Accuracy = {best_accuracy:.4f}')
    print(f'{name}: Best Hyperparameters = {best_params[name]}')

SVM: Best Accuracy = 0.9833
SVM: Best Hyperparameters = OrderedDict([('C', 10.0), ('gamma', 0.005919370539100854), ('kernel', 'rbf')])
XGBoost: Best Accuracy = 0.9602
XGBoost: Best Hyperparameters = OrderedDict([('colsample_bytree', 0.7), ('learning_rate', 0.1), ('max_depth', 5), ('min_child_weight', 1), ('n_estimators', 250), ('subsample', 0.8)])


Gradient based with L-BFGS-B

In [5]:
from scipy.optimize import minimize


In [6]:

# Define a function to optimize hyperparameters of the SVM classifier
def optimize_svm_hyperparameters(params):
    C = params[0]
    gamma = max(params[1], 0.0)

    # Create and train an SVM classifier with the specified hyperparameters
    svm_classifier = SVC(C=C, kernel='rbf', gamma=gamma)
    svm_classifier.fit(X_train, y_train)

    # Make predictions on the test set
    y_pred = svm_classifier.predict(X_test)

    # Calculate negative accuracy (minimize the negative accuracy)
    accuracy = -accuracy_score(y_test, y_pred)

    return accuracy

# Define a function to optimize hyperparameters of the XGBoost classifier
def optimize_xgboost_hyperparameters(params):
    learning_rate, n_estimators, max_depth, min_child_weight, subsample, colsample_bytree = params
    learning_rate = max(learning_rate, 0)
    min_child_weight = max(min_child_weight, 0)
    subsample = max(0, min(subsample, 1))
    colsample_bytree= max(0, min(colsample_bytree, 1))
    # Create and train an XGBoost classifier with the specified hyperparameters
    xgb_classifier = XGBClassifier(
        learning_rate=learning_rate,
        n_estimators=int(n_estimators),
        max_depth=int(max_depth),
        min_child_weight=int(min_child_weight),
        subsample=subsample,
        colsample_bytree=colsample_bytree
    )
    xgb_classifier.fit(X_train, y_train)

    # Make predictions on the test set
    y_pred = xgb_classifier.predict(X_test)

    # Calculate negative accuracy (minimize the negative accuracy)
    accuracy = -accuracy_score(y_test, y_pred)

    return accuracy


In [7]:
# Initial values for hyperparameters (C, gamma in SVM, and hyperparameters for XGBoost)
initial_svm_C = 1.0
initial_svm_gamma = 1e-3
initial_xgboost_params = [0.1, 100, 3, 1, 0.8, 0.8]  # Initial values for XGBoost hyperparameters

# Perform gradient-based optimization using minimize from scipy for both classifiers
initial_svm_params = [initial_svm_C, initial_svm_gamma]
result_svm = minimize(optimize_svm_hyperparameters, initial_svm_params, method='L-BFGS-B')
result_xgboost = minimize(optimize_xgboost_hyperparameters, initial_xgboost_params, method='L-BFGS-B')


In [8]:
best_svm_C, best_svm_gamma = result_svm.x
best_xgboost_params = result_xgboost.x
best_svm_accuracy = -result_svm.fun  # Convert back to accuracy
best_xgboost_accuracy = -result_xgboost.fun
print("L-BFGS_B")
print(f'Best SVM C after optimization: {best_svm_C:.4f}')
print(f'Best SVM Gamma after optimization: {best_svm_gamma:.4f}')
print(f'Best SVM Accuracy after optimization: {best_svm_accuracy:.4f}')
print()
print(f'Best XGBoost Learning Rate after optimization: {best_xgboost_params[0]:.4f}')
print(f'Best XGBoost N Estimators after optimization: {best_xgboost_params[1]:.4f}')
print(f'Best XGBoost Max Depth after optimization: {best_xgboost_params[2]:.4f}')
print(f'Best XGBoost Min Child Weight after optimization: {best_xgboost_params[3]:.4f}')
print(f'Best XGBoost Subsample after optimization: {best_xgboost_params[4]:.4f}')
print(f'Best XGBoost Col Sample By Tree after optimization: {best_xgboost_params[5]:.4f}')
print(f'Best XGBoost Accuracy after optimization: {best_xgboost_accuracy:.4f}')

L-BFGS_B
Best SVM C after optimization: 1.0000
Best SVM Gamma after optimization: 0.0010
Best SVM Accuracy after optimization: 0.9907

Best XGBoost Learning Rate after optimization: 0.1000
Best XGBoost N Estimators after optimization: 100.0000
Best XGBoost Max Depth after optimization: 3.0000
Best XGBoost Min Child Weight after optimization: 1.0000
Best XGBoost Subsample after optimization: 0.8000
Best XGBoost Col Sample By Tree after optimization: 0.8000
Best XGBoost Accuracy after optimization: 0.9630


Gradient Based with POWELL

In [None]:
# Initial values for hyperparameters (C, gamma in SVM, and hyperparameters for XGBoost)
initial_svm_C = 1.0
initial_svm_gamma = 1e-3
initial_xgboost_params = [0.1, 100, 3, 1, 0.8, 0.8]  # Initial values for XGBoost hyperparameters

# Perform gradient-based optimization using minimize from scipy for both classifiers
initial_svm_params = [initial_svm_C, initial_svm_gamma]
result_svm2 = minimize(optimize_svm_hyperparameters, initial_svm_params, method='POWELL')



In [None]:
best_svm_C, best_svm_gamma = result_svm2.x
best_svm_accuracy = -result_svm.fun  # Convert back to accuracy
print("Powell")
print(f'Best SVM C after optimization: {best_svm_C:.4f}')
print(f'Best SVM Gamma after optimization: {best_svm_gamma:.4f}')
print(f'Best SVM Accuracy after optimization: {best_svm_accuracy:.4f}')
print()

Powell
Best SVM C after optimization: 1.0000
Best SVM Gamma after optimization: 0.0010
Best SVM Accuracy after optimization: 0.9907



In [None]:
result_xgboost = minimize(optimize_xgboost_hyperparameters, initial_xgboost_params, method='POWELL')

In [None]:
print(f'Best XGBoost Learning Rate after optimization: {best_xgboost_params[0]:.4f}')
print(f'Best XGBoost N Estimators after optimization: {best_xgboost_params[1]:.4f}')
print(f'Best XGBoost Max Depth after optimization: {best_xgboost_params[2]:.4f}')
print(f'Best XGBoost Min Child Weight after optimization: {best_xgboost_params[3]:.4f}')
print(f'Best XGBoost Subsample after optimization: {best_xgboost_params[4]:.4f}')
print(f'Best XGBoost Col Sample By Tree after optimization: {best_xgboost_params[5]:.4f}')
print(f'Best XGBoost Accuracy after optimization: {best_xgboost_accuracy:.4f}')

Best XGBoost Learning Rate after optimization: 0.1000
Best XGBoost N Estimators after optimization: 100.0000
Best XGBoost Max Depth after optimization: 3.0000
Best XGBoost Min Child Weight after optimization: 1.0000
Best XGBoost Subsample after optimization: 0.8000
Best XGBoost Col Sample By Tree after optimization: 0.8000
Best XGBoost Accuracy after optimization: 0.9630


In [12]:
# Perform gradient-based optimization using minimize from scipy for both classifiers
initial_svm_params = [initial_svm_C, initial_svm_gamma]
result_svm3 = minimize(optimize_svm_hyperparameters, initial_svm_params, method='CG')

In [13]:
best_svm_C, best_svm_gamma = result_svm3.x
best_svm_accuracy = -result_svm.fun  # Convert back to accuracy
print("CG")
print(f'Best SVM C after optimization: {best_svm_C:.4f}')
print(f'Best SVM Gamma after optimization: {best_svm_gamma:.4f}')
print(f'Best SVM Accuracy after optimization: {best_svm_accuracy:.4f}')
print()

CG
Best SVM C after optimization: 1.0000
Best SVM Gamma after optimization: 0.0010
Best SVM Accuracy after optimization: 0.9907



In [9]:
result_xgboost = minimize(optimize_xgboost_hyperparameters, initial_xgboost_params, method='CG')

In [10]:
print(f'Best XGBoost Learning Rate after optimization: {best_xgboost_params[0]:.4f}')
print(f'Best XGBoost N Estimators after optimization: {best_xgboost_params[1]:.4f}')
print(f'Best XGBoost Max Depth after optimization: {best_xgboost_params[2]:.4f}')
print(f'Best XGBoost Min Child Weight after optimization: {best_xgboost_params[3]:.4f}')
print(f'Best XGBoost Subsample after optimization: {best_xgboost_params[4]:.4f}')
print(f'Best XGBoost Col Sample By Tree after optimization: {best_xgboost_params[5]:.4f}')
print(f'Best XGBoost Accuracy after optimization: {best_xgboost_accuracy:.4f}')

Best XGBoost Learning Rate after optimization: 0.1000
Best XGBoost N Estimators after optimization: 100.0000
Best XGBoost Max Depth after optimization: 3.0000
Best XGBoost Min Child Weight after optimization: 1.0000
Best XGBoost Subsample after optimization: 0.8000
Best XGBoost Col Sample By Tree after optimization: 0.8000
Best XGBoost Accuracy after optimization: 0.9630
