In [4]:
# Imports for the Latin-hypercube-design (LHD) tuning method.
import sys
# NOTE(review): hard-coded, user-specific absolute path inserted into the module
# search path. Presumably this is where the local `hypercube` checkout lives; the
# import below will fail on any other machine unless the package is installed or
# this path is made configurable.
sys.path.insert(1, "/Users/lazayxc/Documents/GitHub/")
from hypercube.core.base_LHDmaximin_SVC import LHSTuner
# NOTE(review): `lhs` is not referenced in the cells visible here.
from pyDOE import lhs

import numpy as np
# NOTE(review): `pd` is not referenced in the cells visible here.
import pandas as pd

In [5]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

In [6]:
from sklearn.datasets import make_classification

# Settings for the synthetic binary-classification data set. The fixed
# random_state keeps the generated samples identical across kernel restarts.
dataset_kwargs = dict(
    n_samples=1000,
    n_features=20,
    n_informative=2,
    n_redundant=2,
    n_classes=2,
    random_state=42,
)

# x: feature matrix, y: class labels (used by both tuning strategies below).
x, y = make_classification(**dataset_kwargs)

In [7]:
# Hyperparameter bounds as (lower, upper) tuples, expressed as powers of two.
# NOTE(review): `param_space` is not referenced by any cell visible here; the
# grid search and the LHD tuner below define their own ranges.
param_space = dict(
    C=(2 ** -7, 2 ** 17),      # C ranges over [2^-7, 2^17]
    gamma=(2 ** -17, 2 ** 5),  # gamma ranges over [2^-17, 2^5]
)

# Kernel passed to the SVC estimator in the next cell.
default_kernel = 'sigmoid'

In [8]:
# Base SVM estimator shared by the grid search and the LHD tuner below.
# Only the kernel and the seed are fixed here; C and gamma are left to the tuners.
svm_clf = SVC(
    random_state=42,
    kernel=default_kernel,
)

## Grid Search Design

In [10]:
# Candidate values for each hyperparameter: 13 points spaced evenly on a
# log10 scale between 10^-4 and 10^2 (half-decade steps).
C_range = np.logspace(start=-4, stop=2, num=13)
gamma_range = np.logspace(start=-4, stop=2, num=13)

# Full factorial grid: 13 x 13 = 169 (C, gamma) candidates.
param_grid = dict(C=C_range, gamma=gamma_range)

print(C_range)

# Exhaustive search over the grid with 5-fold cross-validation.
grid_search = GridSearchCV(
    estimator=svm_clf,
    param_grid=param_grid,
    cv=5,
)


[1.00000000e-04 3.16227766e-04 1.00000000e-03 3.16227766e-03
 1.00000000e-02 3.16227766e-02 1.00000000e-01 3.16227766e-01
 1.00000000e+00 3.16227766e+00 1.00000000e+01 3.16227766e+01
 1.00000000e+02]


In [11]:
# Perform the grid search: fit the GridSearchCV object configured above on the
# synthetic data (x, y). Every (C, gamma) candidate in `param_grid` is
# cross-validated, so this is the expensive cell of the grid-search section.
grid_search.fit(x, y)

In [12]:
# Summarise the winning candidate of the fitted grid search.
best_model = grid_search.best_estimator_

print("Best Model:", best_model)

best_params = grid_search.best_params_
best_index = grid_search.best_index_

# Number of CV folds actually used by the fitted search. Reading it from the
# fitted attribute keeps this cell in sync with whatever `cv` was passed to
# GridSearchCV (instead of a hard-coded 5), and also works when `cv` is None,
# a splitter object or an iterable, where `grid_search.cv` is not an integer.
n_splits = grid_search.n_splits_

# Per-fold test scores of the best parameter set, one entry per CV split.
best_cv_scores_grid = [
    grid_search.cv_results_[f'split{fold}_test_score'][best_index]
    for fold in range(n_splits)
]

print("Best CV Score:", best_cv_scores_grid)

# Total number of cross-validation fits: candidates x folds.
n_candidates = len(grid_search.cv_results_['mean_test_score'])
total_runs_grid = n_candidates * n_splits
print("Total Number of Runs:", total_runs_grid)

Best Model: SVC(C=10.0, gamma=0.0031622776601683794, kernel='sigmoid', random_state=42)
Best CV Score: [0.905, 0.9, 0.875, 0.84, 0.85]
Total Number of Runs: 845


## LHD Maximin Design

In [13]:
# Per-hyperparameter spec of the form [(lower, upper), scale_label].
# NOTE(review): C is labelled "unif" here, while the following cell redefines
# `params` with "log10" for C — confirm which one is intended. The scale labels
# are presumably interpreted by LHSTuner; verify against its implementation.
params = dict(
    C=[(0.0001, 100), "unif"],
    gamma=[(0.0001, 100), "log10"],
)

# One (0, 1) interval per hyperparameter.
unit_range = [(0, 1) for _ in params]

# The specs in insertion order (C first, then gamma); the list holds the same
# list objects that are stored in `params`.
param_range = [params[name] for name in params]


In [14]:
# Search specification handed to the tuner: both C and gamma use the bounds
# (0.0001, 100) with the "log10" label, i.e. the same interval covered by the
# grid search above.
params = {
    hyper_name: [(0.0001, 100), "log10"]
    for hyper_name in ("C", "gamma")
}

# LHD-based tuner around the shared SVC estimator: 10 design points, each
# scored by accuracy with cv=5.
tune = LHSTuner(
    estimator=svm_clf,
    param=params,
    metric='accuracy',
    cv=5,
    random_state=42,
    n_samples=10,
)
tune.fit(x, y, method='lm_fit')

In [15]:
# Report the tuner's fitted summary (printed by the tuner itself).
tune.summary()

# Evaluation budget of the LHD approach: design points x CV folds,
# directly comparable with `total_runs_grid` from the grid-search section.
n_design_points, n_folds = tune.n_samples, tune.cv
total_runs_lhd = n_design_points * n_folds
print("Total Number of Runs:", total_runs_lhd)

Location:
           coef   p_val       R^2
inter  0.604418  0.0000  0.414547
C      0.011702  0.0899          
gamma  0.006109  0.1803          

Dispersion:
           coef   p_val       R^2
inter -8.717532  0.0000  0.631267
C     -0.005437  0.7366          
gamma  0.034906  0.0139          

Best parameter combination: {'C': 28.2735592155715, 'gamma': 0.006806957390546307}
Best CV scores: [0.885 0.855 0.88  0.86  0.88 ]
Total Number of Runs: 50
