In [30]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

In [31]:
# import for LHD method
# NOTE(review): the directory below is a machine-specific absolute path that is
# inserted at position 1 of sys.path so the local `hypercube` package can be
# imported. On another machine this path presumably has to be adjusted (or the
# package installed) before the import on the last line will succeed.
import sys
sys.path.insert(1, "/Users/lazayxc/Documents/GitHub/")
from hypercube.core.base_LHDmaximin import LHSTuner

import numpy as np
import pandas as pd

In [32]:
from sklearn.datasets import make_classification

# Reproducible synthetic binary-classification data:
# 1000 rows and 20 features, of which 2 are informative and 2 are redundant.
x, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=2,
    n_redundant=2,
    n_classes=2,
    random_state=42,
)


In [37]:
# Hyperparameter search bounds, stored as (low, high) integer pairs.
# Both ranges are plain linear bounds; neither is expressed on a log scale.
# The grid search in this notebook treats both ends as inclusive.
param_space = dict(
    C=(1, 100),     # bounds for the SVC `C` parameter
    gamma=(1, 10),  # bounds for the SVC `gamma` parameter
)

# Kernel passed to the SVC estimator.
default_kernel = 'rbf'

In [34]:
# Initialize SVM classifier
# This single estimator instance is handed to both tuners in this notebook
# (GridSearchCV and LHSTuner). Only the kernel and the seed are fixed here;
# C and gamma are not set at construction time.
svm_clf = SVC(kernel=default_kernel, random_state=42)

## Grid Search Design

In [44]:
# Exhaustive integer grid over the inclusive bounds in `param_space`:
# 100 values of C x 10 values of gamma = 1000 candidates, each scored with 5-fold CV.
search_grid = {
    name: range(param_space[name][0], param_space[name][1] + 1)
    for name in ('C', 'gamma')
}
grid_search = GridSearchCV(svm_clf, param_grid=search_grid, cv=5)

In [45]:
# Perform the grid search
# With the grid defined above this evaluates 1000 (C, gamma) candidates under
# 5-fold CV, i.e. 5000 SVC fits, so this is the expensive cell of the notebook.
grid_search.fit(x, y)

In [46]:
# Estimator configured with the best (C, gamma) pair found by the search.
best_model = grid_search.best_estimator_

print("Best Model:", best_model)

best_params = grid_search.best_params_
best_index = grid_search.best_index_

# Per-fold test scores of the winning candidate. The number of folds is read
# from the fitted search (`n_splits_`) rather than repeating the literal 5, so
# this stays correct if the `cv` argument of the search is ever changed.
best_cv_scores_grid = [
    grid_search.cv_results_[f'split{fold}_test_score'][best_index]
    for fold in range(grid_search.n_splits_)
]

print("Best CV Score:", best_cv_scores_grid)


Best Model: SVC(C=2, gamma=1, random_state=42)
Best CV Score: [0.615, 0.565, 0.62, 0.595, 0.61]


In [47]:
# Total number of cross-validation fits: candidates x CV folds.
# `n_splits_` (set by fit) is used instead of `grid_search.cv`, because `cv`
# is only multipliable when it was passed as an int; it may also be None or a
# splitter object, in which case the multiplication would raise.
n_candidates = len(grid_search.cv_results_['mean_test_score'])
total_runs_grid = n_candidates * grid_search.n_splits_

print("Total Number of Runs:", total_runs_grid)

Total Number of Runs: 5000


## LHD Maximin Design

In [53]:
# LHD-based tuner built on the same estimator and the same `param_space`
# bounds as the grid search, with n_samples=10 and 5-fold CV.
tune = LHSTuner(
    estimator=svm_clf,
    param=param_space,
    metric='accuracy',
    cv=5,
    random_state=42,
    n_samples=10,
)

# Fit the tuner on the data. `method` presumably selects the analysis applied
# to the CV results; the options noted for it are 'pair_t', 'anova', or 'lm_fit'.
tune.fit(x, y, method='lm_fit')


  R_sqr = 1 - RSS/TSS


In [54]:
# Report the tuner's results (prints the location/dispersion tables and the
# best parameter combination).
tune.summary()

# Run count for the LHD approach, computed as n_samples x cv so it can be
# compared with the grid-search run count.
lhd_points, lhd_folds = tune.n_samples, tune.cv
total_runs_lhd = lhd_points * lhd_folds
print("Total Number of Runs:", total_runs_lhd)

Location:
               coef   p_val  R^2
inter  4.780000e-01  0.0000 -inf
C      2.783324e-18  0.7146     
gamma  1.882959e-17  0.8148     

Dispersion:
               coef   p_val  R^2
inter -8.902856e+00  0.0000 -inf
C     -4.473689e-17  0.7585     
gamma -3.054778e-16  0.8426     

Best parameter combination: {'C': 5, 'gamma': 9}
Best CV scores: [0.465 0.465 0.485 0.48  0.495]
Total Number of Runs: 50
