## Now Training Multiple Models and Comparing Performance Using PyCaret
- GitHub link: https://github.com/pycaret/pycaret
- Documentation and tutorials are also available in the GitHub repository

In [1]:
# General imports
import pycaret
from pycaret.classification import *
import pandas as pd
pycaret.__version__

'3.3.2'

In [7]:
# Read the separate train / test CSV files and report their (rows, columns) shapes
train_data = pd.read_csv("../motum/data_for_pycaret_train.csv")
test_data = pd.read_csv("../motum/data_for_pycaret_test.csv")
print(train_data.shape, test_data.shape, sep="\n")

# Configure the PyCaret classification experiment (functional API)
s = setup(
    # --- data and target ---
    data=train_data,
    test_data=test_data,
    target="label",
    index=False,
    # --- reproducibility ---
    session_id=42,
    # --- group-aware cross-validation, grouped by subject_id ---
    fold_strategy="groupkfold",
    fold_groups="subject_id",
    fold=8,                          # number of CV folds
    ignore_features=["subject_id"],  # kept for grouping only, not used as a model input
    # --- feature scaling ---
    normalize=True,
    normalize_method='zscore',
)

(153, 19)
(39, 19)


Unnamed: 0,Description,Value
0,Session id,42
1,Target,label
2,Target type,Binary
3,Target mapping,"glioma: 0, metastasis: 1"
4,Original data shape,"(192, 19)"
5,Transformed data shape,"(192, 18)"
6,Transformed train set shape,"(153, 18)"
7,Transformed test set shape,"(39, 18)"
8,Ignore features,1
9,Numeric features,17


In [18]:
# Object-oriented API: create an experiment object and configure it with the
# same settings as the functional setup() call.
# NOTE(review): the later cells visible here call the functional API
# (compare_models, create_model, tune_model) rather than methods on `exp`;
# confirm whether this object is still needed.
exp = ClassificationExperiment()

# Last expression of the cell, so the configured experiment is what gets displayed.
exp.setup(
    data=train_data,
    test_data=test_data,
    target="label",
    session_id=42,
    fold_strategy="groupkfold",     # group-aware cross validation
    fold_groups="subject_id",
    ignore_features=["subject_id"], # keep for grouping, not modeling
    fold=8,                         # number of CV folds
    index=False,
    normalize=True,
    normalize_method='zscore'
)

Unnamed: 0,Description,Value
0,Session id,42
1,Target,label
2,Target type,Binary
3,Target mapping,"glioma: 0, metastasis: 1"
4,Original data shape,"(192, 19)"
5,Transformed data shape,"(192, 18)"
6,Transformed train set shape,"(153, 18)"
7,Transformed test set shape,"(39, 18)"
8,Ignore features,1
9,Numeric features,17


<pycaret.classification.oop.ClassificationExperiment at 0x376df99f0>

In [34]:
# Compare the baseline models with the cross-validation configured in setup().
# turbo=False also includes the estimators flagged Turbo=False in models()
# (e.g. rbfsvm, gpc, mlp).
# n_select=1 returns the single top-ranked model. A negative n_select picks from
# the bottom of the leaderboard, so n_select=-1 would store the worst-ranked
# model in `best`.
best = compare_models(probability_threshold=0.5, turbo=False, n_select=1)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.7093,0.7632,0.7093,0.7464,0.7044,0.4101,0.4348,0.02
qda,Quadratic Discriminant Analysis,0.6756,0.7239,0.6756,0.706,0.6503,0.3308,0.3521,0.005
rbfsvm,SVM - Radial Kernel,0.6706,0.7392,0.6706,0.7112,0.6631,0.3214,0.3501,0.005
knn,K Neighbors Classifier,0.6657,0.6418,0.6657,0.7086,0.6586,0.3201,0.3508,0.005
gpc,Gaussian Process Classifier,0.6607,0.678,0.6607,0.7052,0.6559,0.3126,0.3397,0.0088
lr,Logistic Regression,0.6458,0.7206,0.6458,0.6872,0.6408,0.3028,0.3242,0.0088
catboost,CatBoost Classifier,0.6409,0.7273,0.6409,0.7088,0.6314,0.2822,0.3274,0.2275
mlp,MLP Classifier,0.628,0.689,0.628,0.6858,0.6132,0.255,0.2945,0.02
ridge,Ridge Classifier,0.627,0.6654,0.627,0.6648,0.6203,0.2519,0.2754,0.005
lda,Linear Discriminant Analysis,0.621,0.6581,0.621,0.6607,0.6155,0.242,0.2663,0.005


In [35]:
# List the classifiers in PyCaret's model library: ID, name, reference class and Turbo flag
models()

Unnamed: 0_level_0,Name,Reference,Turbo
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
lr,Logistic Regression,sklearn.linear_model._logistic.LogisticRegression,True
knn,K Neighbors Classifier,sklearn.neighbors._classification.KNeighborsCl...,True
nb,Naive Bayes,sklearn.naive_bayes.GaussianNB,True
dt,Decision Tree Classifier,sklearn.tree._classes.DecisionTreeClassifier,True
svm,SVM - Linear Kernel,sklearn.linear_model._stochastic_gradient.SGDC...,True
rbfsvm,SVM - Radial Kernel,sklearn.svm._classes.SVC,False
gpc,Gaussian Process Classifier,sklearn.gaussian_process._gpc.GaussianProcessC...,False
mlp,MLP Classifier,sklearn.neural_network._multilayer_perceptron....,False
ridge,Ridge Classifier,sklearn.linear_model._ridge.RidgeClassifier,True
rf,Random Forest Classifier,sklearn.ensemble._forest.RandomForestClassifier,True


In [24]:
# Alternative SVC kernels (polynomial and sigmoid) evaluated with the same CV setup
from sklearn.svm import SVC

# Polynomial kernel, degree 2 and degree 3
svm_poly2, svm_poly3 = (SVC(kernel='poly', degree=deg) for deg in (2, 3))
svm_poly2_trained = create_model(svm_poly2)
svm_poly3_trained = create_model(svm_poly3)

# Sigmoid kernel
svm_sigmoid = SVC(kernel='sigmoid')
svm_sigmoid_trained = create_model(svm_sigmoid)

# Takeaway: with the current settings, none of these kernels beats the RBF SVM

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.4286,0.4333,0.4286,0.5476,0.4524,-0.0769,-0.0913
1,0.7619,0.9259,0.7619,0.8469,0.7565,0.5455,0.6124
2,0.5238,0.5741,0.5238,0.4929,0.4898,-0.0294,-0.0323
3,0.6111,0.6296,0.6111,0.7812,0.5418,0.2222,0.3536
4,0.7778,1.0,0.7778,0.8462,0.7662,0.5556,0.6202
5,0.3333,0.7083,0.3333,0.1111,0.1667,0.0,0.0
6,0.5556,0.6296,0.5556,0.5584,0.55,0.1111,0.114
7,0.6111,0.6173,0.6111,0.7812,0.5418,0.2222,0.3536
Mean,0.5754,0.6898,0.5754,0.6207,0.5331,0.1938,0.2413
Std,0.1421,0.1746,0.1421,0.2342,0.1758,0.2301,0.2661


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.4286,0.4111,0.4286,0.5476,0.4524,-0.0769,-0.0913
1,0.7143,0.8519,0.7143,0.8286,0.7024,0.4615,0.5477
2,0.7143,0.8519,0.7143,0.7637,0.7117,0.4474,0.4812
3,0.5,0.4938,0.5,0.5,0.4985,0.0,0.0
4,0.5556,1.0,0.5556,0.7647,0.4462,0.1111,0.2425
5,0.3333,0.7778,0.3333,0.1111,0.1667,0.0,0.0
6,0.6667,0.7778,0.6667,0.6753,0.6625,0.3333,0.3419
7,0.7222,0.8272,0.7222,0.75,0.7143,0.4444,0.4714
Mean,0.5794,0.7489,0.5794,0.6176,0.5443,0.2151,0.2492
Std,0.139,0.1841,0.139,0.219,0.1798,0.215,0.2348


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.4286,0.3889,0.4286,0.5918,0.439,0.0,0.0
1,0.7143,0.75,0.7143,0.7143,0.7143,0.4167,0.4167
2,0.4286,0.6111,0.4286,0.4082,0.4148,-0.2,-0.2041
3,0.5556,0.4691,0.5556,0.5692,0.5325,0.1111,0.124
4,0.5,0.716,0.5,0.5,0.4109,0.0,0.0
5,0.6667,0.7917,0.6667,0.7,0.6753,0.3077,0.3162
6,0.5556,0.4568,0.5556,0.5692,0.5325,0.1111,0.124
7,0.7778,0.9506,0.7778,0.8462,0.7662,0.5556,0.6202
Mean,0.5784,0.6418,0.5784,0.6124,0.5607,0.1628,0.1746
Std,0.1215,0.1817,0.1215,0.1281,0.1319,0.2318,0.2467


In [36]:
# creating a LASSO-style model: logistic regression ('lr') with an L1 penalty,
# passing the liblinear solver explicitly; create_model shows the per-fold CV scores
lasso_clf = create_model('lr', penalty='l1', solver='liblinear')

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.619,0.6889,0.619,0.6987,0.6374,0.2222,0.2412
1,0.9048,0.9722,0.9048,0.9184,0.9025,0.8,0.8165
2,0.381,0.5,0.381,0.3893,0.3402,-0.1519,-0.1936
3,0.5556,0.4568,0.5556,0.5692,0.5325,0.1111,0.124
4,0.7222,0.9012,0.7222,0.75,0.7143,0.4444,0.4714
5,0.6667,0.8611,0.6667,0.8333,0.6667,0.4,0.5
6,0.4444,0.4444,0.4444,0.4308,0.4156,-0.1111,-0.124
7,0.6667,0.9136,0.6667,0.7077,0.6494,0.3333,0.3721
Mean,0.62,0.7173,0.62,0.6622,0.6073,0.256,0.2759
Std,0.1533,0.2087,0.1533,0.1742,0.1653,0.2919,0.3152


In [None]:
# Baseline Extra Trees classifier (cross-validated with the setup() configuration)
et = create_model('et')

# Candidate hyperparameter values for the search
params = dict(
    max_depth=[5, 7, 10],
    n_estimators=[50, 100, 200, 300, 400],
    max_features=['sqrt', 'log2', None],
    min_samples_leaf=[1, 2],
    min_samples_split=[2, 4],
)

# Search the custom grid, evaluating n_iter=100 candidate combinations
tuned_et = tune_model(et, custom_grid=params, n_iter=100)

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.4762,0.5222,0.4762,0.5779,0.5008,-0.0267,-0.0302
1,0.8571,0.8519,0.8571,0.8857,0.8508,0.6957,0.7303
2,0.5714,0.6111,0.5714,0.5504,0.526,0.0597,0.07
3,0.5556,0.5617,0.5556,0.5556,0.5556,0.1111,0.1111
4,0.6667,0.7222,0.6667,0.6667,0.6667,0.3333,0.3333
5,0.7778,0.8611,0.7778,0.8667,0.7833,0.5714,0.6325
6,0.8889,0.9753,0.8889,0.9091,0.8875,0.7778,0.7977
7,0.8333,1.0,0.8333,0.875,0.8286,0.6667,0.7071
Mean,0.7034,0.7632,0.7034,0.7359,0.6999,0.3986,0.419
Std,0.1469,0.1739,0.1469,0.1523,0.1472,0.2992,0.3152


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.5238,0.5111,0.5238,0.6052,0.5467,0.0278,0.0302
1,0.7619,0.7963,0.7619,0.761,0.7597,0.507,0.5095
2,0.6667,0.3519,0.6667,0.7895,0.5982,0.2462,0.3746
3,0.6111,0.5679,0.6111,0.625,0.6,0.2222,0.2357
4,0.6667,0.8642,0.6667,0.6667,0.6667,0.3333,0.3333
5,0.7778,0.8611,0.7778,0.8667,0.7833,0.5714,0.6325
6,0.8889,0.9383,0.8889,0.9091,0.8875,0.7778,0.7977
7,0.8889,0.9877,0.8889,0.9091,0.8875,0.7778,0.7977
Mean,0.7232,0.7348,0.7232,0.7665,0.7162,0.4329,0.4639
Std,0.1215,0.214,0.1215,0.1158,0.1243,0.2539,0.2546


Fitting 8 folds for each of 100 candidates, totalling 800 fits
{'bootstrap': False, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 2, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 400, 'n_jobs': -1, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}


In [None]:
# Full hyperparameter dictionary of the tuned Extra Trees model
best_params = tuned_et.get_params()
print(best_params)

# Report only the hyperparameters that were part of the search grid
print("\n".join(
    f"Best {key}: {best_params[key]}"
    for key in ("max_depth", "n_estimators", "max_features",
                "min_samples_leaf", "min_samples_split")
))

{'bootstrap': False, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 2, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 400, 'n_jobs': -1, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}
Best max_depth: 10
Best n_estimators: 400
Best max_features: sqrt
Best min_samples_leaf: 2
Best min_samples_split: 2


In [32]:
# Second Extra Trees tuning round: train a fresh baseline model for this cell
et2 = create_model('et')

# Wider custom grid with larger values, since some of the best hyperparameters
# from the first search sat at the maximum of the grid we searched
params2 = {
    "max_depth": [5, 10, 15, 20, 40],
    "n_estimators": [200, 400, 600, 800],
    "max_features": ['sqrt', 'log2', None],
    "min_samples_leaf": [2, 4, 6],
    "min_samples_split": [2, 4]
}

# Tune the model created in this cell (et2), so the cell does not depend on a
# variable `et` left over from another cell
tuned_et2 = tune_model(et2, custom_grid=params2, n_iter=100)

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.4762,0.5222,0.4762,0.5779,0.5008,-0.0267,-0.0302
1,0.8571,0.8519,0.8571,0.8857,0.8508,0.6957,0.7303
2,0.5714,0.6111,0.5714,0.5504,0.526,0.0597,0.07
3,0.5556,0.5617,0.5556,0.5556,0.5556,0.1111,0.1111
4,0.6667,0.7222,0.6667,0.6667,0.6667,0.3333,0.3333
5,0.7778,0.8611,0.7778,0.8667,0.7833,0.5714,0.6325
6,0.8889,0.9753,0.8889,0.9091,0.8875,0.7778,0.7977
7,0.8333,1.0,0.8333,0.875,0.8286,0.6667,0.7071
Mean,0.7034,0.7632,0.7034,0.7359,0.6999,0.3986,0.419
Std,0.1469,0.1739,0.1469,0.1523,0.1472,0.2992,0.3152


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.5238,0.4667,0.5238,0.6052,0.5467,0.0278,0.0302
1,0.7143,0.7778,0.7143,0.7143,0.7143,0.4167,0.4167
2,0.6667,0.3704,0.6667,0.7895,0.5982,0.2462,0.3746
3,0.6667,0.5185,0.6667,0.6667,0.6667,0.3333,0.3333
4,0.8333,0.8519,0.8333,0.875,0.8286,0.6667,0.7071
5,0.8333,0.9444,0.8333,0.8889,0.8381,0.6667,0.7071
6,0.8333,0.9506,0.8333,0.8375,0.8328,0.6667,0.6708
7,0.8333,1.0,0.8333,0.875,0.8286,0.6667,0.7071
Mean,0.7381,0.735,0.7381,0.7815,0.7317,0.4613,0.4934
Std,0.1078,0.2313,0.1078,0.1006,0.11,0.2295,0.2315


Fitting 8 folds for each of 100 candidates, totalling 800 fits


In [33]:
# Full hyperparameter dictionary of the second tuned Extra Trees model
best_params = tuned_et2.get_params()
print(best_params)

# Report only the hyperparameters that were part of the search grid
print("\n".join(
    f"Best {key}: {best_params[key]}"
    for key in ("max_depth", "n_estimators", "max_features",
                "min_samples_leaf", "min_samples_split")
))

{'bootstrap': False, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': 10, 'max_features': None, 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 4, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'monotonic_cst': None, 'n_estimators': 200, 'n_jobs': -1, 'oob_score': False, 'random_state': 42, 'verbose': 0, 'warm_start': False}
Best max_depth: 10
Best n_estimators: 200
Best max_features: None
Best min_samples_leaf: 4
Best min_samples_split: 2


In [27]:
# Baseline RBF-kernel SVM, followed by a coarse hyperparameter search
rbf_svm = create_model('rbfsvm')

# Coarse grid: powers of ten for both C and gamma (5 x 5 = 25 combinations)
params_svm = dict(
    C=[0.1, 1, 10, 100, 1000],
    gamma=[1, 0.1, 0.01, 0.001, 0.0001],
)

# Search the custom grid
tuned_rbf_svm = tune_model(rbf_svm, custom_grid=params_svm, n_iter=100)

# Full hyperparameter dictionary of the tuned model, then the two searched values
best_params = tuned_rbf_svm.get_params()
print(best_params)
print("\n".join(f"Best {key}: {best_params[key]}" for key in ("C", "gamma")))


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.4286,0.4333,0.4286,0.4753,0.449,-0.2727,-0.2791
1,0.9048,0.9815,0.9048,0.9184,0.9025,0.8,0.8165
2,0.5238,0.4352,0.5238,0.4929,0.4898,-0.0294,-0.0323
3,0.4444,0.5185,0.4444,0.4416,0.4375,-0.1111,-0.114
4,0.7778,0.9012,0.7778,0.8462,0.7662,0.5556,0.6202
5,0.6667,0.9028,0.6667,0.8333,0.6667,0.4,0.5
6,0.7778,0.7531,0.7778,0.7922,0.775,0.5556,0.5698
7,0.8889,0.9877,0.8889,0.8889,0.8889,0.7778,0.7778
Mean,0.6766,0.7392,0.6766,0.7111,0.6719,0.3345,0.3574
Std,0.179,0.2259,0.179,0.1905,0.1794,0.3897,0.4033


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.619,0.6889,0.619,0.6987,0.6374,0.2222,0.2412
1,0.8571,0.9722,0.8571,0.8585,0.8558,0.7042,0.7077
2,0.381,0.2315,0.381,0.398,0.3668,-0.1818,-0.2041
3,0.5556,0.4938,0.5556,0.5692,0.5325,0.1111,0.124
4,0.7778,0.9136,0.7778,0.8462,0.7662,0.5556,0.6202
5,0.6667,0.8333,0.6667,0.8333,0.6667,0.4,0.5
6,0.7222,0.679,0.7222,0.75,0.7143,0.4444,0.4714
7,0.8889,0.9753,0.8889,0.9091,0.8875,0.7778,0.7977
Mean,0.6835,0.7235,0.6835,0.7329,0.6784,0.3792,0.4073
Std,0.1562,0.2428,0.1562,0.1621,0.1599,0.2982,0.3127


Fitting 8 folds for each of 25 candidates, totalling 200 fits
{'C': 100, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 0.001, 'kernel': 'rbf', 'max_iter': -1, 'probability': True, 'random_state': 42, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Best C: 100
Best gamma: 0.001


In [30]:
# Second RBF-SVM round: fresh baseline model, then a narrower search
rbf_svm2 = create_model('rbfsvm')

# Finer grid with more specific C and gamma values (6 x 4 = 24 combinations)
params_svm2 = dict(
    C=[50, 100, 200, 300, 400, 500],
    gamma=[0.0005, 0.001, 0.005, 0.007],
)

# Search the custom grid
tuned_rbf_svm2 = tune_model(rbf_svm2, custom_grid=params_svm2, n_iter=100)

# Full hyperparameter dictionary of the tuned model, then the two searched values
best_params = tuned_rbf_svm2.get_params()
print(best_params)
print("\n".join(f"Best {key}: {best_params[key]}" for key in ("C", "gamma")))

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.4286,0.4333,0.4286,0.4753,0.449,-0.2727,-0.2791
1,0.9048,0.9815,0.9048,0.9184,0.9025,0.8,0.8165
2,0.5238,0.4352,0.5238,0.4929,0.4898,-0.0294,-0.0323
3,0.4444,0.5185,0.4444,0.4416,0.4375,-0.1111,-0.114
4,0.7778,0.9012,0.7778,0.8462,0.7662,0.5556,0.6202
5,0.6667,0.9028,0.6667,0.8333,0.6667,0.4,0.5
6,0.7778,0.7531,0.7778,0.7922,0.775,0.5556,0.5698
7,0.8889,0.9877,0.8889,0.8889,0.8889,0.7778,0.7778
Mean,0.6766,0.7392,0.6766,0.7111,0.6719,0.3345,0.3574
Std,0.179,0.2259,0.179,0.1905,0.1794,0.3897,0.4033


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.619,0.6778,0.619,0.6987,0.6374,0.2222,0.2412
1,0.9048,0.963,0.9048,0.9184,0.9025,0.8,0.8165
2,0.4286,0.2685,0.4286,0.4571,0.4048,-0.0769,-0.0913
3,0.5556,0.4938,0.5556,0.5692,0.5325,0.1111,0.124
4,0.7778,0.9259,0.7778,0.8462,0.7662,0.5556,0.6202
5,0.6667,0.8611,0.6667,0.8333,0.6667,0.4,0.5
6,0.7222,0.642,0.7222,0.75,0.7143,0.4444,0.4714
7,0.8889,0.9753,0.8889,0.9091,0.8875,0.7778,0.7977
Mean,0.6954,0.7259,0.6954,0.7478,0.689,0.4043,0.435
Std,0.1526,0.2369,0.1526,0.1545,0.1581,0.2897,0.3017


Fitting 8 folds for each of 24 candidates, totalling 192 fits
{'C': 200, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 0.0005, 'kernel': 'rbf', 'max_iter': -1, 'probability': True, 'random_state': 42, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Best C: 200
Best gamma: 0.0005


In [43]:
# Baseline L1-penalised logistic regression, followed by a hyperparameter search
lasso_clf = create_model('lr', penalty='l1', solver='liblinear')

# Search space for the L1 logistic regression
params = dict(
    C=[0.001, 0.01, 0.1, 1, 10, 100, 1000],           # inverse regularization strength
    max_iter=[15, 20, 25, 50, 100, 200, 500, 1000],   # maximum iterations
    tol=[1e-4, 1e-3, 1e-2],                           # tolerance for the stopping criterion
    class_weight=[None, 'balanced'],                  # handle class imbalance
)

# Random search over the grid with n_iter candidates. The exhaustive alternative is
# tune_model(lasso_clf, custom_grid=params, search_algorithm='grid').
tuned_lasso_clf = tune_model(lasso_clf, custom_grid=params, n_iter=100)

# Full hyperparameter dictionary of the tuned model, then the searched values
best_params = tuned_lasso_clf.get_params()
print(best_params)
print("\n".join(
    f"Best {key}: {best_params[key]}"
    for key in ("C", "max_iter", "tol", "class_weight")
))

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.619,0.6889,0.619,0.6987,0.6374,0.2222,0.2412
1,0.9048,0.9722,0.9048,0.9184,0.9025,0.8,0.8165
2,0.381,0.5,0.381,0.3893,0.3402,-0.1519,-0.1936
3,0.5556,0.4568,0.5556,0.5692,0.5325,0.1111,0.124
4,0.7222,0.9012,0.7222,0.75,0.7143,0.4444,0.4714
5,0.6667,0.8611,0.6667,0.8333,0.6667,0.4,0.5
6,0.4444,0.4444,0.4444,0.4308,0.4156,-0.1111,-0.124
7,0.6667,0.9136,0.6667,0.7077,0.6494,0.3333,0.3721
Mean,0.62,0.7173,0.62,0.6622,0.6073,0.256,0.2759
Std,0.1533,0.2087,0.1533,0.1742,0.1653,0.2919,0.3152


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6667,0.7,0.6667,0.7727,0.6824,0.3467,0.392
1,0.8571,0.9815,0.8571,0.8585,0.8558,0.7042,0.7077
2,0.5714,0.4815,0.5714,0.5952,0.5714,0.16,0.1667
3,0.5556,0.4691,0.5556,0.5692,0.5325,0.1111,0.124
4,0.6667,0.6667,0.6667,0.6667,0.6667,0.3333,0.3333
5,0.6667,0.8333,0.6667,0.8333,0.6667,0.4,0.5
6,0.3333,0.4444,0.3333,0.2,0.25,-0.3333,-0.4472
7,0.7778,0.963,0.7778,0.8462,0.7662,0.5556,0.6202
Mean,0.6369,0.6924,0.6369,0.6677,0.624,0.2847,0.2996
Std,0.1475,0.2041,0.1475,0.2063,0.1703,0.2951,0.3401


Fitting 8 folds for each of 100 candidates, totalling 800 fits
{'C': 100, 'class_weight': 'balanced', 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 25, 'multi_class': 'auto', 'n_jobs': None, 'penalty': 'l1', 'random_state': 42, 'solver': 'liblinear', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}
Best C: 100
Best max_iter: 25
Best tol: 0.0001
Best class_weight: balanced
