In [1]:
# Importing Libraries
try:
    import pandas as pd
    import numpy as np
    from numpy import set_printoptions
    import os,sys
    import re
    # importing algorithms
    from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV
    from sklearn.linear_model import LogisticRegression
    from sklearn import svm
    from sklearn.metrics import confusion_matrix, classification_report
    from sklearn.preprocessing import Normalizer
    from sklearn.preprocessing import StandardScaler
except Exception as e:
    print("Error is due to",e)
pwd = os.getcwd()
labels_df = pd.read_csv(pwd+"//Datasets//Kabita//Input//kabita_dataset_labels.csv")

## Verloop BERT Sentence Transformer

In [28]:
# V BERT vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//SentenceTransformers//bert_vectorized_kabita_dataset_vbert.csv")
y_df = labels_df['kabita_labels']
# Test train split
x_train,x_test,y_train,y_test = train_test_split(x_df, y_df, test_size=0.30,random_state=21,stratify=y_df)

# Normalize scaling of train data
normalize_model = Normalizer()
nscaled_data_train = normalize_model.fit_transform(x_train)
# Normalize scaling of test data
nscaled_data_test = normalize_model.fit_transform(x_test)

# Standard scaling of train data
standard_model = StandardScaler()
sscaled_data_train = standard_model.fit_transform(x_train)
# Standard scaling of test data
sscaled_data_test = standard_model.fit_transform(x_test)

### Hyperparameter Tuning for No Scaled Data for Logistic Regression

In [40]:
# Random Search CV

# Set up the sample space
c_list = [0.1, 1, 10, 100, 1000]
penalty_list=['l2']
max_iter_list = [1000,2000,3000,4000,5000,6000]
# Create the grid
parameter_grid = {'C' : c_list, 'penalty': penalty_list, 'max_iter': max_iter_list}
# Define how many samples
number_models = 10
# Create a random search object
random_LR_class = RandomizedSearchCV(estimator = LogisticRegression(),
                                      param_distributions = parameter_grid,
                                      n_iter = number_models,
                                      scoring='accuracy',
                                      n_jobs=6,
                                      cv = 5,
                                      refit=True,
                                      return_train_score = True)
# Fit the object to our data
random_LR_class.fit(x_train, y_train)

RandomizedSearchCV(cv=5, estimator=LogisticRegression(), n_jobs=6,
                   param_distributions={'C': [0.1, 1, 10, 100, 1000],
                                        'max_iter': [1000, 2000, 3000, 4000,
                                                     5000, 6000],
                                        'penalty': ['l2']},
                   return_train_score=True, scoring='accuracy')

In [41]:
# print best parameter after tuning
print(random_LR_class.best_params_)
 
# print how our model looks after hyper-parameter tuning
print(random_LR_class.best_estimator_)

{'penalty': 'l2', 'max_iter': 5000, 'C': 1}
LogisticRegression(C=1, max_iter=5000)


In [42]:
# Predicted test outputs
random_LR_class_predictions = random_LR_class.predict(x_test)

#print Confusion Matrix
print(confusion_matrix(y_test,random_LR_class_predictions))
 
# print Classification Report
print(classification_report(y_test, random_LR_class_predictions))

[[164   2   0   3  32   8   1]
 [  1 171  10   8  11   8   1]
 [  0   1 196   6   1   6   0]
 [  3   6   4 166  15  14   2]
 [ 31  11   1  10 145   6   6]
 [  5  10   8  15   5 142  25]
 [  3   0   0   1   3  26 177]]
              precision    recall  f1-score   support

           1       0.79      0.78      0.79       210
           2       0.85      0.81      0.83       210
           3       0.89      0.93      0.91       210
           4       0.79      0.79      0.79       210
           5       0.68      0.69      0.69       210
           6       0.68      0.68      0.68       210
           7       0.83      0.84      0.84       210

    accuracy                           0.79      1470
   macro avg       0.79      0.79      0.79      1470
weighted avg       0.79      0.79      0.79      1470



In [43]:
# Examining results using pandas df
rand_cv_results_df = pd.DataFrame(random_LR_class.cv_results_)
pd.set_option('display.max_columns', None)
rand_cv_results_df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_penalty,param_max_iter,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,5.412535,0.682226,0.02294,0.005359,l2,5000,0.1,"{'penalty': 'l2', 'max_iter': 5000, 'C': 0.1}",0.782799,0.763848,0.779883,0.763848,0.759475,0.769971,0.009465,3,0.856778,0.864796,0.855685,0.856414,0.859694,0.858673,0.003351
1,24.905331,2.718335,0.021305,0.010119,l2,6000,10.0,"{'penalty': 'l2', 'max_iter': 6000, 'C': 10}",0.744898,0.753644,0.747813,0.744898,0.755102,0.749271,0.004324,6,0.996356,0.995991,0.997085,0.996356,0.997085,0.996574,0.000437
2,12.199599,1.90648,0.018429,0.006444,l2,5000,1.0,"{'penalty': 'l2', 'max_iter': 5000, 'C': 1}",0.77551,0.769679,0.787172,0.776968,0.769679,0.775802,0.006414,1,0.960277,0.958819,0.960641,0.954082,0.956633,0.95809,0.00245
3,11.993636,1.345737,0.026819,0.003537,l2,3000,1.0,"{'penalty': 'l2', 'max_iter': 3000, 'C': 1}",0.77551,0.769679,0.787172,0.776968,0.769679,0.775802,0.006414,1,0.960277,0.958819,0.960641,0.954082,0.956633,0.95809,0.00245
4,39.789203,2.625781,0.020878,0.007559,l2,2000,100.0,"{'penalty': 'l2', 'max_iter': 2000, 'C': 100}",0.741983,0.758017,0.72449,0.734694,0.740525,0.739942,0.010932,7,0.997449,0.997085,0.998178,0.99672,0.997813,0.997449,0.000515
5,5.975999,0.783036,0.017419,0.002163,l2,6000,0.1,"{'penalty': 'l2', 'max_iter': 6000, 'C': 0.1}",0.782799,0.763848,0.779883,0.763848,0.759475,0.769971,0.009465,3,0.856778,0.864796,0.855685,0.856414,0.859694,0.858673,0.003351
6,6.07726,0.859855,0.025407,0.011448,l2,3000,0.1,"{'penalty': 'l2', 'max_iter': 3000, 'C': 0.1}",0.782799,0.763848,0.779883,0.763848,0.759475,0.769971,0.009465,3,0.856778,0.864796,0.855685,0.856414,0.859694,0.858673,0.003351
7,21.698886,0.6443,0.023273,0.00858,l2,1000,100.0,"{'penalty': 'l2', 'max_iter': 1000, 'C': 100}",0.739067,0.760933,0.72449,0.733236,0.741983,0.739942,0.012077,8,0.997449,0.997085,0.998178,0.99672,0.997813,0.997449,0.000515
8,61.211273,7.714237,0.019928,0.00602,l2,4000,1000.0,"{'penalty': 'l2', 'max_iter': 4000, 'C': 1000}",0.733236,0.749271,0.723032,0.723032,0.733236,0.732362,0.009608,9,0.997449,0.997085,0.998178,0.99672,0.997813,0.997449,0.000515
9,48.964349,4.763919,0.013373,0.012049,l2,5000,1000.0,"{'penalty': 'l2', 'max_iter': 5000, 'C': 1000}",0.733236,0.749271,0.723032,0.723032,0.733236,0.732362,0.009608,9,0.997449,0.997085,0.998178,0.99672,0.997813,0.997449,0.000515


In [44]:
# Grid Search CV

# Set up the sample space
c_list = [0.1, 1, 2, 3]
penalty_list=['l2']
max_iter_list = [3000,4000,5000,6000]
# Create the grid
parameter_grid = {'C' : c_list, 'penalty': penalty_list, 'max_iter': max_iter_list}

# Create a grid search object
grid_LR_class = GridSearchCV(estimator = LogisticRegression(),
                             param_grid = parameter_grid,
                             scoring='accuracy',
                             n_jobs=4,
                             cv = 5,
                             refit=True,
                             return_train_score=True)

# Fit the object to our data
grid_LR_class.fit(x_train, y_train)

GridSearchCV(cv=5, estimator=LogisticRegression(), n_jobs=4,
             param_grid={'C': [0.1, 1, 2, 3],
                         'max_iter': [3000, 4000, 5000, 6000],
                         'penalty': ['l2']},
             return_train_score=True, scoring='accuracy')

In [45]:
# print best parameter after tuning
print(grid_LR_class.best_params_)
 
# print how our model looks after hyper-parameter tuning
print(grid_LR_class.best_estimator_)

{'C': 1, 'max_iter': 3000, 'penalty': 'l2'}
LogisticRegression(C=1, max_iter=3000)


In [46]:
# Predicted test outputs
grid_LR_class_predictions = grid_LR_class.predict(x_test)

#print Confusion Matrix
print(confusion_matrix(y_test, grid_LR_class_predictions))
 
# print Classification Report
print(classification_report(y_test, grid_LR_class_predictions))

[[164   2   0   3  32   8   1]
 [  1 171  10   8  11   8   1]
 [  0   1 196   6   1   6   0]
 [  3   6   4 166  15  14   2]
 [ 31  11   1  10 145   6   6]
 [  5  10   8  15   5 142  25]
 [  3   0   0   1   3  26 177]]
              precision    recall  f1-score   support

           1       0.79      0.78      0.79       210
           2       0.85      0.81      0.83       210
           3       0.89      0.93      0.91       210
           4       0.79      0.79      0.79       210
           5       0.68      0.69      0.69       210
           6       0.68      0.68      0.68       210
           7       0.83      0.84      0.84       210

    accuracy                           0.79      1470
   macro avg       0.79      0.79      0.79      1470
weighted avg       0.79      0.79      0.79      1470



In [47]:
# Examining results using pandas df
grid_cv_results_df = pd.DataFrame(grid_LR_class.cv_results_)
pd.set_option('display.max_columns', None)
grid_cv_results_df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_max_iter,param_penalty,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,3.857498,0.624945,0.024799,0.005936,0.1,3000,l2,"{'C': 0.1, 'max_iter': 3000, 'penalty': 'l2'}",0.782799,0.763848,0.779883,0.763848,0.759475,0.769971,0.009465,9,0.856778,0.864796,0.85605,0.856778,0.859694,0.858819,0.00324
1,3.527699,0.671671,0.015283,0.001265,0.1,4000,l2,"{'C': 0.1, 'max_iter': 4000, 'penalty': 'l2'}",0.782799,0.763848,0.779883,0.763848,0.759475,0.769971,0.009465,9,0.856778,0.864796,0.85605,0.856778,0.859694,0.858819,0.00324
2,3.391776,0.509967,0.024361,0.007268,0.1,5000,l2,"{'C': 0.1, 'max_iter': 5000, 'penalty': 'l2'}",0.782799,0.763848,0.779883,0.763848,0.759475,0.769971,0.009465,9,0.856778,0.864796,0.85605,0.856778,0.859694,0.858819,0.00324
3,3.535276,0.545039,0.024662,0.006316,0.1,6000,l2,"{'C': 0.1, 'max_iter': 6000, 'penalty': 'l2'}",0.782799,0.763848,0.779883,0.763848,0.759475,0.769971,0.009465,9,0.856778,0.864796,0.85605,0.856778,0.859694,0.858819,0.00324
4,8.40793,1.12666,0.024484,0.006408,1.0,3000,l2,"{'C': 1, 'max_iter': 3000, 'penalty': 'l2'}",0.77551,0.768222,0.787172,0.776968,0.769679,0.77551,0.006712,1,0.960277,0.959184,0.960641,0.954082,0.956268,0.95809,0.002525
5,8.710756,1.192086,0.025203,0.007409,1.0,4000,l2,"{'C': 1, 'max_iter': 4000, 'penalty': 'l2'}",0.77551,0.768222,0.787172,0.776968,0.769679,0.77551,0.006712,1,0.960277,0.959184,0.960641,0.954082,0.956268,0.95809,0.002525
6,8.732003,1.165581,0.029294,0.002741,1.0,5000,l2,"{'C': 1, 'max_iter': 5000, 'penalty': 'l2'}",0.77551,0.768222,0.787172,0.776968,0.769679,0.77551,0.006712,1,0.960277,0.959184,0.960641,0.954082,0.956268,0.95809,0.002525
7,8.925185,1.23476,0.024452,0.007142,1.0,6000,l2,"{'C': 1, 'max_iter': 6000, 'penalty': 'l2'}",0.77551,0.768222,0.787172,0.776968,0.769679,0.77551,0.006712,1,0.960277,0.959184,0.960641,0.954082,0.956268,0.95809,0.002525
8,10.198239,1.457679,0.015704,0.000165,2.0,3000,l2,"{'C': 2, 'max_iter': 3000, 'penalty': 'l2'}",0.769679,0.765306,0.77551,0.771137,0.768222,0.769971,0.003375,5,0.979956,0.977041,0.981414,0.975948,0.980321,0.978936,0.002079
9,10.130291,1.274789,0.020185,0.006545,2.0,4000,l2,"{'C': 2, 'max_iter': 4000, 'penalty': 'l2'}",0.769679,0.765306,0.77551,0.771137,0.768222,0.769971,0.003375,5,0.979956,0.977041,0.981414,0.975948,0.980321,0.978936,0.002079


### Hyperparameter Tuning for Normalized Data for SVM

In [3]:
# Random Search CV

# Set up the sample space
c_list = [0.1, 1, 10, 100, 1000]
gamma_list = ['scale', 1, 0.1, 0.01, 0.001, 0.0001]
kernel_list = ['poly','rbf']
# Create the grid
parameter_grid = {'C' : c_list, 'gamma' : gamma_list, 'kernel' : kernel_list}
# Define how many samples
number_models = 10
# Create a random search object
random_SVM_class = RandomizedSearchCV(estimator = svm.SVC(),
                                      param_distributions = parameter_grid,
                                      n_iter = number_models,
                                      scoring='accuracy',
                                      n_jobs=4,
                                      cv = 10,
                                      refit=True,
                                      return_train_score = True)
# Fit the object to our data
random_SVM_class.fit(nscaled_data_train, y_train)

RandomizedSearchCV(cv=10, estimator=SVC(), n_jobs=4,
                   param_distributions={'C': [0.1, 1, 10, 100, 1000],
                                        'gamma': ['scale', 1, 0.1, 0.01, 0.001,
                                                  0.0001],
                                        'kernel': ['poly', 'rbf']},
                   return_train_score=True, scoring='accuracy')

In [4]:
# print best parameter after tuning
print(random_SVM_class.best_params_)
 
# print how our model looks after hyper-parameter tuning
print(random_SVM_class.best_estimator_)

{'kernel': 'rbf', 'gamma': 1, 'C': 10}
SVC(C=10, gamma=1)


In [5]:
# Predicted test outputs
random_SVM_class_predictions = random_SVM_class.predict(nscaled_data_test)

#print Confusion Matrix
print(confusion_matrix(y_test,random_SVM_class_predictions))
 
# print Classification Report
print(classification_report(y_test, random_SVM_class_predictions))

[[168   1   0   1  30  10   0]
 [  1 179   9   6  10   4   1]
 [  0   1 196   7   2   4   0]
 [  3   5   4 170  15  11   2]
 [ 28   6   1   8 160   2   5]
 [  6   9   6  14   4 149  22]
 [  2   0   0   1   3  18 186]]
              precision    recall  f1-score   support

           1       0.81      0.80      0.80       210
           2       0.89      0.85      0.87       210
           3       0.91      0.93      0.92       210
           4       0.82      0.81      0.82       210
           5       0.71      0.76      0.74       210
           6       0.75      0.71      0.73       210
           7       0.86      0.89      0.87       210

    accuracy                           0.82      1470
   macro avg       0.82      0.82      0.82      1470
weighted avg       0.82      0.82      0.82      1470



In [6]:
# Examining results using pandas df
rand_cv_results_df = pd.DataFrame(random_SVM_class.cv_results_)
pd.set_option('display.max_columns', None)
rand_cv_results_df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_kernel,param_gamma,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,split5_test_score,split6_test_score,split7_test_score,split8_test_score,split9_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,split5_train_score,split6_train_score,split7_train_score,split8_train_score,split9_train_score,mean_train_score,std_train_score
0,10.56475,0.429175,0.920068,0.138292,poly,1,0.1,"{'kernel': 'poly', 'gamma': 1, 'C': 0.1}",0.6793,0.696793,0.699708,0.6793,0.670554,0.688047,0.661808,0.71137,0.664723,0.705539,0.685714,0.01643,8,0.714286,0.713962,0.714286,0.715905,0.719469,0.713314,0.717201,0.71137,0.720441,0.712666,0.71529,0.002792
1,6.787252,0.280121,0.854823,0.056746,rbf,scale,1.0,"{'kernel': 'rbf', 'gamma': 'scale', 'C': 1}",0.781341,0.781341,0.74344,0.790087,0.766764,0.787172,0.746356,0.790087,0.731778,0.763848,0.768222,0.020251,5,0.850016,0.853256,0.855199,0.849692,0.851636,0.852608,0.85034,0.854227,0.849044,0.854551,0.852057,0.00211
2,19.358951,0.730882,1.195744,0.084298,poly,0.1,10.0,"{'kernel': 'poly', 'gamma': 0.1, 'C': 10}",0.533528,0.597668,0.542274,0.533528,0.542274,0.556851,0.530612,0.559767,0.54519,0.591837,0.553353,0.022613,9,0.563654,0.557175,0.559767,0.568837,0.561063,0.56171,0.570133,0.563978,0.565598,0.560415,0.563233,0.00386
3,19.997442,1.245283,1.227007,0.138532,poly,0.0001,10.0,"{'kernel': 'poly', 'gamma': 0.0001, 'C': 10}",0.533528,0.586006,0.539359,0.527697,0.530612,0.556851,0.527697,0.55102,0.533528,0.574344,0.546064,0.019516,10,0.555879,0.551344,0.55426,0.560415,0.556527,0.552964,0.564626,0.556203,0.558147,0.554908,0.556527,0.003622
4,9.270921,0.85926,1.003391,0.039062,rbf,0.0001,1000.0,"{'kernel': 'rbf', 'gamma': 0.0001, 'C': 1000}",0.688047,0.714286,0.702624,0.708455,0.682216,0.702624,0.682216,0.708455,0.658892,0.696793,0.694461,0.015905,7,0.712018,0.713962,0.71461,0.71299,0.719145,0.712342,0.718497,0.715258,0.720764,0.715905,0.715549,0.002859
5,4.626789,0.433491,0.981281,0.084837,rbf,0.1,10.0,"{'kernel': 'rbf', 'gamma': 0.1, 'C': 10}",0.769679,0.787172,0.746356,0.769679,0.781341,0.77551,0.746356,0.793003,0.737609,0.752187,0.765889,0.018163,6,0.843537,0.843213,0.845481,0.84127,0.837707,0.839002,0.836411,0.839002,0.839002,0.84127,0.84059,0.002716
6,7.120452,0.835517,0.858977,0.075204,poly,scale,1.0,"{'kernel': 'poly', 'gamma': 'scale', 'C': 1}",0.793003,0.790087,0.746356,0.781341,0.77551,0.769679,0.746356,0.798834,0.755102,0.778426,0.773469,0.017927,3,0.885649,0.885649,0.889213,0.886621,0.885649,0.889861,0.890833,0.887917,0.884354,0.889861,0.887561,0.002151
7,6.988788,0.500594,0.877948,0.083392,poly,1,1.0,"{'kernel': 'poly', 'gamma': 1, 'C': 1}",0.793003,0.790087,0.746356,0.781341,0.77551,0.769679,0.746356,0.798834,0.755102,0.778426,0.773469,0.017927,3,0.885973,0.885649,0.889213,0.886621,0.885649,0.889861,0.890833,0.887917,0.884354,0.889861,0.887593,0.002124
8,4.987109,0.442797,0.997592,0.127752,rbf,1,10.0,"{'kernel': 'rbf', 'gamma': 1, 'C': 10}",0.793003,0.784257,0.752187,0.798834,0.790087,0.804665,0.74344,0.798834,0.778426,0.760933,0.780466,0.020201,1,0.98769,0.98769,0.986395,0.988014,0.987042,0.987042,0.985423,0.984775,0.985423,0.985747,0.986524,0.001076
9,4.444266,0.379311,0.902272,0.057827,rbf,1,100.0,"{'kernel': 'rbf', 'gamma': 1, 'C': 100}",0.781341,0.784257,0.749271,0.790087,0.778426,0.790087,0.746356,0.801749,0.760933,0.760933,0.774344,0.017839,2,0.997408,0.997408,0.997085,0.997408,0.997408,0.998056,0.997085,0.997085,0.997408,0.997732,0.997408,0.00029


In [7]:
# Grid Search CV

# Set up the sample space
c_list = [1, 2, 3, 5, 10, 100]
gamma_list = ['auto', 'scale', 1, 0.1]
kernel_list = ['poly','rbf']
# Create the grid
parameter_grid = {'C' : c_list, 'gamma' : gamma_list, 'kernel' : kernel_list}

# Create a grid search object
grid_SVM_class = GridSearchCV(estimator = svm.SVC(),
                             param_grid = parameter_grid,
                             scoring='accuracy',
                             n_jobs=4,
                             cv = 5,
                             refit=True,
                             return_train_score=True)

# Fit the object to our data
grid_SVM_class.fit(nscaled_data_train, y_train)

GridSearchCV(cv=5, estimator=SVC(), n_jobs=4,
             param_grid={'C': [1, 2, 3, 5, 10, 100],
                         'gamma': ['auto', 'scale', 1, 0.1],
                         'kernel': ['poly', 'rbf']},
             return_train_score=True, scoring='accuracy')

In [8]:
# print best parameter after tuning
print(grid_SVM_class.best_params_)
 
# print how our model looks after hyper-parameter tuning
print(grid_SVM_class.best_estimator_)

{'C': 3, 'gamma': 'scale', 'kernel': 'rbf'}
SVC(C=3)


In [9]:
# Predicted test outputs
grid_SVM_class_predictions = grid_SVM_class.predict(nscaled_data_test)

#print Confusion Matrix
print(confusion_matrix(y_test, grid_SVM_class_predictions))
 
# print Classification Report
print(classification_report(y_test, grid_SVM_class_predictions))

[[161   2   0   2  32  12   1]
 [  1 172  12   5  12   7   1]
 [  0   3 196   4   2   5   0]
 [  4   5   4 162  18  15   2]
 [ 20   9   1   9 163   2   6]
 [  7   8   6  12   3 139  35]
 [  0   0   0   0   2  15 193]]
              precision    recall  f1-score   support

           1       0.83      0.77      0.80       210
           2       0.86      0.82      0.84       210
           3       0.89      0.93      0.91       210
           4       0.84      0.77      0.80       210
           5       0.70      0.78      0.74       210
           6       0.71      0.66      0.69       210
           7       0.81      0.92      0.86       210

    accuracy                           0.81      1470
   macro avg       0.81      0.81      0.81      1470
weighted avg       0.81      0.81      0.81      1470



In [10]:
# Examining results using pandas df
grid_cv_results_df = pd.DataFrame(grid_SVM_class.cv_results_)
pd.set_option('display.max_columns', None)
grid_cv_results_df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_gamma,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,15.543667,0.621895,2.177389,0.081281,1,auto,poly,"{'C': 1, 'gamma': 'auto', 'kernel': 'poly'}",0.553936,0.530612,0.543732,0.543732,0.555394,0.545481,0.00891,34,0.551385,0.560496,0.556122,0.562318,0.557216,0.557507,0.003783
1,14.536932,1.011101,2.367405,0.176674,1,auto,rbf,"{'C': 1, 'gamma': 'auto', 'kernel': 'rbf'}",0.479592,0.463557,0.462099,0.441691,0.479592,0.465306,0.013994,47,0.470117,0.468294,0.462828,0.471939,0.466108,0.467857,0.003172
2,4.659146,1.063285,1.290153,0.327358,1,scale,poly,"{'C': 1, 'gamma': 'scale', 'kernel': 'poly'}",0.797376,0.771137,0.768222,0.77551,0.772595,0.776968,0.010471,17,0.885204,0.889213,0.885933,0.890306,0.892128,0.888557,0.002622
3,4.024781,0.370648,1.695156,0.136221,1,scale,rbf,"{'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}",0.78863,0.753644,0.765306,0.766764,0.755102,0.765889,0.012526,23,0.85277,0.850948,0.84949,0.853499,0.854227,0.852187,0.001734
4,4.269115,0.659489,1.167223,0.141817,1,1,poly,"{'C': 1, 'gamma': 1, 'kernel': 'poly'}",0.797376,0.771137,0.768222,0.77551,0.772595,0.776968,0.010471,17,0.885204,0.889213,0.885933,0.890306,0.892128,0.888557,0.002622
5,4.389405,0.73875,1.786376,0.120123,1,1,rbf,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}",0.78863,0.753644,0.765306,0.766764,0.755102,0.765889,0.012526,23,0.85277,0.850948,0.84949,0.853499,0.854227,0.852187,0.001734
6,12.169989,1.359943,1.831526,0.154531,1,0.1,poly,"{'C': 1, 'gamma': 0.1, 'kernel': 'poly'}",0.553936,0.530612,0.543732,0.543732,0.555394,0.545481,0.00891,34,0.551385,0.560496,0.556122,0.562318,0.557216,0.557507,0.003783
7,6.779944,1.025799,2.133436,0.209361,1,0.1,rbf,"{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}",0.683673,0.696793,0.676385,0.695335,0.673469,0.685131,0.009537,31,0.705539,0.703353,0.707726,0.705539,0.710641,0.70656,0.002465
8,12.738895,1.916288,1.963908,0.322163,2,auto,poly,"{'C': 2, 'gamma': 'auto', 'kernel': 'poly'}",0.553936,0.530612,0.543732,0.543732,0.555394,0.545481,0.00891,34,0.551385,0.560496,0.556122,0.562318,0.557216,0.557507,0.003783
9,11.689229,2.074986,2.399812,0.025453,2,auto,rbf,"{'C': 2, 'gamma': 'auto', 'kernel': 'rbf'}",0.479592,0.463557,0.462099,0.441691,0.479592,0.465306,0.013994,47,0.470117,0.468294,0.462828,0.471939,0.466108,0.467857,0.003172


### Hyperparameter Tuning for Standardized Data for SVM

In [11]:
# Random Search CV

# Set up the sample space
c_list = [0.1, 1, 10, 100, 1000]
gamma_list = ['scale', 1, 0.1, 0.01, 0.001, 0.0001]
kernel_list = ['poly','rbf']
# Create the grid
parameter_grid = {'C' : c_list, 'gamma' : gamma_list, 'kernel' : kernel_list}
# Define how many samples
number_models = 10
# Create a random search object
random_SVM_class = RandomizedSearchCV(estimator = svm.SVC(),
                                      param_distributions = parameter_grid,
                                      n_iter = number_models,
                                      scoring='accuracy',
                                      n_jobs=4,
                                      cv = 5,
                                      refit=True,
                                      return_train_score = True)
# Fit the object to our data
random_SVM_class.fit(sscaled_data_train, y_train)

RandomizedSearchCV(cv=5, estimator=SVC(), n_jobs=4,
                   param_distributions={'C': [0.1, 1, 10, 100, 1000],
                                        'gamma': ['scale', 1, 0.1, 0.01, 0.001,
                                                  0.0001],
                                        'kernel': ['poly', 'rbf']},
                   return_train_score=True, scoring='accuracy')

In [12]:
# print best parameter after tuning
print(random_SVM_class.best_params_)
 
# print how our model looks after hyper-parameter tuning
print(random_SVM_class.best_estimator_)

{'kernel': 'rbf', 'gamma': 'scale', 'C': 100}
SVC(C=100)


In [13]:
# Predicted test outputs
random_SVM_class_predictions = random_SVM_class.predict(sscaled_data_test)

#print Confusion Matrix
print(confusion_matrix(y_test,random_SVM_class_predictions))
 
# print Classification Report
print(classification_report(y_test, random_SVM_class_predictions))

[[170   1   0   0  27  12   0]
 [  0 178   8   3  10  10   1]
 [  0   4 194   5   2   4   1]
 [  3   6   3 167  13  16   2]
 [ 26   8   1  13 151   6   5]
 [  6   6   6  13   2 153  24]
 [  0   0   0   1   3  16 190]]
              precision    recall  f1-score   support

           1       0.83      0.81      0.82       210
           2       0.88      0.85      0.86       210
           3       0.92      0.92      0.92       210
           4       0.83      0.80      0.81       210
           5       0.73      0.72      0.72       210
           6       0.71      0.73      0.72       210
           7       0.85      0.90      0.88       210

    accuracy                           0.82      1470
   macro avg       0.82      0.82      0.82      1470
weighted avg       0.82      0.82      0.82      1470



In [14]:
# Examining results using pandas df
rand_cv_results_df = pd.DataFrame(random_SVM_class.cv_results_)
pd.set_option('display.max_columns', None)
rand_cv_results_df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_kernel,param_gamma,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,8.984045,0.157191,2.746304,0.1691,rbf,0.001,1000.0,"{'kernel': 'rbf', 'gamma': 0.001, 'C': 1000}",0.782799,0.769679,0.779883,0.749271,0.77551,0.771429,0.011928,2,0.997449,0.997085,0.998178,0.99672,0.997813,0.997449,0.000515
1,9.47317,0.58276,3.036204,0.228684,rbf,scale,100.0,"{'kernel': 'rbf', 'gamma': 'scale', 'C': 100}",0.787172,0.765306,0.774052,0.752187,0.782799,0.772303,0.012553,1,0.997449,0.997085,0.998178,0.99672,0.997813,0.997449,0.000515
2,12.488123,0.332491,1.693061,0.055337,poly,0.1,1000.0,"{'kernel': 'poly', 'gamma': 0.1, 'C': 1000}",0.785714,0.753644,0.752187,0.744898,0.759475,0.759184,0.014055,3,0.997449,0.997085,0.998178,0.99672,0.997813,0.997449,0.000515
3,5.107132,0.280398,2.376176,0.107138,rbf,0.0001,100.0,"{'kernel': 'rbf', 'gamma': 0.0001, 'C': 100}",0.766764,0.752187,0.765306,0.752187,0.75656,0.758601,0.006294,6,0.99016,0.99016,0.992711,0.987974,0.988338,0.989869,0.001684
4,4.970643,0.10241,2.261271,0.181722,rbf,0.0001,1000.0,"{'kernel': 'rbf', 'gamma': 0.0001, 'C': 1000}",0.746356,0.755102,0.759475,0.74344,0.744898,0.749854,0.006294,7,0.997449,0.997085,0.998178,0.99672,0.997813,0.997449,0.000515
5,14.512515,1.482628,3.456601,0.346236,rbf,0.1,1.0,"{'kernel': 'rbf', 'gamma': 0.1, 'C': 1}",0.319242,0.300292,0.301749,0.304665,0.330904,0.31137,0.011864,10,0.997449,0.997085,0.998178,0.99672,0.997813,0.997449,0.000515
6,14.049026,0.520385,2.186122,0.066593,poly,scale,0.1,"{'kernel': 'poly', 'gamma': 'scale', 'C': 0.1}",0.593294,0.516035,0.546647,0.556851,0.540816,0.550729,0.025171,8,0.656341,0.620627,0.63156,0.630831,0.629738,0.633819,0.011933
7,13.175904,0.227657,1.917379,0.041451,poly,0.01,0.1,"{'kernel': 'poly', 'gamma': 0.01, 'C': 0.1}",0.785714,0.753644,0.752187,0.744898,0.759475,0.759184,0.014055,3,0.997449,0.997085,0.998178,0.99672,0.997813,0.997449,0.000515
8,16.429651,1.943494,5.003713,1.793466,rbf,0.01,1.0,"{'kernel': 'rbf', 'gamma': 0.01, 'C': 1}",0.51895,0.524781,0.534985,0.546647,0.507289,0.526531,0.013468,9,0.997449,0.99672,0.998178,0.99672,0.997813,0.997376,0.000583
9,11.281908,0.86517,1.722487,0.196814,poly,0.01,1000.0,"{'kernel': 'poly', 'gamma': 0.01, 'C': 1000}",0.785714,0.753644,0.752187,0.744898,0.759475,0.759184,0.014055,3,0.997449,0.997085,0.998178,0.99672,0.997813,0.997449,0.000515


In [15]:
# Grid Search CV

# Set up the sample space
c_list = [1, 3, 100, 1000]
gamma_list = ['auto', 'scale', 1, 0.1]
kernel_list = ['poly','rbf']
# Create the grid
parameter_grid = {'C' : c_list, 'gamma' : gamma_list, 'kernel' : kernel_list}

# Create a grid search object
grid_SVM_class = GridSearchCV(estimator = svm.SVC(),
                             param_grid = parameter_grid,
                             scoring='accuracy',
                             n_jobs=4,
                             cv = 5,
                             refit=True,
                             return_train_score=True)

# Fit the object to our data
grid_SVM_class.fit(sscaled_data_train, y_train)

GridSearchCV(cv=5, estimator=SVC(), n_jobs=4,
             param_grid={'C': [1, 3, 100, 1000],
                         'gamma': ['auto', 'scale', 1, 0.1],
                         'kernel': ['poly', 'rbf']},
             return_train_score=True, scoring='accuracy')

In [16]:
# print best parameter after tuning
print(grid_SVM_class.best_params_)
 
# print how our model looks after hyper-parameter tuning
print(grid_SVM_class.best_estimator_)

{'C': 3, 'gamma': 'auto', 'kernel': 'rbf'}
SVC(C=3, gamma='auto')


In [17]:
# Predicted test outputs
grid_SVM_class_predictions = grid_SVM_class.predict(sscaled_data_test)

#print Confusion Matrix
print(confusion_matrix(y_test, grid_SVM_class_predictions))
 
# print Classification Report
print(classification_report(y_test, grid_SVM_class_predictions))

[[173   1   0   1  23  12   0]
 [  0 174   8   3  12  12   1]
 [  0   5 194   4   2   4   1]
 [  4   5   4 166  13  16   2]
 [ 26   7   1  12 153   6   5]
 [  6   7   6  11   4 152  24]
 [  0   0   0   0   1  17 192]]
              precision    recall  f1-score   support

           1       0.83      0.82      0.83       210
           2       0.87      0.83      0.85       210
           3       0.91      0.92      0.92       210
           4       0.84      0.79      0.82       210
           5       0.74      0.73      0.73       210
           6       0.69      0.72      0.71       210
           7       0.85      0.91      0.88       210

    accuracy                           0.82      1470
   macro avg       0.82      0.82      0.82      1470
weighted avg       0.82      0.82      0.82      1470



In [18]:
# Examining results using pandas df
grid_cv_results_df = pd.DataFrame(grid_SVM_class.cv_results_)
pd.set_option('display.max_columns', None)
grid_cv_results_df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_gamma,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,11.658449,0.108038,1.892718,0.125339,1,auto,poly,"{'C': 1, 'gamma': 'auto', 'kernel': 'poly'}",0.760933,0.72449,0.725948,0.718659,0.734694,0.732945,0.014906,24,0.938776,0.943878,0.941327,0.938776,0.939504,0.940452,0.00195
1,8.4001,1.228252,3.057011,0.064939,1,auto,rbf,"{'C': 1, 'gamma': 'auto', 'kernel': 'rbf'}",0.798834,0.771137,0.763848,0.759475,0.774052,0.773469,0.013693,3,0.931122,0.936224,0.931851,0.934038,0.936589,0.933965,0.002216
2,10.347663,0.298369,1.82386,0.164511,1,scale,poly,"{'C': 1, 'gamma': 'scale', 'kernel': 'poly'}",0.760933,0.725948,0.727405,0.720117,0.734694,0.733819,0.01433,23,0.938776,0.945335,0.940962,0.939504,0.939504,0.940816,0.002369
3,7.207869,0.854767,2.847299,0.138356,1,scale,rbf,"{'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}",0.798834,0.771137,0.763848,0.759475,0.774052,0.773469,0.013693,3,0.931122,0.936224,0.931851,0.934038,0.936224,0.933892,0.002132
4,12.00397,0.17224,1.880228,0.050515,1,1,poly,"{'C': 1, 'gamma': 1, 'kernel': 'poly'}",0.785714,0.753644,0.752187,0.744898,0.759475,0.759184,0.014055,9,0.997449,0.997085,0.998178,0.99672,0.997813,0.997449,0.000515
5,16.603175,1.309576,3.788525,0.353424,1,1,rbf,"{'C': 1, 'gamma': 1, 'kernel': 'rbf'}",0.427114,0.402332,0.415452,0.419825,0.447522,0.422449,0.014906,25,0.997449,0.997085,0.998178,0.99672,0.997813,0.997449,0.000515
6,11.101491,0.323399,1.749464,0.066031,1,0.1,poly,"{'C': 1, 'gamma': 0.1, 'kernel': 'poly'}",0.785714,0.753644,0.752187,0.744898,0.759475,0.759184,0.014055,9,0.997449,0.997085,0.998178,0.99672,0.997813,0.997449,0.000515
7,15.436338,0.612048,3.543386,0.102824,1,0.1,rbf,"{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}",0.319242,0.300292,0.301749,0.304665,0.330904,0.31137,0.011864,32,0.997449,0.997085,0.998178,0.99672,0.997813,0.997449,0.000515
8,11.280744,0.458843,1.787863,0.061535,3,auto,poly,"{'C': 3, 'gamma': 'auto', 'kernel': 'poly'}",0.785714,0.750729,0.744898,0.744898,0.752187,0.755685,0.015305,21,0.994169,0.993805,0.994169,0.991254,0.99344,0.993367,0.001091
9,7.55613,0.656163,2.987109,0.058215,3,auto,rbf,"{'C': 3, 'gamma': 'auto', 'kernel': 'rbf'}",0.794461,0.763848,0.778426,0.753644,0.782799,0.774636,0.01436,1,0.991983,0.992711,0.99344,0.990889,0.992711,0.992347,0.000862


## FineTuned GPT Hinglish

In [19]:
# Fine Tuned GPT Hinglish vectorized data
x_df = pd.read_csv(pwd+"//Datasets//Kabita//FineTunedTransformers//gpt_hinglish_finetuned_vectorized_kabita_dataset.csv")
y_df = labels_df['kabita_labels']
# Test train split
x_train,x_test,y_train,y_test = train_test_split(x_df, y_df, test_size=0.30,random_state=21,stratify=y_df)

# Standard scaling of train data
standard_model = StandardScaler()
sscaled_data_train = standard_model.fit_transform(x_train)
# Standard scaling of test data
sscaled_data_test = standard_model.fit_transform(x_test)

### Hyperparameter Tuning for Standardized Data for SVM

In [20]:
# Random Search CV

# Set up the sample space
c_list = [0.1, 1, 10, 100, 1000]
gamma_list = ['scale', 1, 0.1, 0.01, 0.001, 0.0001]
kernel_list = ['poly','rbf']
# Create the grid
parameter_grid = {'C' : c_list, 'gamma' : gamma_list, 'kernel' : kernel_list}
# Define how many samples
number_models = 10
# Create a random search object
random_SVM_class = RandomizedSearchCV(estimator = svm.SVC(),
                                      param_distributions = parameter_grid,
                                      n_iter = number_models,
                                      scoring='accuracy',
                                      n_jobs=4,
                                      cv = 5,
                                      refit=True,
                                      return_train_score = True)
# Fit the object to our data
random_SVM_class.fit(sscaled_data_train, y_train)

RandomizedSearchCV(cv=5, estimator=SVC(), n_jobs=4,
                   param_distributions={'C': [0.1, 1, 10, 100, 1000],
                                        'gamma': ['scale', 1, 0.1, 0.01, 0.001,
                                                  0.0001],
                                        'kernel': ['poly', 'rbf']},
                   return_train_score=True, scoring='accuracy')

In [21]:
# print best parameter after tuning
print(random_SVM_class.best_params_)
 
# print how our model looks after hyper-parameter tuning
print(random_SVM_class.best_estimator_)

{'kernel': 'rbf', 'gamma': 0.0001, 'C': 10}
SVC(C=10, gamma=0.0001)


In [22]:
# Predicted test outputs
random_SVM_class_predictions = random_SVM_class.predict(sscaled_data_test)

#print Confusion Matrix
print(confusion_matrix(y_test,random_SVM_class_predictions))
 
# print Classification Report
print(classification_report(y_test, random_SVM_class_predictions))

[[160   0   0   2  33  11   4]
 [  0 182   6   9   8   3   2]
 [  1   2 196   5   1   5   0]
 [  4   6   2 170  11  14   3]
 [ 25   9   3  14 146   5   8]
 [  3   4   3  17   5 129  49]
 [  0   1   0   1   3  16 189]]
              precision    recall  f1-score   support

           1       0.83      0.76      0.79       210
           2       0.89      0.87      0.88       210
           3       0.93      0.93      0.93       210
           4       0.78      0.81      0.79       210
           5       0.71      0.70      0.70       210
           6       0.70      0.61      0.66       210
           7       0.74      0.90      0.81       210

    accuracy                           0.80      1470
   macro avg       0.80      0.80      0.80      1470
weighted avg       0.80      0.80      0.80      1470



In [23]:
# Examining results using pandas df
rand_cv_results_df = pd.DataFrame(random_SVM_class.cv_results_)
pd.set_option('display.max_columns', None)
rand_cv_results_df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_kernel,param_gamma,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,18.247943,0.331747,2.554531,0.071171,poly,0.01,1.0,"{'kernel': 'poly', 'gamma': 0.01, 'C': 1}",0.739067,0.749271,0.752187,0.741983,0.740525,0.744606,0.005166,3,0.997449,0.997085,0.998178,0.99672,0.997813,0.997449,0.000515
1,20.661357,2.217096,4.89521,0.096823,rbf,0.01,1.0,"{'kernel': 'rbf', 'gamma': 0.01, 'C': 1}",0.409621,0.431487,0.432945,0.405248,0.428571,0.421574,0.011713,9,0.997449,0.997085,0.998178,0.99672,0.997813,0.997449,0.000515
2,16.817933,0.208361,2.393858,0.085665,poly,scale,1000.0,"{'kernel': 'poly', 'gamma': 'scale', 'C': 1000}",0.739067,0.749271,0.752187,0.741983,0.740525,0.744606,0.005166,3,0.997449,0.997085,0.998178,0.99672,0.997813,0.997449,0.000515
3,17.893028,0.149215,2.5372,0.100656,poly,0.1,0.1,"{'kernel': 'poly', 'gamma': 0.1, 'C': 0.1}",0.739067,0.749271,0.752187,0.741983,0.740525,0.744606,0.005166,3,0.997449,0.997085,0.998178,0.99672,0.997813,0.997449,0.000515
4,17.681365,0.272679,2.502431,0.047001,poly,0.1,1.0,"{'kernel': 'poly', 'gamma': 0.1, 'C': 1}",0.739067,0.749271,0.752187,0.741983,0.740525,0.744606,0.005166,3,0.997449,0.997085,0.998178,0.99672,0.997813,0.997449,0.000515
5,18.728064,0.198735,2.891419,0.057035,poly,0.0001,100.0,"{'kernel': 'poly', 'gamma': 0.0001, 'C': 100}",0.488338,0.469388,0.479592,0.505831,0.488338,0.486297,0.012007,8,0.552478,0.565598,0.570335,0.570335,0.552478,0.562245,0.00816
6,17.70875,0.138746,2.519612,0.060702,poly,0.01,10.0,"{'kernel': 'poly', 'gamma': 0.01, 'C': 10}",0.739067,0.749271,0.752187,0.741983,0.740525,0.744606,0.005166,3,0.997449,0.997085,0.998178,0.99672,0.997813,0.997449,0.000515
7,6.673207,0.657424,3.461624,0.198658,rbf,0.0001,10.0,"{'kernel': 'rbf', 'gamma': 0.0001, 'C': 10}",0.769679,0.795918,0.787172,0.78863,0.793003,0.78688,0.009145,1,0.926385,0.916181,0.918003,0.924563,0.923469,0.92172,0.003935
8,20.505315,0.461576,4.849858,0.187321,rbf,1,1.0,"{'kernel': 'rbf', 'gamma': 1, 'C': 1}",0.393586,0.383382,0.379009,0.374636,0.393586,0.38484,0.007658,10,0.997449,0.997085,0.998178,0.99672,0.997813,0.997449,0.000515
9,11.695765,0.506702,3.595111,0.443995,rbf,scale,100.0,"{'kernel': 'rbf', 'gamma': 'scale', 'C': 100}",0.77551,0.794461,0.793003,0.787172,0.782799,0.786589,0.006936,2,0.997449,0.997085,0.998178,0.99672,0.997813,0.997449,0.000515


In [24]:
# Grid Search CV

# Set up the sample space
c_list = [1, 10, 100, 1000]
gamma_list = ['auto', 'scale', 0.0001, 0.1]
kernel_list = ['poly','rbf']
# Create the grid
parameter_grid = {'C' : c_list, 'gamma' : gamma_list, 'kernel' : kernel_list}

# Create a grid search object
grid_SVM_class = GridSearchCV(estimator = svm.SVC(),
                             param_grid = parameter_grid,
                             scoring='accuracy',
                             n_jobs=6,
                             cv = 5,
                             refit=True,
                             return_train_score=True)

# Fit the object to our data
grid_SVM_class.fit(sscaled_data_train, y_train)

GridSearchCV(cv=5, estimator=SVC(), n_jobs=6,
             param_grid={'C': [1, 10, 100, 1000],
                         'gamma': ['auto', 'scale', 0.0001, 0.1],
                         'kernel': ['poly', 'rbf']},
             return_train_score=True, scoring='accuracy')

In [25]:
# print best parameter after tuning
print(grid_SVM_class.best_params_)
 
# print how our model looks after hyper-parameter tuning
print(grid_SVM_class.best_estimator_)

{'C': 10, 'gamma': 'auto', 'kernel': 'rbf'}
SVC(C=10, gamma='auto')


In [26]:
# Predicted test outputs
grid_SVM_class_predictions = grid_SVM_class.predict(sscaled_data_test)

#print Confusion Matrix
print(confusion_matrix(y_test, grid_SVM_class_predictions))
 
# print Classification Report
print(classification_report(y_test, grid_SVM_class_predictions))

[[161   0   0   2  35  11   1]
 [  1 182   5   7   9   6   0]
 [  1   1 196   5   4   3   0]
 [  3   8   5 172   9  12   1]
 [ 23   4   3  16 155   3   6]
 [  3   7   4  12   5 143  36]
 [  1   1   0   0   3  23 182]]
              precision    recall  f1-score   support

           1       0.83      0.77      0.80       210
           2       0.90      0.87      0.88       210
           3       0.92      0.93      0.93       210
           4       0.80      0.82      0.81       210
           5       0.70      0.74      0.72       210
           6       0.71      0.68      0.70       210
           7       0.81      0.87      0.83       210

    accuracy                           0.81      1470
   macro avg       0.81      0.81      0.81      1470
weighted avg       0.81      0.81      0.81      1470



In [27]:
# Examining results using pandas df
grid_cv_results_df = pd.DataFrame(grid_SVM_class.cv_results_)
pd.set_option('display.max_columns', None)
grid_cv_results_df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_gamma,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,split3_train_score,split4_train_score,mean_train_score,std_train_score
0,20.825841,0.056213,3.384423,0.043852,1,auto,poly,"{'C': 1, 'gamma': 'auto', 'kernel': 'poly'}",0.677843,0.71137,0.709913,0.69242,0.709913,0.700292,0.01322,24,0.89395,0.896137,0.894679,0.891764,0.893222,0.89395,0.001458
1,15.52527,0.109069,4.828736,0.324756,1,auto,rbf,"{'C': 1, 'gamma': 'auto', 'kernel': 'rbf'}",0.771137,0.794461,0.778426,0.787172,0.778426,0.781924,0.008069,8,0.933309,0.931122,0.931122,0.931487,0.934767,0.932362,0.00145
2,19.359136,0.785114,2.983197,0.260425,1,scale,poly,"{'C': 1, 'gamma': 'scale', 'kernel': 'poly'}",0.676385,0.708455,0.709913,0.69242,0.708455,0.699125,0.013058,25,0.895044,0.894315,0.893586,0.891399,0.895408,0.89395,0.001421
3,14.982529,0.987448,5.118737,0.213796,1,scale,rbf,"{'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}",0.771137,0.794461,0.778426,0.787172,0.778426,0.781924,0.008069,8,0.933309,0.930758,0.930758,0.931487,0.934767,0.932216,0.00158
4,29.441437,1.015042,4.0937,0.170232,1,0.0001,poly,"{'C': 1, 'gamma': 0.0001, 'kernel': 'poly'}",0.322157,0.30758,0.344023,0.314869,0.319242,0.321574,0.012252,28,0.333819,0.324344,0.380831,0.338921,0.355321,0.346647,0.019826
5,15.535385,0.737784,5.060856,0.372666,1,0.0001,rbf,"{'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}",0.71137,0.734694,0.718659,0.706997,0.715743,0.717493,0.009465,22,0.763848,0.754738,0.761297,0.756924,0.762755,0.759913,0.003499
6,23.016553,2.003271,3.314668,0.038642,1,0.1,poly,"{'C': 1, 'gamma': 0.1, 'kernel': 'poly'}",0.739067,0.749271,0.752187,0.741983,0.740525,0.744606,0.005166,12,0.997449,0.997085,0.998178,0.99672,0.997813,0.997449,0.000515
7,27.551253,1.446582,5.809424,0.305563,1,0.1,rbf,"{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}",0.317784,0.297376,0.303207,0.303207,0.330904,0.310496,0.012231,32,0.997449,0.997085,0.998178,0.99672,0.997813,0.997449,0.000515
8,21.789089,1.516345,3.142183,0.201943,10,auto,poly,"{'C': 10, 'gamma': 'auto', 'kernel': 'poly'}",0.746356,0.74344,0.753644,0.737609,0.740525,0.744315,0.005501,20,0.99672,0.996356,0.997813,0.995991,0.99672,0.99672,0.00061
9,17.331294,1.291662,5.059342,0.205656,10,auto,rbf,"{'C': 10, 'gamma': 'auto', 'kernel': 'rbf'}",0.778426,0.798834,0.793003,0.787172,0.787172,0.788921,0.0068,1,0.997449,0.997085,0.998178,0.99672,0.997449,0.997376,0.000483
