In [1]:
# Import necessary packages
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import *

In [2]:
%%time
df = pd.read_parquet("datasets/creditcard-kaggle.parquet")
df.head()

CPU times: user 770 ms, sys: 1.16 s, total: 1.93 s
Wall time: 1.29 s


Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,0.090794,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,-0.166974,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,0.207643,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,-0.054952,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,0.753074,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [3]:
df

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,0.090794,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,1.191857,0.266151,0.166480,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,-0.166974,...,-0.225775,-0.638672,0.101288,-0.339846,0.167170,0.125895,-0.008983,0.014724,2.69,0
2,-1.358354,-1.340163,1.773209,0.379780,-0.503198,1.800499,0.791461,0.247676,-1.514654,0.207643,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,-0.054952,...,-0.108300,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.50,0
4,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,0.753074,...,-0.009431,0.798278,-0.137458,0.141267,-0.206010,0.502292,0.219422,0.215153,69.99,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284802,-11.881118,10.071785,-9.834783,-2.066656,-5.364473,-2.606837,-4.918215,7.305334,1.914428,4.356170,...,0.213454,0.111864,1.014480,-0.509348,1.436807,0.250034,0.943651,0.823731,0.77,0
284803,-0.732789,-0.055080,2.035030,-0.738589,0.868229,1.058415,0.024330,0.294869,0.584800,-0.975926,...,0.214205,0.924384,0.012463,-1.016226,-0.606624,-0.395255,0.068472,-0.053527,24.79,0
284804,1.919565,-0.301254,-3.249640,-0.557828,2.630515,3.031260,-0.296827,0.708417,0.432454,-0.484782,...,0.232045,0.578229,-0.037501,0.640134,0.265745,-0.087371,0.004455,-0.026561,67.88,0
284805,-0.240440,0.530483,0.702510,0.689799,-0.377961,0.623708,-0.686180,0.679145,0.392087,-0.399126,...,0.265245,0.800049,-0.163298,0.123205,-0.569159,0.546668,0.108821,0.104533,10.00,0


In [4]:
df['Class'].value_counts(normalize=True) * 100

0    99.827251
1     0.172749
Name: Class, dtype: float64

In [5]:
# Dataset sampling for faster computation
pos_idx = list(df[df['Class']==1].index)
neg_idx = list(df[df['Class']==0].sample(5000, random_state=42).index)

df = df.loc[pos_idx+neg_idx]
print(df.shape)
df['Class'].value_counts()

(5492, 30)


0    5000
1     492
Name: Class, dtype: int64

In [6]:
# Train Test Split
y = df['Class']
X = df.drop('Class', axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
print(X_train.shape)
print(X_test.shape)

(4393, 29)
(1099, 29)


In [7]:
# Initialze the estimators
clf1 = RandomForestClassifier(random_state=42)
clf2 = SVC(probability=True, random_state=42)
clf3 = LogisticRegression(random_state=42, max_iter=200)
clf4 = DecisionTreeClassifier(random_state=42)
clf5 = KNeighborsClassifier()
clf6 = MultinomialNB()
clf7 = GradientBoostingClassifier(random_state=42)

In [8]:
# Initiaze the hyperparameters for each dictionary
param1 = {}
param1['classifier__n_estimators'] = [10, 50, 100, 250]
param1['classifier__max_depth'] = [5, 10, 20]
param1['classifier__class_weight'] = [None, {0:1,1:5}, {0:1,1:10}, {0:1,1:25}]
param1['classifier'] = [clf1]

param2 = {}
param2['classifier__C'] = [10**-2, 10**-1, 10**0, 10**1, 10**2]
param2['classifier__class_weight'] = [None, {0:1,1:5}, {0:1,1:10}, {0:1,1:25}]
param2['classifier'] = [clf2]

param3 = {}
param3['classifier__C'] = [10**-2, 10**-1, 10**0, 10**1, 10**2]
param3['classifier__penalty'] = ['l1', 'l2']
param3['classifier__class_weight'] = [None, {0:1,1:5}, {0:1,1:10}, {0:1,1:25}]
param3['classifier'] = [clf3]

param4 = {}
param4['classifier__max_depth'] = [5,10,25,None]
param4['classifier__min_samples_split'] = [2,5,10]
param4['classifier__class_weight'] = [None, {0:1,1:5}, {0:1,1:10}, {0:1,1:25}]
param4['classifier'] = [clf4]

param5 = {}
param5['classifier__n_neighbors'] = [2,5,10,25,50]
param5['classifier'] = [clf5]

param6 = {}
param6['classifier__alpha'] = [10**0, 10**1, 10**2]
param6['classifier'] = [clf6]

param7 = {}
param7['classifier__n_estimators'] = [10, 50, 100, 250]
param7['classifier__max_depth'] = [5, 10, 20]
param7['classifier'] = [clf7]

In [9]:
pipeline = Pipeline([('classifier', clf1)])
params = [param1, param2, param3, param4, param5, param6, param7]

### Grid Search CV:

In [10]:
%%time
# Train the grid search model
gs = GridSearchCV(pipeline, params, cv=3, n_jobs=-1, scoring='roc_auc', return_train_score=True).fit(X_train, y_train)

CPU times: user 3.59 s, sys: 847 ms, total: 4.43 s
Wall time: 2min 24s


In [11]:
# Best performing model and its corresponding hyperparameters
gs.best_params_

{'classifier': SVC(C=10, probability=True, random_state=42),
 'classifier__C': 10,
 'classifier__class_weight': None}

In [12]:
# ROC-AUC score for the best model
gs.best_score_

0.9824812928885703

In [13]:
# Test data performance
y_pred_gs = gs.predict(X_test)
print(classification_report(y_test, y_pred_gs, digits=4))

              precision    recall  f1-score   support

           0     0.9737    0.9980    0.9857      1001
           1     0.9726    0.7245    0.8304        98

    accuracy                         0.9736      1099
   macro avg     0.9731    0.8612    0.9081      1099
weighted avg     0.9736    0.9736    0.9718      1099



In [14]:
print("Test Accurary:", accuracy_score(y_test, y_pred_gs))
print("Test Precision:", precision_score(y_test, y_pred_gs, average='weighted'))
print("Test Recall:", recall_score(y_test, y_pred_gs, average='weighted'))
print("Test F1-Score:", f1_score(y_test, y_pred_gs, average='weighted'))
print("Test ROC AUC Score:", roc_auc_score(y_test, y_pred_gs, average='weighted'))

Test Accurary: 0.9736123748862603
Test Precision: 0.9735877736396659
Test Recall: 0.9736123748862603
Test F1-Score: 0.971846180779685
Test ROC AUC Score: 0.8612458969601826


In [15]:
df_results_gs = pd.DataFrame(gs.cv_results_)
df_results_gs

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_classifier,param_classifier__class_weight,param_classifier__max_depth,param_classifier__n_estimators,param_classifier__C,param_classifier__penalty,...,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,mean_train_score,std_train_score
0,0.184324,0.002470,0.024736,0.005114,RandomForestClassifier(random_state=42),,5,10,,,...,0.947985,0.951510,0.954013,0.006202,87,0.984942,0.970069,0.982609,0.979207,0.006531
1,0.787624,0.004555,0.030150,0.003031,RandomForestClassifier(random_state=42),,5,50,,,...,0.975779,0.966250,0.968985,0.004835,71,0.989687,0.984556,0.988348,0.987530,0.002173
2,1.516883,0.038904,0.046076,0.004029,RandomForestClassifier(random_state=42),,5,100,,,...,0.976306,0.969391,0.971505,0.003403,60,0.992238,0.986582,0.988916,0.989246,0.002321
3,3.714544,0.097932,0.081182,0.016086,RandomForestClassifier(random_state=42),,5,250,,,...,0.976644,0.972083,0.972518,0.003206,53,0.992828,0.987300,0.990923,0.990350,0.002293
4,0.236355,0.001887,0.015919,0.001264,RandomForestClassifier(random_state=42),,10,10,,,...,0.972552,0.967109,0.963720,0.008923,78,0.999828,0.995567,0.997193,0.997529,0.001756
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
171,29.677286,0.936422,0.020654,0.001536,GradientBoostingClassifier(random_state=42),,10,250,,,...,0.977609,0.957646,0.965643,0.008619,77,1.000000,1.000000,1.000000,1.000000,0.000000
172,1.571283,0.426873,0.008919,0.004125,GradientBoostingClassifier(random_state=42),,20,10,,,...,0.909685,0.913373,0.912027,0.001662,137,1.000000,1.000000,1.000000,1.000000,0.000000
173,8.033707,2.009740,0.010341,0.000662,GradientBoostingClassifier(random_state=42),,20,50,,,...,0.915378,0.921565,0.924610,0.009041,109,1.000000,1.000000,1.000000,1.000000,0.000000
174,16.260831,3.526940,0.014798,0.001163,GradientBoostingClassifier(random_state=42),,20,100,,,...,0.933485,0.922049,0.921931,0.009483,110,1.000000,1.000000,1.000000,1.000000,0.000000


In [16]:
df_results_gs = df_results_gs.sort_values(by='rank_test_score')
df_results_gs

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_classifier,param_classifier__class_weight,param_classifier__max_depth,param_classifier__n_estimators,param_classifier__C,param_classifier__penalty,...,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,mean_train_score,std_train_score
60,0.897965,0.124693,0.036361,0.000724,"SVC(C=10, probability=True, random_state=42)",,,,10,,...,0.988008,0.979310,0.982481,0.003922,1,0.986338,0.982687,0.985542,0.984856,0.001567
69,0.280235,0.011234,0.008820,0.001519,"LogisticRegression(max_iter=200, random_state=42)",,,,0.01,l2,...,0.985489,0.983358,0.982248,0.003197,2,0.986561,0.982587,0.984966,0.984705,0.001633
61,1.292390,0.111844,0.053187,0.002855,"SVC(C=10, probability=True, random_state=42)","{0: 1, 1: 5}",,,10,,...,0.987396,0.976068,0.981278,0.004669,3,0.988301,0.983564,0.986491,0.986119,0.001951
71,0.357728,0.046072,0.009263,0.001350,"LogisticRegression(max_iter=200, random_state=42)","{0: 1, 1: 5}",,,0.01,l2,...,0.985729,0.979882,0.980831,0.003674,4,0.987890,0.983556,0.986809,0.986085,0.001842
56,1.173303,0.085795,0.056226,0.004085,"SVC(C=10, probability=True, random_state=42)",,,,1,,...,0.988129,0.976641,0.980580,0.005339,5,0.984457,0.978498,0.983915,0.982290,0.002690
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104,0.005687,0.002076,0.000000,0.000000,"LogisticRegression(max_iter=200, random_state=42)","{0: 1, 1: 10}",,,100,l1,...,,,,,172,,,,,
100,0.003691,0.000635,0.000000,0.000000,"LogisticRegression(max_iter=200, random_state=42)",,,,100,l1,...,,,,,173,,,,,
68,0.003118,0.000320,0.000000,0.000000,"LogisticRegression(max_iter=200, random_state=42)",,,,0.01,l1,...,,,,,174,,,,,
106,0.008849,0.004533,0.000000,0.000000,"LogisticRegression(max_iter=200, random_state=42)","{0: 1, 1: 25}",,,100,l1,...,,,,,175,,,,,


In [17]:
df_results_gs.reset_index(drop=True, inplace=True)
df_results_gs

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_classifier,param_classifier__class_weight,param_classifier__max_depth,param_classifier__n_estimators,param_classifier__C,param_classifier__penalty,...,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,mean_train_score,std_train_score
0,0.897965,0.124693,0.036361,0.000724,"SVC(C=10, probability=True, random_state=42)",,,,10,,...,0.988008,0.979310,0.982481,0.003922,1,0.986338,0.982687,0.985542,0.984856,0.001567
1,0.280235,0.011234,0.008820,0.001519,"LogisticRegression(max_iter=200, random_state=42)",,,,0.01,l2,...,0.985489,0.983358,0.982248,0.003197,2,0.986561,0.982587,0.984966,0.984705,0.001633
2,1.292390,0.111844,0.053187,0.002855,"SVC(C=10, probability=True, random_state=42)","{0: 1, 1: 5}",,,10,,...,0.987396,0.976068,0.981278,0.004669,3,0.988301,0.983564,0.986491,0.986119,0.001951
3,0.357728,0.046072,0.009263,0.001350,"LogisticRegression(max_iter=200, random_state=42)","{0: 1, 1: 5}",,,0.01,l2,...,0.985729,0.979882,0.980831,0.003674,4,0.987890,0.983556,0.986809,0.986085,0.001842
4,1.173303,0.085795,0.056226,0.004085,"SVC(C=10, probability=True, random_state=42)",,,,1,,...,0.988129,0.976641,0.980580,0.005339,5,0.984457,0.978498,0.983915,0.982290,0.002690
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
171,0.005687,0.002076,0.000000,0.000000,"LogisticRegression(max_iter=200, random_state=42)","{0: 1, 1: 10}",,,100,l1,...,,,,,172,,,,,
172,0.003691,0.000635,0.000000,0.000000,"LogisticRegression(max_iter=200, random_state=42)",,,,100,l1,...,,,,,173,,,,,
173,0.003118,0.000320,0.000000,0.000000,"LogisticRegression(max_iter=200, random_state=42)",,,,0.01,l1,...,,,,,174,,,,,
174,0.008849,0.004533,0.000000,0.000000,"LogisticRegression(max_iter=200, random_state=42)","{0: 1, 1: 25}",,,100,l1,...,,,,,175,,,,,


In [18]:
df_results_gs['param_classifier'][0]

SVC(C=10, probability=True, random_state=42)

In [19]:
df_results_gs['mean_train_score'][0], df_results_gs['mean_test_score'][0]

(0.984855803464116, 0.9824812928885703)

### Randomized Search CV:

In [20]:
%%time
# Train the random search model
rs = RandomizedSearchCV(pipeline, params, cv=3, n_jobs=-1, scoring='roc_auc', return_train_score=True, random_state=42).fit(X_train, y_train)

CPU times: user 1.54 s, sys: 4.21 ms, total: 1.54 s
Wall time: 5.8 s


In [21]:
# Best performing model and its corresponding hyperparameters
rs.best_params_

{'classifier__n_estimators': 100,
 'classifier__max_depth': 10,
 'classifier__class_weight': {0: 1, 1: 10},
 'classifier': RandomForestClassifier(class_weight={0: 1, 1: 10}, max_depth=10,
                        random_state=42)}

In [22]:
# ROC-AUC score for the best model
rs.best_score_

0.9795824794736737

In [23]:
# Test data performance
y_pred_rs = rs.predict(X_test)
print(classification_report(y_test, y_pred_rs, digits=4))

              precision    recall  f1-score   support

           0     0.9891    0.9990    0.9940      1001
           1     0.9886    0.8878    0.9355        98

    accuracy                         0.9891      1099
   macro avg     0.9889    0.9434    0.9648      1099
weighted avg     0.9891    0.9891    0.9888      1099



In [24]:
print("Test Accurary:", accuracy_score(y_test, y_pred_rs))
print("Test Precision:", precision_score(y_test, y_pred_rs, average='weighted'))
print("Test Recall:", recall_score(y_test, y_pred_rs, average='weighted'))
print("Test F1-Score:", f1_score(y_test, y_pred_rs, average='weighted'))
print("Test ROC AUC Score:", roc_auc_score(y_test, y_pred_rs, average='weighted'))

Test Accurary: 0.989080982711556
Test Precision: 0.9890765848967668
Test Recall: 0.989080982711556
Test F1-Score: 0.9888145954762486
Test ROC AUC Score: 0.9433780505209077


In [25]:
df_results_rs = pd.DataFrame(rs.cv_results_)
df_results_rs.shape

(10, 22)

In [26]:
df_results_rs

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_classifier__n_estimators,param_classifier__max_depth,param_classifier__class_weight,param_classifier,param_classifier__min_samples_split,param_classifier__C,...,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,mean_train_score,std_train_score
0,3.469727,0.085035,0.065514,0.003783,250.0,10.0,"{0: 1, 1: 5}","RandomForestClassifier(class_weight={0: 1, 1: ...",,,...,0.983135,0.970983,0.974524,0.006121,2,1.0,1.0,1.0,1.0,0.0
1,0.583017,0.003483,0.020129,0.001803,50.0,20.0,"{0: 1, 1: 25}","RandomForestClassifier(class_weight={0: 1, 1: ...",,,...,0.978359,0.97683,0.972449,0.007304,4,1.0,1.0,1.0,1.0,0.0
2,0.098213,0.0125,0.007008,0.000908,,25.0,"{0: 1, 1: 10}",DecisionTreeClassifier(random_state=42),5.0,,...,0.918713,0.920503,0.917942,0.002467,7,0.999961,0.999999,0.999964,0.999975,1.7e-05
3,1.330924,0.039142,0.038947,0.001477,100.0,10.0,"{0: 1, 1: 10}","RandomForestClassifier(class_weight={0: 1, 1: ...",,,...,0.986565,0.976109,0.979582,0.004938,1,1.0,1.0,1.0,1.0,0.0
4,1.541996,0.167863,0.07424,0.003669,,,"{0: 1, 1: 25}","SVC(C=10, probability=True, random_state=42)",,100.0,...,0.980621,0.970382,0.973241,0.005263,3,0.991711,0.987933,0.99041,0.990018,0.001567
5,0.154529,0.007501,0.009775,0.001182,10.0,10.0,"{0: 1, 1: 5}","RandomForestClassifier(class_weight={0: 1, 1: ...",,,...,0.981016,0.956243,0.970491,0.010451,5,0.99999,0.99903,0.998021,0.999014,0.000804
6,0.149005,0.041216,0.005931,0.000438,,,,DecisionTreeClassifier(random_state=42),10.0,,...,0.923406,0.921331,0.916881,0.007807,8,0.99964,0.999751,0.99985,0.999747,8.6e-05
7,1.449453,0.326977,0.005562,0.001243,10.0,20.0,,GradientBoostingClassifier(random_state=42),,,...,0.909685,0.913373,0.912027,0.001662,10,1.0,1.0,1.0,1.0,0.0
8,0.067981,0.001309,0.005222,0.000327,,5.0,,DecisionTreeClassifier(random_state=42),5.0,,...,0.911432,0.946671,0.915802,0.023624,9,0.960677,0.960043,0.979433,0.966718,0.008995
9,0.087174,0.009304,0.005874,0.000801,,25.0,"{0: 1, 1: 10}",DecisionTreeClassifier(random_state=42),10.0,,...,0.938115,0.921262,0.926272,0.008407,6,0.999842,0.999786,0.999786,0.999805,2.6e-05


In [27]:
df_results_rs = df_results_rs.sort_values(by='rank_test_score')
df_results_rs

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_classifier__n_estimators,param_classifier__max_depth,param_classifier__class_weight,param_classifier,param_classifier__min_samples_split,param_classifier__C,...,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,mean_train_score,std_train_score
3,1.330924,0.039142,0.038947,0.001477,100.0,10.0,"{0: 1, 1: 10}","RandomForestClassifier(class_weight={0: 1, 1: ...",,,...,0.986565,0.976109,0.979582,0.004938,1,1.0,1.0,1.0,1.0,0.0
0,3.469727,0.085035,0.065514,0.003783,250.0,10.0,"{0: 1, 1: 5}","RandomForestClassifier(class_weight={0: 1, 1: ...",,,...,0.983135,0.970983,0.974524,0.006121,2,1.0,1.0,1.0,1.0,0.0
4,1.541996,0.167863,0.07424,0.003669,,,"{0: 1, 1: 25}","SVC(C=10, probability=True, random_state=42)",,100.0,...,0.980621,0.970382,0.973241,0.005263,3,0.991711,0.987933,0.99041,0.990018,0.001567
1,0.583017,0.003483,0.020129,0.001803,50.0,20.0,"{0: 1, 1: 25}","RandomForestClassifier(class_weight={0: 1, 1: ...",,,...,0.978359,0.97683,0.972449,0.007304,4,1.0,1.0,1.0,1.0,0.0
5,0.154529,0.007501,0.009775,0.001182,10.0,10.0,"{0: 1, 1: 5}","RandomForestClassifier(class_weight={0: 1, 1: ...",,,...,0.981016,0.956243,0.970491,0.010451,5,0.99999,0.99903,0.998021,0.999014,0.000804
9,0.087174,0.009304,0.005874,0.000801,,25.0,"{0: 1, 1: 10}",DecisionTreeClassifier(random_state=42),10.0,,...,0.938115,0.921262,0.926272,0.008407,6,0.999842,0.999786,0.999786,0.999805,2.6e-05
2,0.098213,0.0125,0.007008,0.000908,,25.0,"{0: 1, 1: 10}",DecisionTreeClassifier(random_state=42),5.0,,...,0.918713,0.920503,0.917942,0.002467,7,0.999961,0.999999,0.999964,0.999975,1.7e-05
6,0.149005,0.041216,0.005931,0.000438,,,,DecisionTreeClassifier(random_state=42),10.0,,...,0.923406,0.921331,0.916881,0.007807,8,0.99964,0.999751,0.99985,0.999747,8.6e-05
8,0.067981,0.001309,0.005222,0.000327,,5.0,,DecisionTreeClassifier(random_state=42),5.0,,...,0.911432,0.946671,0.915802,0.023624,9,0.960677,0.960043,0.979433,0.966718,0.008995
7,1.449453,0.326977,0.005562,0.001243,10.0,20.0,,GradientBoostingClassifier(random_state=42),,,...,0.909685,0.913373,0.912027,0.001662,10,1.0,1.0,1.0,1.0,0.0


In [28]:
df_results_rs.reset_index(drop=True, inplace=True)
df_results_rs

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_classifier__n_estimators,param_classifier__max_depth,param_classifier__class_weight,param_classifier,param_classifier__min_samples_split,param_classifier__C,...,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,mean_train_score,std_train_score
0,1.330924,0.039142,0.038947,0.001477,100.0,10.0,"{0: 1, 1: 10}","RandomForestClassifier(class_weight={0: 1, 1: ...",,,...,0.986565,0.976109,0.979582,0.004938,1,1.0,1.0,1.0,1.0,0.0
1,3.469727,0.085035,0.065514,0.003783,250.0,10.0,"{0: 1, 1: 5}","RandomForestClassifier(class_weight={0: 1, 1: ...",,,...,0.983135,0.970983,0.974524,0.006121,2,1.0,1.0,1.0,1.0,0.0
2,1.541996,0.167863,0.07424,0.003669,,,"{0: 1, 1: 25}","SVC(C=10, probability=True, random_state=42)",,100.0,...,0.980621,0.970382,0.973241,0.005263,3,0.991711,0.987933,0.99041,0.990018,0.001567
3,0.583017,0.003483,0.020129,0.001803,50.0,20.0,"{0: 1, 1: 25}","RandomForestClassifier(class_weight={0: 1, 1: ...",,,...,0.978359,0.97683,0.972449,0.007304,4,1.0,1.0,1.0,1.0,0.0
4,0.154529,0.007501,0.009775,0.001182,10.0,10.0,"{0: 1, 1: 5}","RandomForestClassifier(class_weight={0: 1, 1: ...",,,...,0.981016,0.956243,0.970491,0.010451,5,0.99999,0.99903,0.998021,0.999014,0.000804
5,0.087174,0.009304,0.005874,0.000801,,25.0,"{0: 1, 1: 10}",DecisionTreeClassifier(random_state=42),10.0,,...,0.938115,0.921262,0.926272,0.008407,6,0.999842,0.999786,0.999786,0.999805,2.6e-05
6,0.098213,0.0125,0.007008,0.000908,,25.0,"{0: 1, 1: 10}",DecisionTreeClassifier(random_state=42),5.0,,...,0.918713,0.920503,0.917942,0.002467,7,0.999961,0.999999,0.999964,0.999975,1.7e-05
7,0.149005,0.041216,0.005931,0.000438,,,,DecisionTreeClassifier(random_state=42),10.0,,...,0.923406,0.921331,0.916881,0.007807,8,0.99964,0.999751,0.99985,0.999747,8.6e-05
8,0.067981,0.001309,0.005222,0.000327,,5.0,,DecisionTreeClassifier(random_state=42),5.0,,...,0.911432,0.946671,0.915802,0.023624,9,0.960677,0.960043,0.979433,0.966718,0.008995
9,1.449453,0.326977,0.005562,0.001243,10.0,20.0,,GradientBoostingClassifier(random_state=42),,,...,0.909685,0.913373,0.912027,0.001662,10,1.0,1.0,1.0,1.0,0.0


In [29]:
df_results_rs['param_classifier'][0]

RandomForestClassifier(class_weight={0: 1, 1: 10}, max_depth=10,
                       random_state=42)

In [30]:
df_results_rs['mean_train_score'][0], df_results_rs['mean_test_score'][0]

(1.0, 0.9795824794736737)