In [1]:
# Import necessary packages
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import *

In [2]:
%%time
df = pd.read_parquet("datasets/creditcard-kaggle.parquet")
df.head()

CPU times: user 442 ms, sys: 494 ms, total: 936 ms
Wall time: 561 ms


Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,0.090794,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,-0.166974,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,0.207643,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,-0.054952,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,0.753074,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [3]:
df

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,0.090794,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,1.191857,0.266151,0.166480,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,-0.166974,...,-0.225775,-0.638672,0.101288,-0.339846,0.167170,0.125895,-0.008983,0.014724,2.69,0
2,-1.358354,-1.340163,1.773209,0.379780,-0.503198,1.800499,0.791461,0.247676,-1.514654,0.207643,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,-0.054952,...,-0.108300,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.50,0
4,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,0.753074,...,-0.009431,0.798278,-0.137458,0.141267,-0.206010,0.502292,0.219422,0.215153,69.99,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284802,-11.881118,10.071785,-9.834783,-2.066656,-5.364473,-2.606837,-4.918215,7.305334,1.914428,4.356170,...,0.213454,0.111864,1.014480,-0.509348,1.436807,0.250034,0.943651,0.823731,0.77,0
284803,-0.732789,-0.055080,2.035030,-0.738589,0.868229,1.058415,0.024330,0.294869,0.584800,-0.975926,...,0.214205,0.924384,0.012463,-1.016226,-0.606624,-0.395255,0.068472,-0.053527,24.79,0
284804,1.919565,-0.301254,-3.249640,-0.557828,2.630515,3.031260,-0.296827,0.708417,0.432454,-0.484782,...,0.232045,0.578229,-0.037501,0.640134,0.265745,-0.087371,0.004455,-0.026561,67.88,0
284805,-0.240440,0.530483,0.702510,0.689799,-0.377961,0.623708,-0.686180,0.679145,0.392087,-0.399126,...,0.265245,0.800049,-0.163298,0.123205,-0.569159,0.546668,0.108821,0.104533,10.00,0


In [4]:
df['Class'].value_counts(normalize=True) * 100

0    99.827251
1     0.172749
Name: Class, dtype: float64

In [5]:
# Dataset sampling for faster computation
pos_idx = list(df[df['Class']==1].index)
neg_idx = list(df[df['Class']==0].sample(5000).index)

df = df.loc[pos_idx+neg_idx]
print(df.shape)
df['Class'].value_counts()

(5492, 30)


0    5000
1     492
Name: Class, dtype: int64

In [6]:
# Train Test Split
y = df['Class']
X = df.drop('Class', axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
print(X_train.shape)
print(X_test.shape)

(4393, 29)
(1099, 29)


In [7]:
# Initialze the estimators
clf1 = RandomForestClassifier(random_state=42)
clf2 = SVC(probability=True, random_state=42)
clf3 = LogisticRegression(random_state=42, max_iter=200)
clf4 = DecisionTreeClassifier(random_state=42)
clf5 = KNeighborsClassifier()
clf6 = MultinomialNB()
clf7 = GradientBoostingClassifier(random_state=42)

In [8]:
# Initiaze the hyperparameters for each dictionary
param1 = {}
param1['classifier__n_estimators'] = [10, 50, 100, 250]
param1['classifier__max_depth'] = [5, 10, 20]
param1['classifier__class_weight'] = [None, {0:1,1:5}, {0:1,1:10}, {0:1,1:25}]
param1['classifier'] = [clf1]

param2 = {}
param2['classifier__C'] = [10**-2, 10**-1, 10**0, 10**1, 10**2]
param2['classifier__class_weight'] = [None, {0:1,1:5}, {0:1,1:10}, {0:1,1:25}]
param2['classifier'] = [clf2]

param3 = {}
param3['classifier__C'] = [10**-2, 10**-1, 10**0, 10**1, 10**2]
param3['classifier__penalty'] = ['l1', 'l2']
param3['classifier__class_weight'] = [None, {0:1,1:5}, {0:1,1:10}, {0:1,1:25}]
param3['classifier'] = [clf3]

param4 = {}
param4['classifier__max_depth'] = [5,10,25,None]
param4['classifier__min_samples_split'] = [2,5,10]
param4['classifier__class_weight'] = [None, {0:1,1:5}, {0:1,1:10}, {0:1,1:25}]
param4['classifier'] = [clf4]

param5 = {}
param5['classifier__n_neighbors'] = [2,5,10,25,50]
param5['classifier'] = [clf5]

param6 = {}
param6['classifier__alpha'] = [10**0, 10**1, 10**2]
param6['classifier'] = [clf6]

param7 = {}
param7['classifier__n_estimators'] = [10, 50, 100, 250]
param7['classifier__max_depth'] = [5, 10, 20]
param7['classifier'] = [clf7]

In [9]:
pipeline = Pipeline([('classifier', clf1)])
params = [param1, param2, param3, param4, param5, param6, param7]

### Grid Search CV:

In [10]:
%%time
# Train the grid search model
gs = GridSearchCV(pipeline, params, cv=3, n_jobs=-1, scoring='roc_auc').fit(X_train, y_train)

CPU times: user 4.84 s, sys: 631 ms, total: 5.47 s
Wall time: 2min 35s


In [11]:
# Best performing model and its corresponding hyperparameters
gs.best_params_

{'classifier': SVC(C=10, class_weight={0: 1, 1: 5}, probability=True, random_state=42),
 'classifier__C': 10,
 'classifier__class_weight': {0: 1, 1: 5}}

In [12]:
# ROC-AUC score for the best model
gs.best_score_

0.9833321012418267

In [13]:
# Test data performance
print("Test Precision:",precision_score(gs.predict(X_test), y_test))
print("Test Recall:",recall_score(gs.predict(X_test), y_test))
print("Test ROC AUC Score:",roc_auc_score(gs.predict(X_test), y_test))

Test Precision: 0.8673469387755102
Test Recall: 0.9770114942528736
Test ROC AUC Score: 0.982082822225251


In [14]:
df_results_gs = pd.DataFrame(gs.cv_results_)
df_results_gs

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_classifier,param_classifier__class_weight,param_classifier__max_depth,param_classifier__n_estimators,param_classifier__C,param_classifier__penalty,param_classifier__min_samples_split,param_classifier__n_neighbors,param_classifier__alpha,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,0.159832,0.015633,0.017007,0.001893,RandomForestClassifier(random_state=42),,5,10,,,,,,{'classifier': RandomForestClassifier(random_s...,0.959427,0.962075,0.949128,0.956877,0.005585,87
1,0.574294,0.069169,0.023809,0.004736,RandomForestClassifier(random_state=42),,5,50,,,,,,{'classifier': RandomForestClassifier(random_s...,0.968813,0.975928,0.965196,0.969979,0.004458,70
2,1.105796,0.061513,0.038773,0.007705,RandomForestClassifier(random_state=42),,5,100,,,,,,{'classifier': RandomForestClassifier(random_s...,0.969123,0.977217,0.971885,0.972742,0.003359,65
3,2.832051,0.189206,0.084482,0.031648,RandomForestClassifier(random_state=42),,5,250,,,,,,{'classifier': RandomForestClassifier(random_s...,0.970265,0.976988,0.972558,0.973270,0.002790,60
4,0.183581,0.027017,0.010616,0.000957,RandomForestClassifier(random_state=42),,10,10,,,,,,{'classifier': RandomForestClassifier(random_s...,0.960786,0.983854,0.958110,0.967583,0.011557,74
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
171,34.613931,6.201213,0.021524,0.003189,GradientBoostingClassifier(random_state=42),,10,250,,,,,,{'classifier': GradientBoostingClassifier(rand...,0.969259,0.978310,0.950785,0.966118,0.011454,78
172,1.915619,0.429404,0.007181,0.000509,GradientBoostingClassifier(random_state=42),,20,10,,,,,,{'classifier': GradientBoostingClassifier(rand...,0.909278,0.918367,0.921763,0.916469,0.005270,135
173,15.032763,5.273941,0.014593,0.003422,GradientBoostingClassifier(random_state=42),,20,50,,,,,,{'classifier': GradientBoostingClassifier(rand...,0.932594,0.944080,0.919489,0.932054,0.010046,103
174,21.833763,0.800627,0.016319,0.001313,GradientBoostingClassifier(random_state=42),,20,100,,,,,,{'classifier': GradientBoostingClassifier(rand...,0.916672,0.963009,0.919060,0.932914,0.021303,101


In [15]:
df_results_gs = df_results_gs.sort_values(by='rank_test_score')
df_results_gs

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_classifier,param_classifier__class_weight,param_classifier__max_depth,param_classifier__n_estimators,param_classifier__C,param_classifier__penalty,param_classifier__min_samples_split,param_classifier__n_neighbors,param_classifier__alpha,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
61,1.395922,0.123904,0.055765,0.004532,"SVC(C=10, class_weight={0: 1, 1: 5}, probabili...","{0: 1, 1: 5}",,,10,,,,,"{'classifier': SVC(C=10, class_weight={0: 1, 1...",0.982592,0.988593,0.978811,0.983332,0.004027,1
60,0.985476,0.058630,0.041734,0.008037,"SVC(C=10, class_weight={0: 1, 1: 5}, probabili...",,,,10,,,,,"{'classifier': SVC(C=10, class_weight={0: 1, 1...",0.980882,0.989761,0.976366,0.982336,0.005564,2
69,0.259223,0.012354,0.007883,0.000337,"LogisticRegression(max_iter=200, random_state=42)",,,,0.01,l2,,,,{'classifier': LogisticRegression(max_iter=200...,0.978972,0.985517,0.982276,0.982255,0.002672,3
38,1.085419,0.027433,0.037007,0.003912,RandomForestClassifier(random_state=42),"{0: 1, 1: 25}",5,100,,,,,,{'classifier': RandomForestClassifier(random_s...,0.980387,0.985506,0.979212,0.981702,0.002732,4
39,3.600404,0.497240,0.229803,0.099902,RandomForestClassifier(random_state=42),"{0: 1, 1: 25}",5,250,,,,,,{'classifier': RandomForestClassifier(random_s...,0.979097,0.986439,0.979098,0.981545,0.003461,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76,0.003766,0.000334,0.000000,0.000000,"LogisticRegression(max_iter=200, random_state=42)",,,,0.1,l1,,,,{'classifier': LogisticRegression(max_iter=200...,,,,,,172
98,0.003775,0.000390,0.000000,0.000000,"LogisticRegression(max_iter=200, random_state=42)","{0: 1, 1: 25}",,,10,l1,,,,{'classifier': LogisticRegression(max_iter=200...,,,,,,173
74,0.003549,0.000694,0.000000,0.000000,"LogisticRegression(max_iter=200, random_state=42)","{0: 1, 1: 25}",,,0.01,l1,,,,{'classifier': LogisticRegression(max_iter=200...,,,,,,174
162,0.014850,0.005254,0.000000,0.000000,MultinomialNB(),,,,,,,,10,"{'classifier': MultinomialNB(), 'classifier__a...",,,,,,175


In [16]:
df_results_gs.reset_index(drop=True, inplace=True)
df_results_gs

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_classifier,param_classifier__class_weight,param_classifier__max_depth,param_classifier__n_estimators,param_classifier__C,param_classifier__penalty,param_classifier__min_samples_split,param_classifier__n_neighbors,param_classifier__alpha,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,1.395922,0.123904,0.055765,0.004532,"SVC(C=10, class_weight={0: 1, 1: 5}, probabili...","{0: 1, 1: 5}",,,10,,,,,"{'classifier': SVC(C=10, class_weight={0: 1, 1...",0.982592,0.988593,0.978811,0.983332,0.004027,1
1,0.985476,0.058630,0.041734,0.008037,"SVC(C=10, class_weight={0: 1, 1: 5}, probabili...",,,,10,,,,,"{'classifier': SVC(C=10, class_weight={0: 1, 1...",0.980882,0.989761,0.976366,0.982336,0.005564,2
2,0.259223,0.012354,0.007883,0.000337,"LogisticRegression(max_iter=200, random_state=42)",,,,0.01,l2,,,,{'classifier': LogisticRegression(max_iter=200...,0.978972,0.985517,0.982276,0.982255,0.002672,3
3,1.085419,0.027433,0.037007,0.003912,RandomForestClassifier(random_state=42),"{0: 1, 1: 25}",5,100,,,,,,{'classifier': RandomForestClassifier(random_s...,0.980387,0.985506,0.979212,0.981702,0.002732,4
4,3.600404,0.497240,0.229803,0.099902,RandomForestClassifier(random_state=42),"{0: 1, 1: 25}",5,250,,,,,,{'classifier': RandomForestClassifier(random_s...,0.979097,0.986439,0.979098,0.981545,0.003461,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
171,0.003766,0.000334,0.000000,0.000000,"LogisticRegression(max_iter=200, random_state=42)",,,,0.1,l1,,,,{'classifier': LogisticRegression(max_iter=200...,,,,,,172
172,0.003775,0.000390,0.000000,0.000000,"LogisticRegression(max_iter=200, random_state=42)","{0: 1, 1: 25}",,,10,l1,,,,{'classifier': LogisticRegression(max_iter=200...,,,,,,173
173,0.003549,0.000694,0.000000,0.000000,"LogisticRegression(max_iter=200, random_state=42)","{0: 1, 1: 25}",,,0.01,l1,,,,{'classifier': LogisticRegression(max_iter=200...,,,,,,174
174,0.014850,0.005254,0.000000,0.000000,MultinomialNB(),,,,,,,,10,"{'classifier': MultinomialNB(), 'classifier__a...",,,,,,175


In [17]:
df_results_gs['param_classifier'][0]

SVC(C=10, class_weight={0: 1, 1: 5}, probability=True, random_state=42)

In [18]:
df_results_gs['mean_test_score'][0]

0.9833321012418267

### Randomized Search CV:

In [19]:
%%time
# Train the random search model
rs = RandomizedSearchCV(pipeline, params, cv=3, n_jobs=-1, scoring='roc_auc', random_state=42).fit(X_train, y_train)

CPU times: user 3.23 s, sys: 53.4 ms, total: 3.28 s
Wall time: 9.6 s


In [20]:
# Best performing model and its corresponding hyperparameters
rs.best_params_

{'classifier__class_weight': {0: 1, 1: 25},
 'classifier__C': 100,
 'classifier': SVC(C=100, class_weight={0: 1, 1: 25}, probability=True, random_state=42)}

In [21]:
# ROC-AUC score for the best model
rs.best_score_

0.9771825908130977

In [22]:
# Test data performance
print("Precision:",precision_score(rs.predict(X_test), y_test))
print("Recall:",recall_score(rs.predict(X_test), y_test))
print("ROC AUC Score:",roc_auc_score(rs.predict(X_test), y_test))

Precision: 0.9387755102040817
Recall: 0.6814814814814815
ROC AUC Score: 0.83762870754572


In [23]:
df_results_rs = pd.DataFrame(rs.cv_results_)
df_results_rs.shape

(10, 17)

In [24]:
df_results_rs

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_classifier__n_estimators,param_classifier__max_depth,param_classifier__class_weight,param_classifier,param_classifier__min_samples_split,param_classifier__C,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,4.478518,0.063239,0.14033,0.03594,250.0,10.0,"{0: 1, 1: 5}",RandomForestClassifier(random_state=42),,,"{'classifier__n_estimators': 250, 'classifier_...",0.966816,0.982843,0.97569,0.975116,0.006556,3
1,1.047342,0.187027,0.036989,0.011904,50.0,20.0,"{0: 1, 1: 25}",RandomForestClassifier(random_state=42),,,"{'classifier__n_estimators': 50, 'classifier__...",0.968489,0.983616,0.976366,0.976157,0.006177,2
2,0.253429,0.062511,0.017129,0.000823,,25.0,"{0: 1, 1: 10}",DecisionTreeClassifier(random_state=42),5.0,,"{'classifier__min_samples_split': 5, 'classifi...",0.912046,0.918691,0.907853,0.912863,0.004462,10
3,2.552949,0.667843,0.094353,0.009118,100.0,10.0,"{0: 1, 1: 10}",RandomForestClassifier(random_state=42),,,"{'classifier__n_estimators': 100, 'classifier_...",0.972217,0.982688,0.969474,0.974793,0.005694,4
4,3.038742,0.858982,0.083813,0.009802,,,"{0: 1, 1: 25}","SVC(C=100, class_weight={0: 1, 1: 25}, probabi...",,100.0,"{'classifier__class_weight': {0: 1, 1: 25}, 'c...",0.972493,0.984905,0.97415,0.977183,0.005502,1
5,0.26019,0.022616,0.022453,0.003247,10.0,10.0,"{0: 1, 1: 5}",RandomForestClassifier(random_state=42),,,"{'classifier__n_estimators': 10, 'classifier__...",0.956728,0.967292,0.958345,0.960788,0.004646,5
6,0.252842,0.058439,0.012758,0.00471,,,,DecisionTreeClassifier(random_state=42),10.0,,"{'classifier__min_samples_split': 10, 'classif...",0.915979,0.926573,0.930588,0.92438,0.006162,7
7,1.603864,0.327122,0.006682,0.000407,10.0,20.0,,GradientBoostingClassifier(random_state=42),,,"{'classifier__n_estimators': 10, 'classifier__...",0.909278,0.918367,0.921763,0.916469,0.00527,9
8,0.085092,0.0055,0.007224,0.0012,,5.0,,DecisionTreeClassifier(random_state=42),5.0,,"{'classifier__min_samples_split': 5, 'classifi...",0.923884,0.926218,0.930576,0.926893,0.002773,6
9,0.107058,0.025288,0.006719,0.000851,,25.0,"{0: 1, 1: 10}",DecisionTreeClassifier(random_state=42),10.0,,"{'classifier__min_samples_split': 10, 'classif...",0.916096,0.922533,0.921379,0.920003,0.002803,8


In [25]:
df_results_rs = df_results_rs.sort_values(by='rank_test_score')
df_results_rs

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_classifier__n_estimators,param_classifier__max_depth,param_classifier__class_weight,param_classifier,param_classifier__min_samples_split,param_classifier__C,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
4,3.038742,0.858982,0.083813,0.009802,,,"{0: 1, 1: 25}","SVC(C=100, class_weight={0: 1, 1: 25}, probabi...",,100.0,"{'classifier__class_weight': {0: 1, 1: 25}, 'c...",0.972493,0.984905,0.97415,0.977183,0.005502,1
1,1.047342,0.187027,0.036989,0.011904,50.0,20.0,"{0: 1, 1: 25}",RandomForestClassifier(random_state=42),,,"{'classifier__n_estimators': 50, 'classifier__...",0.968489,0.983616,0.976366,0.976157,0.006177,2
0,4.478518,0.063239,0.14033,0.03594,250.0,10.0,"{0: 1, 1: 5}",RandomForestClassifier(random_state=42),,,"{'classifier__n_estimators': 250, 'classifier_...",0.966816,0.982843,0.97569,0.975116,0.006556,3
3,2.552949,0.667843,0.094353,0.009118,100.0,10.0,"{0: 1, 1: 10}",RandomForestClassifier(random_state=42),,,"{'classifier__n_estimators': 100, 'classifier_...",0.972217,0.982688,0.969474,0.974793,0.005694,4
5,0.26019,0.022616,0.022453,0.003247,10.0,10.0,"{0: 1, 1: 5}",RandomForestClassifier(random_state=42),,,"{'classifier__n_estimators': 10, 'classifier__...",0.956728,0.967292,0.958345,0.960788,0.004646,5
8,0.085092,0.0055,0.007224,0.0012,,5.0,,DecisionTreeClassifier(random_state=42),5.0,,"{'classifier__min_samples_split': 5, 'classifi...",0.923884,0.926218,0.930576,0.926893,0.002773,6
6,0.252842,0.058439,0.012758,0.00471,,,,DecisionTreeClassifier(random_state=42),10.0,,"{'classifier__min_samples_split': 10, 'classif...",0.915979,0.926573,0.930588,0.92438,0.006162,7
9,0.107058,0.025288,0.006719,0.000851,,25.0,"{0: 1, 1: 10}",DecisionTreeClassifier(random_state=42),10.0,,"{'classifier__min_samples_split': 10, 'classif...",0.916096,0.922533,0.921379,0.920003,0.002803,8
7,1.603864,0.327122,0.006682,0.000407,10.0,20.0,,GradientBoostingClassifier(random_state=42),,,"{'classifier__n_estimators': 10, 'classifier__...",0.909278,0.918367,0.921763,0.916469,0.00527,9
2,0.253429,0.062511,0.017129,0.000823,,25.0,"{0: 1, 1: 10}",DecisionTreeClassifier(random_state=42),5.0,,"{'classifier__min_samples_split': 5, 'classifi...",0.912046,0.918691,0.907853,0.912863,0.004462,10


In [26]:
df_results_rs.reset_index(drop=True, inplace=True)
df_results_rs

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_classifier__n_estimators,param_classifier__max_depth,param_classifier__class_weight,param_classifier,param_classifier__min_samples_split,param_classifier__C,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,3.038742,0.858982,0.083813,0.009802,,,"{0: 1, 1: 25}","SVC(C=100, class_weight={0: 1, 1: 25}, probabi...",,100.0,"{'classifier__class_weight': {0: 1, 1: 25}, 'c...",0.972493,0.984905,0.97415,0.977183,0.005502,1
1,1.047342,0.187027,0.036989,0.011904,50.0,20.0,"{0: 1, 1: 25}",RandomForestClassifier(random_state=42),,,"{'classifier__n_estimators': 50, 'classifier__...",0.968489,0.983616,0.976366,0.976157,0.006177,2
2,4.478518,0.063239,0.14033,0.03594,250.0,10.0,"{0: 1, 1: 5}",RandomForestClassifier(random_state=42),,,"{'classifier__n_estimators': 250, 'classifier_...",0.966816,0.982843,0.97569,0.975116,0.006556,3
3,2.552949,0.667843,0.094353,0.009118,100.0,10.0,"{0: 1, 1: 10}",RandomForestClassifier(random_state=42),,,"{'classifier__n_estimators': 100, 'classifier_...",0.972217,0.982688,0.969474,0.974793,0.005694,4
4,0.26019,0.022616,0.022453,0.003247,10.0,10.0,"{0: 1, 1: 5}",RandomForestClassifier(random_state=42),,,"{'classifier__n_estimators': 10, 'classifier__...",0.956728,0.967292,0.958345,0.960788,0.004646,5
5,0.085092,0.0055,0.007224,0.0012,,5.0,,DecisionTreeClassifier(random_state=42),5.0,,"{'classifier__min_samples_split': 5, 'classifi...",0.923884,0.926218,0.930576,0.926893,0.002773,6
6,0.252842,0.058439,0.012758,0.00471,,,,DecisionTreeClassifier(random_state=42),10.0,,"{'classifier__min_samples_split': 10, 'classif...",0.915979,0.926573,0.930588,0.92438,0.006162,7
7,0.107058,0.025288,0.006719,0.000851,,25.0,"{0: 1, 1: 10}",DecisionTreeClassifier(random_state=42),10.0,,"{'classifier__min_samples_split': 10, 'classif...",0.916096,0.922533,0.921379,0.920003,0.002803,8
8,1.603864,0.327122,0.006682,0.000407,10.0,20.0,,GradientBoostingClassifier(random_state=42),,,"{'classifier__n_estimators': 10, 'classifier__...",0.909278,0.918367,0.921763,0.916469,0.00527,9
9,0.253429,0.062511,0.017129,0.000823,,25.0,"{0: 1, 1: 10}",DecisionTreeClassifier(random_state=42),5.0,,"{'classifier__min_samples_split': 5, 'classifi...",0.912046,0.918691,0.907853,0.912863,0.004462,10


In [27]:
df_results_rs['param_classifier'][0]

SVC(C=100, class_weight={0: 1, 1: 25}, probability=True, random_state=42)

In [28]:
df_results_rs['mean_test_score'][0]

0.9771825908130977