In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.model_selection import GridSearchCV


In [2]:
df = pd.read_csv('train.csv')

In [3]:
df.head(2)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C


In [4]:
df.Sex = df.Sex.map({'male':0,'female':1})
df.Embarked = df.Embarked.map({'S':0,'C':1,'Q':2})

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    int64  
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    float64
dtypes: float64(3), int64(6), object(3)
memory usage: 83.7+ KB


In [6]:
df.Age = df.Age.fillna(df.Age.median())
df = df.dropna(subset=['Embarked'])

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 889 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  889 non-null    int64  
 1   Survived     889 non-null    int64  
 2   Pclass       889 non-null    int64  
 3   Name         889 non-null    object 
 4   Sex          889 non-null    int64  
 5   Age          889 non-null    float64
 6   SibSp        889 non-null    int64  
 7   Parch        889 non-null    int64  
 8   Ticket       889 non-null    object 
 9   Fare         889 non-null    float64
 10  Cabin        202 non-null    object 
 11  Embarked     889 non-null    float64
dtypes: float64(3), int64(6), object(3)
memory usage: 90.3+ KB


In [8]:
data = df[['Pclass','Sex','Age','SibSp','Parch','Fare','Embarked']]
label = df['Survived']

In [9]:
params = [{'n_estimators':[70,80,90,100],'random_state':range(10)}]

In [10]:
clf = GridSearchCV(RFC(),params,cv=5)

In [11]:
clf.fit(data,label)

GridSearchCV(cv=5, estimator=RandomForestClassifier(),
             param_grid=[{'n_estimators': [70, 80, 90, 100],
                          'random_state': range(0, 10)}])

In [15]:
clf.cv_results_

{'mean_fit_time': array([0.09250484, 0.09158311, 0.09379606, 0.08996582, 0.09872155,
        0.09810066, 0.10227847, 0.09996648, 0.09840751, 0.10229907,
        0.11090841, 0.10657024, 0.11689801, 0.11390657, 0.11474295,
        0.10842113, 0.11013956, 0.11460319, 0.10640268, 0.11207294,
        0.12509413, 0.12844677, 0.12436695, 0.12092924, 0.11985621,
        0.12644253, 0.12324309, 0.12384038, 0.12158179, 0.12529182,
        0.12374897, 0.12405162, 0.12370291, 0.12917275, 0.1241106 ,
        0.12374759, 0.12343225, 0.12595444, 0.12428179, 0.13222494]),
 'std_fit_time': array([0.00311326, 0.00133234, 0.0044144 , 0.00081927, 0.00351308,
        0.0063219 , 0.0021059 , 0.00198732, 0.0047146 , 0.00073964,
        0.00621729, 0.00196157, 0.0031205 , 0.00148895, 0.00454024,
        0.00297948, 0.00389494, 0.00253618, 0.00598182, 0.003621  ,
        0.0035318 , 0.00092646, 0.00273443, 0.00338197, 0.00207945,
        0.00457919, 0.00731304, 0.00310661, 0.00432921, 0.00419683,
        0.000

In [12]:
results = pd.DataFrame(clf.cv_results_)

In [16]:
results

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_n_estimators,param_random_state,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.092505,0.003113,0.007993,4.6e-05,70,0,"{'n_estimators': 70, 'random_state': 0}",0.803371,0.803371,0.859551,0.764045,0.836158,0.813299,0.032511,8
1,0.091583,0.001332,0.008458,0.000541,70,1,"{'n_estimators': 70, 'random_state': 1}",0.780899,0.808989,0.865169,0.775281,0.80791,0.807649,0.031855,31
2,0.093796,0.004414,0.00802,9.9e-05,70,2,"{'n_estimators': 70, 'random_state': 2}",0.786517,0.808989,0.848315,0.769663,0.841808,0.811058,0.030508,15
3,0.089966,0.000819,0.007873,0.000121,70,3,"{'n_estimators': 70, 'random_state': 3}",0.780899,0.808989,0.837079,0.786517,0.819209,0.806538,0.020773,33
4,0.098722,0.003513,0.008657,0.00034,70,4,"{'n_estimators': 70, 'random_state': 4}",0.786517,0.814607,0.865169,0.769663,0.847458,0.816683,0.035842,1
5,0.098101,0.006322,0.008559,0.000806,70,5,"{'n_estimators': 70, 'random_state': 5}",0.769663,0.803371,0.848315,0.780899,0.824859,0.805421,0.028642,34
6,0.102278,0.002106,0.008695,0.000479,70,6,"{'n_estimators': 70, 'random_state': 6}",0.786517,0.808989,0.837079,0.769663,0.813559,0.803161,0.023202,40
7,0.099966,0.001987,0.008503,0.000478,70,7,"{'n_estimators': 70, 'random_state': 7}",0.786517,0.803371,0.853933,0.780899,0.819209,0.808786,0.026267,27
8,0.098408,0.004715,0.009078,0.000714,70,8,"{'n_estimators': 70, 'random_state': 8}",0.775281,0.808989,0.865169,0.786517,0.813559,0.809903,0.031029,25
9,0.102299,0.00074,0.008719,0.000578,70,9,"{'n_estimators': 70, 'random_state': 9}",0.786517,0.797753,0.848315,0.769663,0.836158,0.807681,0.029846,29


In [13]:
results[results.rank_test_score <=3].mean_test_score

4     0.816683
22    0.816676
32    0.815553
Name: mean_test_score, dtype: float64

In [14]:
results[results.rank_test_score <=3].params

4      {'n_estimators': 70, 'random_state': 4}
22     {'n_estimators': 90, 'random_state': 2}
32    {'n_estimators': 100, 'random_state': 2}
Name: params, dtype: object