### Import libraries

In [1]:
import pandas as pd                   
import matplotlib.pyplot as plt        
import seaborn as sns           
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

### Prepare Data

In [2]:
# Load the pre-split feature and label CSVs from the working directory.
# Only the 'Survived' column of each label file is kept.
# NOTE(review): the rendered frame shows an 'Unnamed: 0' column, which looks
# like a row index that was written out with the CSV - confirm, and consider
# reading with index_col=0 if nothing downstream needs it.
train_features = pd.read_csv('train_features.csv')
train_labels = pd.read_csv('train_labels.csv')['Survived']

test_features = pd.read_csv('test_features.csv')
test_labels = pd.read_csv('test_labels.csv')['Survived']

# Bare last expression so the notebook renders the test feature table.
test_features

Unnamed: 0,Pclass,Sex,Age,Fare,Family_size,Title_Master,Title_Miss,Title_Mr,Title_Mrs,Title_Officer,...,Cabin_B,Cabin_C,Cabin_D,Cabin_E,Cabin_F,Cabin_G,Cabin_T,Embarked_C,Embarked_Q,Embarked_S
0,3,1,29.699118,15.2458,2,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,2,1,31.000000,10.5000,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1
2,3,1,20.000000,7.9250,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1
3,2,0,6.000000,33.0000,1,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,3,0,14.000000,11.2417,1,0,1,0,0,0,...,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174,3,1,17.000000,7.1250,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1
175,3,1,29.699118,7.2250,0,0,0,1,0,0,...,0,0,0,0,0,0,0,1,0,0
176,3,0,38.000000,31.3875,6,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,1
177,2,0,17.000000,10.5000,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,1


### Train model

In [3]:
def print_results(results):
    """Print the winning parameter set followed by one line per candidate.

    `results` must expose `best_params_` and a `cv_results_` mapping with
    'mean_test_score', 'std_test_score' and 'params' entries. Each candidate
    line shows the mean score and a band of two standard deviations, both
    rounded to three decimals, followed by the candidate's parameters.
    """
    header_template = 'BEST PARAMS: {}\n'
    row_template = '{} (+/-{}) for {}'

    print(header_template.format(results.best_params_))

    summary = results.cv_results_
    score_means = summary['mean_test_score']
    score_stds = summary['std_test_score']
    candidates = summary['params']
    for idx, candidate in enumerate(candidates):
        # Guard mirrors zip(): stop at the shortest of the three sequences.
        if idx >= len(score_means) or idx >= len(score_stds):
            break
        centre = round(score_means[idx], 3)
        spread = round(score_stds[idx] * 2, 3)
        print(row_template.format(centre, spread, candidate))
        
# Hyper-parameter search for a random-forest regressor that predicts a
# passenger's Fare from Age and Pclass.
# A fixed random_state makes the forests - and therefore the CV scores, the
# selected parameters and every downstream number - reproducible across
# Restart & Run All. Without it each run can pick a different "best" model.
rf = RandomForestRegressor(random_state=42)
parameters = {
    'n_estimators': [5, 50, 250],
    'max_depth': [2, 4, 8, 16, 32, None]
}

# 5-fold cross-validation over the full grid (3 x 6 = 18 candidates).
# With no explicit `scoring`, the estimator's own score (R^2 for a regressor)
# is used to rank candidates.
cv = GridSearchCV(rf, parameters, cv=5)
cv.fit(train_features[['Age','Pclass']].values, train_features['Fare'].values)

print_results(cv)

# With the default refit=True, best_estimator_ has already been retrained on
# the whole training set using the winning parameters.
best_rf=cv.best_estimator_

BEST PARAMS: {'max_depth': 2, 'n_estimators': 250}

0.367 (+/-0.143) for {'max_depth': 2, 'n_estimators': 5}
0.386 (+/-0.183) for {'max_depth': 2, 'n_estimators': 50}
0.386 (+/-0.179) for {'max_depth': 2, 'n_estimators': 250}
0.205 (+/-0.459) for {'max_depth': 4, 'n_estimators': 5}
0.298 (+/-0.209) for {'max_depth': 4, 'n_estimators': 50}
0.298 (+/-0.25) for {'max_depth': 4, 'n_estimators': 250}
0.072 (+/-0.63) for {'max_depth': 8, 'n_estimators': 5}
0.174 (+/-0.36) for {'max_depth': 8, 'n_estimators': 50}
0.179 (+/-0.351) for {'max_depth': 8, 'n_estimators': 250}
0.177 (+/-0.329) for {'max_depth': 16, 'n_estimators': 5}
0.176 (+/-0.315) for {'max_depth': 16, 'n_estimators': 50}
0.156 (+/-0.406) for {'max_depth': 16, 'n_estimators': 250}
0.117 (+/-0.526) for {'max_depth': 32, 'n_estimators': 5}
0.157 (+/-0.365) for {'max_depth': 32, 'n_estimators': 50}
0.169 (+/-0.365) for {'max_depth': 32, 'n_estimators': 250}
0.034 (+/-0.736) for {'max_depth': None, 'n_estimators': 5}
0.148 (+/-0.393

### Test Model

In [4]:
# Score the tuned forest on the test rows (Age, Pclass -> Fare) and show the
# result as a percentage with two decimals.
test_inputs = test_features[['Age','Pclass']].values
test_fares = test_features['Fare'].values
test_score = best_rf.score(test_inputs, test_fares)
print('Score on testing data: ' + "{:.2%}".format(test_score))

Score on testing data: 30.03%


In [5]:
# Predicted Fare for every test row, using only the Age and Pclass columns.
fare_inputs = test_features.loc[:, ['Age', 'Pclass']].values
best_rf.predict(fare_inputs)

array([ 14.25669476,  16.10430947,  14.25669476,  25.52414638,
        21.09288515, 103.02930324,  14.25669476,  14.33586599,
        14.33586599, 119.93706317,  88.18069416,  14.25669476,
        14.25669476,  14.25669476,  16.10430947, 120.7468724 ,
        83.49092763,  14.25669476,  16.10430947,  70.73316635,
        14.25669476,  89.84061918,  14.25669476,  14.25669476,
        14.25669476,  23.67653167,  83.82726286,  16.10430947,
        23.67653167,  14.25669476,  14.25669476,  14.25669476,
        89.84061918,  14.25669476,  14.25669476,  14.25669476,
        92.01935344,  14.25669476,  89.84061918,  14.25669476,
        16.10430947,  14.25669476,  14.25669476,  14.25669476,
        14.25669476,  14.33586599,  14.25669476,  14.25669476,
        14.25669476,  70.73316635,  23.67653167,  70.73316635,
        14.25669476, 120.7468724 ,  14.25669476, 120.7468724 ,
        16.10430947,  70.73316635,  16.10430947,  14.25669476,
        14.25669476,  22.94049985,  16.10430947,  89.84