# RF Random Forest
### Using: $\textit{TensorFlow/Keras}$

## Imports

In [75]:
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import tensorflow.keras as tk
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import accuracy_score

## Load data

In [76]:
ds = pd.read_csv('D:/data/csv/TitanicPreprocessed.csv')
ds.head()

Unnamed: 0,Sex,Age,SibSp,Parch,Fare,Title_Master,Title_Miss,Title_Mr,Title_Mrs,Title_Officer,...,Ticket_STONOQ,Ticket_SWPP,Ticket_WC,Ticket_WEP,Ticket_XXX,FamilySize,Singleton,SmallFamily,LargeFamily,Survived
0,1,22.0,1,0,7.25,0,0,1,0,0,...,0,0,0,0,0,2,0,1,0,0
1,0,38.0,1,0,71.2833,0,0,0,1,0,...,0,0,0,0,0,2,0,1,0,1
2,0,26.0,0,0,7.925,0,1,0,0,0,...,0,0,0,0,0,1,0,0,0,1
3,0,35.0,1,0,53.1,0,0,0,1,0,...,0,0,0,0,1,2,0,1,0,1
4,1,35.0,0,0,8.05,0,0,1,0,0,...,0,0,0,0,1,1,0,0,0,0


### Split columns (covariates/response) and rows (train/test)

In [77]:
y = ds['Survived']
X = ds.drop(['Survived'], axis = 1)
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.25, random_state=0)

## Build model

In [78]:
hypers = {'bootstrap': True,
          'min_samples_leaf': 3,
          'n_estimators': 50, 
          'min_samples_split': 10,
          'max_features': 'sqrt',
          'max_depth': 6,
          'max_leaf_nodes': None}

model_class = RandomForestClassifier(**hypers)
model_class.fit(train_X, train_y)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=6, max_features='sqrt',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=3, min_samples_split=10,
                       min_weight_fraction_leaf=0.0, n_estimators=50,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [79]:
pred_class = model_class.predict(test_X)
score = accuracy_score(test_y , pred)
round(score, 2)

0.82

In [80]:
model_reg = RandomForestRegressor(
                       bootstrap=True, ccp_alpha=0.0, max_depth=6, max_features='sqrt',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=3, min_samples_split=10,
                       min_weight_fraction_leaf=0.0, n_estimators=50,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [81]:
model_reg.fit(train_X, train_y)

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=6, max_features='sqrt', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=3,
                      min_samples_split=10, min_weight_fraction_leaf=0.0,
                      n_estimators=50, n_jobs=None, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)

In [82]:
pred_reg = model_reg.predict(test_X)
mae = np.mean(abs(test_y - pred_reg))
print('MAE = ', round(mae, 2))


MAE =  0.3


In [83]:
res_compare = pd.DataFrame(test_y)
res_compare['pred class'] = pred_class
res_compare['pred reg'] = np.around(pred_reg,2)
res_compare.head(20)

Unnamed: 0,Survived,pred class,pred reg
495,0,0,0.18
648,0,0,0.14
278,0,0,0.22
31,1,1,0.88
255,1,1,0.67
298,1,0,0.27
609,1,1,0.85
318,1,1,0.82
484,1,1,0.45
367,1,1,0.64


## Credits & Links

https://blog.goodaudience.com/introduction-to-random-forest-algorithm-with-python-9efd1d8f0157