### Trained a Random Forest model

In [7]:
from sklearn.ensemble import RandomForestRegressor
rf = RandomForestRegressor()

#### List of parameters for hyperparameter tuning

In [8]:
param = {
    'max_depth':[3,6,9,12],
    'n_estimators' : [10,50,100,200] 
}

# Hyperparameter optimization using RandomizedSearchCV 

In [9]:
from sklearn.metrics import mean_squared_error,make_scorer
from sklearn.model_selection import RandomizedSearchCV
random_search=RandomizedSearchCV(rf,param_distributions=param,n_iter=5,scoring=make_scorer(mean_squared_error),n_jobs=-1,cv=5,verbose=3)

In [10]:
random_search.fit(X,y)

Fitting 5 folds for each of 5 candidates, totalling 25 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  19 out of  25 | elapsed:   11.9s remaining:    3.7s
[Parallel(n_jobs=-1)]: Done  25 out of  25 | elapsed:   14.2s finished


RandomizedSearchCV(cv=5, estimator=RandomForestRegressor(), n_iter=5, n_jobs=-1,
                   param_distributions={'max_depth': [3, 6, 9, 12],
                                        'n_estimators': [10, 50, 100, 200]},
                   scoring=make_scorer(mean_squared_error), verbose=3)

In [11]:
means = random_search.cv_results_['mean_test_score']
params = random_search.cv_results_['params']
for mean, param in zip(means, params):
    print("%f with: %r" % (mean, param))
    if mean == min(means):
        print('Best parameters with the minimum Mean Square Error are:',param)

1212171.965558 with: {'n_estimators': 50, 'max_depth': 9}
1203943.597910 with: {'n_estimators': 100, 'max_depth': 9}
Best parameters with the minimum Mean Square Error are: {'n_estimators': 100, 'max_depth': 9}
1242554.264999 with: {'n_estimators': 50, 'max_depth': 12}
1387927.715629 with: {'n_estimators': 200, 'max_depth': 3}
1204071.376814 with: {'n_estimators': 10, 'max_depth': 6}


# Evaluating the model for Train and Test set 

In [12]:
rf = RandomForestRegressor(
    n_estimators=100, max_depth=6,
)

In [13]:
rf.fit(X,y)

RandomForestRegressor(max_depth=6)

In [14]:
y_pred = rf.predict(X)

In [17]:
from sklearn.metrics import r2_score,mean_squared_error

score = r2_score(y,y_pred)
print("Score:",100*score)
print("RMSE : %.4g" % np.sqrt(mean_squared_error(y,y_pred)))

Score: 61.53361077164543
RMSE : 1058


In [18]:
from sklearn.metrics import mean_squared_error,make_scorer,mean_absolute_error
from sklearn.model_selection import cross_val_score

#Perform cross-validation:
cv_score = cross_val_score(rf,X, y, cv=20, scoring = make_scorer(mean_squared_error))
cv_score = np.sqrt(np.abs(cv_score))
    
#Print model report:
print("\nModel Report")
print("MAE : %.4g" % np.sqrt(mean_absolute_error(y,y_pred)))
print("RMSE : %.4g" % np.sqrt(mean_squared_error(y,y_pred)))
print("CV Score : Mean - %.4g | Std - %.4g | Min - %.4g | Max - %.4g" % (np.mean(cv_score),np.std(cv_score),np.min(cv_score),np.max(cv_score)))


Model Report
MAE : 27.3
RMSE : 1058
CV Score : Mean - 1090 | Std - 47.1 | Min - 1021 | Max - 1210


In [None]:
import pickle

# Saving the model to a pickle file
with open('randomForestModel.pkl', 'wb') as pkl:
    pickle.dump(rf, pkl)
