## Import Libraries

In [0]:
import numpy as np 
import pandas as pd 
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error

from dataset import clean_and_split_dataset

In [0]:
# Read Boston.csv into a DataFrame; the bare name on the last line renders it as the cell output.
# NOTE(review): the path is absolute and machine-specific - consider a configurable data directory.
data = pd.read_csv(filepath_or_buffer='/home/notebooks/storage/data/Boston.csv')
data

Unnamed: 0.1,Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,1,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98,24.0
1,2,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.90,9.14,21.6
2,3,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,4,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,5,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,502,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,9.67,22.4
502,503,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273,21.0,396.90,9.08,20.6
503,504,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273,21.0,396.90,5.64,23.9
504,505,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48,22.0


### Splitting data for modeling

In [0]:
# Hand the raw frame to the project helper imported from `dataset`; its four return values are
# unpacked as train/test features and targets. Presumably the cleaning rules and the split
# ratio/seed are defined inside that helper - TODO confirm there, they are not visible here.
X_train, X_test, y_train, y_test = clean_and_split_dataset(data)

### ML Models

**Random Forest Regressor Model**

In [0]:
# Random forest baseline: 100 trees, depth capped at 7, fixed seed so reruns match.
RandomForestRegressorModel = RandomForestRegressor(
    n_estimators=100,
    max_depth=7,
    random_state=0,
)
RandomForestRegressorModel.fit(X_train, y_train)

# Compute score() (R^2 for scikit-learn regressors) on both splits, then report them
# together so the train/test gap is easy to read.
rf_train_score = RandomForestRegressorModel.score(X_train, y_train)
rf_test_score = RandomForestRegressorModel.score(X_test, y_test)
print('Random Forest Regressor Train Score is : ' , rf_train_score)
print('Random Forest Regressor Test Score is : ' , rf_test_score)

Random Forest Regressor Train Score is :  0.9660216413167757
Random Forest Regressor Test Score is :  0.9016815948465721


In [0]:
# Evaluate the random forest on the held-out split.
y_pred_rf = RandomForestRegressorModel.predict(X_test)
MSEValue_RF = mean_squared_error(y_test, y_pred_rf, multioutput='uniform_average')
# The square root of the MSE is the root mean squared error (RMSE), not the mean
# absolute error, so the printed label names the metric that is actually computed.
print('Root Mean Squared Error Value is : ', np.sqrt(MSEValue_RF))

Mean Absolute Error Value is :  3.0018865729744784


**Gradient Boosting Regressor**

In [0]:
# Gradient boosting: 100 stages of depth-4 trees with a 0.05 learning rate, fixed seed.
GBRModel = GradientBoostingRegressor(
    n_estimators=100,
    max_depth=4,
    learning_rate=0.05,
    random_state=0,
)
GBRModel.fit(X_train, y_train)

# Compute score() (R^2 for scikit-learn regressors) on both splits before reporting.
gbr_train_score = GBRModel.score(X_train, y_train)
gbr_test_score = GBRModel.score(X_test, y_test)
print('GBRModel Train Score is : ' , gbr_train_score)
print('GBRModel Test Score is : ' , gbr_test_score)

GBRModel Train Score is :  0.9843405667772539
GBRModel Test Score is :  0.913998457543389


In [0]:
# Evaluate the gradient boosting model on the held-out split.
y_pred_gbr = GBRModel.predict(X_test)
MSEValue_GB = mean_squared_error(y_test, y_pred_gbr, multioutput='uniform_average')
# The square root of the MSE is the root mean squared error (RMSE), not the mean
# absolute error, so the printed label names the metric that is actually computed.
print('Root Mean Squared Error Value is : ', np.sqrt(MSEValue_GB))

Mean Absolute Error Value is :  2.8075660747032116


**XGB Regressor**

In [0]:
# Gradient-boosted trees via XGBoost's scikit-learn wrapper.
# - learning_rate is the wrapper's documented name for the booster's `eta`, so the value is
#   passed as a regular estimator parameter instead of through **kwargs.
# - subsample / colsample_bytree draw random rows and columns per tree, so the seed is pinned
#   explicitly, matching random_state=0 on the other two models in this notebook.
XGBModel = XGBRegressor(
    n_estimators=100,
    max_depth=3,
    learning_rate=0.09,
    subsample=0.7,
    colsample_bytree=0.7,
    random_state=0,
)
XGBModel.fit(X_train, y_train)

# score() on the wrapper reports R^2, the same metric printed for the other models.
print('XGBoost Regressor Train Score is : ' , XGBModel.score(X_train, y_train))
print('XGBoost Regressor Test Score is : ' , XGBModel.score(X_test, y_test))

XGBoost Regressor Train Score is :  0.9706096670819414
XGBoost Regressor Test Score is :  0.9146326905881368


In [0]:
# Held-out predictions from the XGBoost model, summarised as sqrt(MSE), i.e. the RMSE.
y_pred_x = XGBModel.predict(X_test)
MSEValue_X = mean_squared_error(
    y_true=y_test,
    y_pred=y_pred_x,
    multioutput='uniform_average',
)
print('Mean Squared Error Value by Square root is : ', np.sqrt(MSEValue_X))

Mean Squared Error Value by Square root is :  2.7971944800826836
