# Gradient Boost Regressor

## An ensemble of weak learners, where each learner's labels (y values) are adjusted using the errors of the previous learner

### Import the necessary packages

In [1]:
import pyodbc 
import pandas as pd
import numpy as np

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingRegressor 
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as MSE

### Fetch the data that will be used for training from SQL Server

In [2]:
# Open a connection to the RENTERS_STP database using Windows (trusted)
# authentication, so no username or password appears in the notebook.
conn = pyodbc.connect('Driver={SQL Server};'
                      'Server=LAPTOP-HVRMUNPF;'
                      'PORT=1433;'
                      'Database=RENTERS_STP;'
                      'Trusted_Connection=yes;'
                      )

# Pull every column of the classification table; the label column is
# separated from the features in a later cell.
query = 'SELECT  * from [RENTERS_STP].[dbo].[DR_DetailedRequest_classification]'

# Read the full result set into a DataFrame. The connection is always closed
# afterwards -- even if the query raises -- so re-running this cell does not
# leave open connections behind. Reconnect if a later cell needs the database.
try:
    df = pd.read_sql(query, conn)
finally:
    conn.close()

### Get the 'features' and 'labels' from the dataset. Split the dataset into training and test sets

In [3]:
# Label column: the value the model is trained to predict.
y = df['PREDICTION_VALUE_Y_BOOL']

# Feature matrix: every remaining column, passed through get_dummies
# (one-hot encoding) with any missing values replaced by 0.
features = df.drop(columns=['PREDICTION_VALUE_Y_BOOL'])
X = pd.get_dummies(features).fillna(0)

# Hold out 30% of the rows for testing. Stratifying on y keeps the label
# proportions the same in the training and test splits.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

# Seed constant; note the split above uses its own random_state, not SEED.
SEED = 1

### Instantiate the models

In [4]:
# Shallow decision tree (maximum depth 2) with a fixed random seed
dt = DecisionTreeClassifier(random_state=1, max_depth=2)

# Gradient boosting regressor: 200 boosting stages, each tree limited to
# depth 4, with a fixed random seed
gb = GradientBoostingRegressor(n_estimators=200, max_depth=4, random_state=2)

### Fit the model and predict on the test set

In [5]:
# Train gb on the training split, then predict on the held-out test features.
# fit() returns the fitted estimator itself, so the two steps can be chained.
y_pred = gb.fit(X_train, y_train).predict(X_test)


### Model Evaluation

In [6]:
# Mean squared error between the true test labels and the predictions
mse_test = MSE(y_test, y_pred)

# Root mean squared error: the square root of the MSE
rmse_test = mse_test ** 0.5

# Report the RMSE to six decimal places
print('Test set RMSE of gb: {:.6f}'.format(rmse_test))

Test set RMSE of gb: 0.093934
