# Workbench

**Importing the required libraries**

In [None]:
# Import the warnings module (standard library)
import warnings

# Import the numpy and pandas packages
import numpy as np
import pandas as pd

# Data Visualisation
import matplotlib.pyplot as plt
import seaborn as sns

# Import statsmodels
import statsmodels.formula.api as smf

# Import RMSE
from statsmodels.tools.eval_measures import rmse

# Import the metrics
from sklearn import metrics
from sklearn.metrics import mean_squared_error, r2_score

# Import grid search, cross-validation helpers and train test split
from sklearn.model_selection import GridSearchCV, cross_val_score, cross_val_predict, train_test_split

# Import Pre-Processing and Feature Scaling
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler

# Import Decision Tree Regressor
from sklearn.tree import DecisionTreeRegressor

# configuration settings
# Render matplotlib figures inline, below the cell that produces them.
%matplotlib inline
# Apply seaborn's defaults (with color_codes enabled), then pick the grid style and the
# larger 'talk' scaling for plot elements.
sns.set(color_codes=True)
# NOTE(review): 'ignore' with no category silences every warning, including deprecation
# notices from the libraries used below.
warnings.filterwarnings('ignore') ## Suppress the warnings
sns.set_style('whitegrid')
sns.set_context('talk')
# Figure and font sizes used for every plot in the notebook.
params = {'legend.fontsize': 'x-large',
          'figure.figsize': (30, 10),
          'axes.labelsize': 'x-large',
          'axes.titlesize':'x-large',
          'xtick.labelsize':'x-large',
          'ytick.labelsize':'x-large'}

# Applied after the seaborn calls; presumably so that these sizes take precedence
# over whatever those calls configured -- keep this statement last.
plt.rcParams.update(params)

**Load the data into a dataframe**

In [None]:
# Location of the supermarket till transactions CSV file.
# TODO: the path was left blank in this notebook; fill it in before running.
DATA_PATH = ""

# Fail with an actionable message instead of handing an empty path to pandas.
if not DATA_PATH:
    raise ValueError("DATA_PATH is empty: set it to the supermarket till transactions CSV file.")

# load the data into a dataframe called supermarket_till_transactions_df
supermarket_till_transactions_df = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first five rows of the loaded data (five is the default for head())
supermarket_till_transactions_df.head()

In order to illustrate Decision Tree Regression we need just two variables:
1. SHOP_HOUR
2. SPEND

In [None]:
# Keep only the two columns used for modelling, then preview the reduced frame.
selected_columns = ["SHOP_HOUR", "SPEND"]
supermarket_till_transactions_df = supermarket_till_transactions_df[selected_columns]
supermarket_till_transactions_df.head()

In [None]:
# Every column except the last is a feature; the last column is the target.
feature_frame = supermarket_till_transactions_df.iloc[:, :-1]
target_series = supermarket_till_transactions_df.iloc[:, -1]

# Expose both as plain numpy arrays.
X, y = feature_frame.values, target_series.values

In [None]:
# Hold out 33% of the rows for testing; the fixed seed keeps the split reproducible.
# From this cell onwards X and y refer to the TRAINING portion only.
features = supermarket_till_transactions_df.iloc[:, 0:-1]
target = supermarket_till_transactions_df.iloc[:, -1]

X, X_test, y, y_test = train_test_split(
    features,
    target,
    test_size=0.33,
    random_state=42,
)

In [None]:
# Baseline decision tree with hand-picked complexity limits.
# random_state is fixed (same seed as the train/test split) so reruns give the same tree.
dtm = DecisionTreeRegressor(max_depth=4,
                            min_samples_split=5,
                            max_leaf_nodes=10,
                            random_state=42)

# Fit on the training split ONLY. The model must never be fitted on the test split,
# otherwise the test score no longer measures performance on unseen data.
dtm.fit(X, y)

# Score the same fitted model on each split so the two numbers are comparable.
print("R-Squared on train dataset={}".format(dtm.score(X, y)))
print("R-Squared on test dataset={}".format(dtm.score(X_test, y_test)))

**Hyper-parameter tuning with GridSearchCV**

In [None]:
# Candidate hyper-parameters for the exhaustive search.
# The criterion names "mse"/"mae" were renamed in scikit-learn 1.0 and removed in 1.2;
# "squared_error"/"absolute_error" are the current equivalents.
param_grid = {"criterion": ["squared_error", "absolute_error"],
              "min_samples_split": [10, 20, 40],
              "max_depth": [2, 6, 8],
              "min_samples_leaf": [20, 40, 100],
              "max_leaf_nodes": [5, 20, 100],
             }

# 5-fold cross-validated search over the grid, using the estimator's own score method.
grid_cv_dtm = GridSearchCV(dtm, param_grid, cv=5)

# Search on the training split only, so the test split stays unseen.
grid_cv_dtm.fit(X,y)

In [None]:
# Report the best cross-validated score and the parameter combination that achieved it.
best_cv_score = grid_cv_dtm.best_score_
best_cv_params = grid_cv_dtm.best_params_

print("R-Squared::{}".format(best_cv_score))
print("Best Hyperparameters::\n{}".format(best_cv_params))

In [None]:
# Tabulate the grid-search results (one row per parameter combination).
# The original cell read an undefined `df`; the frame has to be built from cv_results_.
grid_cv_results_df = pd.DataFrame(grid_cv_dtm.cv_results_)

fig, ax = plt.subplots()
sns.pointplot(data=grid_cv_results_df[['mean_test_score',
                                       'param_max_leaf_nodes',
                                       'param_max_depth']],
              y='mean_test_score', x='param_max_depth',
              hue='param_max_leaf_nodes', ax=ax)
# The trailing semicolon suppresses the textual repr of the return value.
ax.set(title="Effect of Depth and Leaf Nodes on Model Performance");

In [None]:
# Checking the training model scores with 10-fold cross-validation
r2_scores = cross_val_score(grid_cv_dtm.best_estimator_, X, y, cv=10)

# The 'neg_mean_squared_error' scorer returns the MSE with its sign flipped (so that
# higher is better); negate it again to report a conventional, non-negative MSE.
mse_scores = -cross_val_score(grid_cv_dtm.best_estimator_, X, y, cv=10,
                              scoring='neg_mean_squared_error')

print("avg R-squared::{:.3f}".format(np.mean(r2_scores)))
print("MSE::{:.3f}".format(np.mean(mse_scores)))

In [None]:
# Evaluate the tuned model on the held-out test split.
best_dtm_model = grid_cv_dtm.best_estimator_

y_pred = best_dtm_model.predict(X_test)

# Stored as `test_r2` rather than `r2_score` so the r2_score function imported from
# sklearn.metrics is not overwritten by a float.
test_r2 = best_dtm_model.score(X_test, y_test)
test_mse = metrics.mean_squared_error(y_test, y_pred)

print("R-squared:{:.3f}".format(test_r2))
print("MSE: {:.2f}".format(test_mse))