# Workbench
**Importing the required libraries**

In [None]:
# Import the numpy and pandas package
import numpy as np
import pandas as pd

# Data Visualisation
import matplotlib.pyplot as plt
import seaborn as sns

# Import the warnings
import warnings

# Import statsmodels
import statsmodels.formula.api as smf

# Import RMSE
from statsmodels.tools.eval_measures import rmse

# Import Random Forest Regressor
from sklearn.ensemble import RandomForestRegressor

# Import train/test split, grid search and cross-validation helpers
from sklearn.model_selection import GridSearchCV, cross_val_score, cross_val_predict, train_test_split

# Feature Scaling
from sklearn.preprocessing import StandardScaler

# Import the metrics
from sklearn import metrics
from sklearn.metrics import mean_squared_error, r2_score

# Import Pre-Processing
from sklearn import preprocessing

# configuration settings
%matplotlib inline
sns.set(color_codes=True)
warnings.filterwarnings('ignore') ## Surpress the warnings
sns.set_style('whitegrid')
sns.set_context('talk')
params = {'legend.fontsize': 'x-large',
          'figure.figsize': (30, 10),
          'axes.labelsize': 'x-large',
          'axes.titlesize':'x-large',
          'xtick.labelsize':'x-large',
          'ytick.labelsize':'x-large'}

plt.rcParams.update(params)

**Load the data into a dataframe**

In [None]:
# Read the till-transaction CSV into supermarket_till_transactions_df.
# TODO: data_path is an empty string, so read_csv will fail until it is set
# to the location of the transactions file.
data_path = ""
supermarket_till_transactions_df = pd.read_csv(data_path)

In [None]:
# Preview the first five rows (head() defaults to n=5)
supermarket_till_transactions_df.head()

To illustrate Random Forest Regression we need just two variables:
1. SHOP_HOUR
2. SPEND

In [None]:
# Keep only the predictor (SHOP_HOUR) and the target (SPEND), then preview
selected_columns = ["SHOP_HOUR", "SPEND"]
supermarket_till_transactions_df = supermarket_till_transactions_df[selected_columns]
supermarket_till_transactions_df.head()

In [None]:
# Split into training (X, y) and held-out test (X_test, y_test) sets.
# Every column except the last is a feature; the last column is the target.
features = supermarket_till_transactions_df.iloc[:, :-1]
target = supermarket_till_transactions_df.iloc[:, -1]

X, X_test, y, y_test = train_test_split(
    features,
    target,
    test_size=0.33,
    random_state=42,
)

In [None]:
# Fit the random forest on the training split only.
# The test split (X_test, y_test) must never be passed to fit(): calling fit()
# again replaces the previously learned model, so fitting on the test data
# would both discard the training fit and make the test score meaningless.
regressor = RandomForestRegressor(n_estimators=10, random_state=0)
regressor.fit(X, y)

In [None]:
# R-squared on the training split (X, y), so the value matches the printed label
print("R-Squared on train dataset={}".format(regressor.score(X, y)))

In [None]:
# R-squared on the held-out test split
test_r_squared = regressor.score(X_test, y_test)
print("R-Squared on test dataset={}".format(test_r_squared))

**Visualizing the Random Forest Results**

In [None]:
# Plot the training observations (red) and the model's prediction per shop
# hour (blue). Predictions are drawn over the sorted, de-duplicated hours so
# the line runs left to right instead of zig-zagging through the rows in the
# shuffled order produced by the train/test split.
hours_sorted = X.drop_duplicates().sort_values(by="SHOP_HOUR")

fig, ax = plt.subplots()
ax.scatter(X["SHOP_HOUR"], y, color='red')
ax.plot(hours_sorted["SHOP_HOUR"], regressor.predict(hours_sorted), color='blue')
ax.set_title('Random Forest Regression')
ax.set_xlabel('SHOP HOUR')
ax.set_ylabel('SPEND')
plt.show()