In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
pd.set_option('display.max_columns', None)

In [8]:
sns.set(rc={'figure.figsize':(12,6)})

In [9]:
df = pd.read_csv('Data/PreparedData.csv')

In [10]:
df.columns

Index(['Size', 'YearsOfAveliability', 'Weight', 'StanstadCost', 'ListPrice',
       'FinalSellPrice', 'PriceToCost', 'OrdersQuantity', 'Black', 'Silver',
       'Red', 'Yellow', 'Blue', 'H', 'L', 'M', 'isNew', '2011Sales',
       '2012Sales', '2013Sales', 'Mountain Bikes', 'Road Bikes',
       'Touring Bikes', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
       'Sep', 'Oct', 'Nov', 'Dec', 'Isdiscounted', 'IsPriceHigherThanCosts'],
      dtype='object')

##### Get X and y

In [11]:
# REpresentants:
# Color: Blcakc
# Class: H
# Sub Name: Mountain
# Moth: Jan
X = df[['Size', 'YearsOfAveliability', 'Weight', 'StanstadCost'
       , 'PriceToCost', 'Silver', 'Red', 'Yellow', 'Blue', 
        'L', 'M', 'isNew','Road Bikes', 'Touring Bikes', 
        'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
       'Sep', 'Oct', 'Nov', 'Dec', 'Isdiscounted', 'IsPriceHigherThanCosts']]

In [12]:
y = df['OrdersQuantity']

#### Train test split

In [13]:
from sklearn.model_selection import train_test_split

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)

#### Scale data

In [15]:
from sklearn.preprocessing import MinMaxScaler

In [17]:
scaler = MinMaxScaler()

In [18]:
scaled_X_train = scaler.fit_transform(X_train)
scaled_X_test = scaler.transform(X_test)

In [19]:
from sklearn.svm import SVR,LinearSVR

In [20]:
from sklearn.model_selection import GridSearchCV

#### Grid search params

In [21]:
param_grid = {'C':[0.001,0.01,0.1,0.5,1],
             'kernel':['linear','rbf','poly'],
              'gamma':['scale','auto'],
              'degree':[2,3,4],
              'epsilon':[0,0.01,0.1,0.5,1,2]}

In [22]:
svr = SVR()
grid = GridSearchCV(svr,param_grid=param_grid)

#### Fit model

In [23]:
grid.fit(scaled_X_train,y_train)

GridSearchCV(estimator=SVR(),
             param_grid={'C': [0.001, 0.01, 0.1, 0.5, 1], 'degree': [2, 3, 4],
                         'epsilon': [0, 0.01, 0.1, 0.5, 1, 2],
                         'gamma': ['scale', 'auto'],
                         'kernel': ['linear', 'rbf', 'poly']})

In [24]:
from sklearn.metrics import mean_absolute_error,mean_squared_error

In [25]:
grid.best_params_

{'C': 1, 'degree': 4, 'epsilon': 0.1, 'gamma': 'scale', 'kernel': 'poly'}

In [26]:
grid_preds = grid.predict(scaled_X_test)

In [27]:
mean_absolute_error(y_test,grid_preds)

15.623518433868945

In [28]:
np.sqrt(mean_squared_error(y_test,grid_preds))

23.828155462498756

In [32]:
len(grid_preds.reshape(-1,1))

841

In [95]:
check = pd.DataFrame()
check['TrueValues'] = y_test

In [96]:
check['predictions'] = grid_preds.reshape(-1,1)

In [97]:
check.reset_index(inplace=True)

In [98]:
check.drop('index', inplace=True, axis=1)

In [99]:
check

Unnamed: 0,TrueValues,predictions
0,4,4.611401
1,6,21.123888
2,34,23.356759
3,17,16.941623
4,17,32.869252
...,...,...
836,67,38.873222
837,26,16.709090
838,6,11.384998
839,23,41.681136


In [100]:
check['SVMModelerror'] = check['predictions'] - check['TrueValues']

I take mean y_train values to compare with my model results

In [113]:
np.mean(y_train)

30.95616717635066

In [114]:
check['MeanModelerror'] = np.mean(y_train) - check['TrueValues']

In [115]:
np.mean(check['SVMModelerror'])

-3.9369055534430664

In [116]:
np.mean(check['MeanModelerror'])

1.7135988053637132

My model is much worst than simple mean that we could use without any costs...

In [118]:
scaled_X_train

array([[0.41666667, 0.        , 0.33025262, ..., 0.        , 0.        ,
        1.        ],
       [0.41666667, 0.        , 0.39186691, ..., 0.        , 1.        ,
        0.        ],
       [0.25      , 0.        , 0.30807147, ..., 0.        , 0.        ,
        1.        ],
       ...,
       [0.41666667, 0.        , 0.30252619, ..., 0.        , 0.        ,
        0.        ],
       [0.58333333, 0.        , 0.91866913, ..., 0.        , 0.        ,
        1.        ],
       [0.58333333, 1.        , 0.34812076, ..., 0.        , 0.        ,
        1.        ]])