# Polynomial Regression

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset

In [2]:

dataset = pd.read_csv('Sales forecast final.csv')


# Data Preprocessing

In [3]:
#convert data type to int
dataset['week no']=dataset['week no'].astype('int')

In [4]:
#selecting required columns
data=dataset[['week no','Category','Year','Sales']]
data.head()


Unnamed: 0,week no,Category,Year,Sales
0,49,2,2016,679064.37
1,49,1,2016,1268409.6
2,49,5,2016,94744.94
3,49,35,2016,1446.67
4,49,7,2016,1475152.07


In [5]:
# Select the dependant and independant variables
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

In [6]:
#encode categorical variables ('category' and 'Year')
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct=ColumnTransformer(transformers=[('encoder',OneHotEncoder(),[1,2])],remainder='passthrough',sparse_threshold=0)
X=np.array(ct.fit_transform(X))

In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


## Splitting the dataset into the Training set and Test set

In [7]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [8]:
# Feature scaling to bring all the variables within a fixed range
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train[:,-1:] = sc.fit_transform(X_train[:,-1:])
X_test[:,-1:] = sc.transform(X_test[:,-1:])


## Training the Polynomial Regression model on the Training set

In [9]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
poly_reg = PolynomialFeatures(degree = 4)
X_poly = poly_reg.fit_transform(X_train)
regressor = LinearRegression()
regressor.fit(X_poly, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

## Predicting the Test set results

In [10]:
y_pred = regressor.predict(poly_reg.transform(X_test))
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

[[ 1.05e+06  1.05e+06]
 [ 8.21e+05  1.05e+06]
 [ 6.60e+05  6.39e+05]
 [ 2.15e+06  2.25e+06]
 [ 7.04e+04  4.00e+01]
 [ 2.89e+06  1.27e+06]
 [ 1.63e+05  2.29e+05]
 [ 2.08e+06  2.21e+06]
 [ 3.34e+13  9.95e+02]
 [ 2.79e+06  3.66e+06]
 [ 1.26e+05  3.63e+05]
 [-3.15e+04  2.40e+02]
 [-1.21e+06  4.00e-02]
 [ 1.73e+05  5.05e+04]
 [ 5.45e+05  1.12e+05]
 [ 2.18e+06  1.28e+06]
 [-7.47e+04  3.15e+02]
 [ 2.05e+06  1.78e+06]
 [ 6.54e+05  2.60e+05]
 [ 2.44e+05  2.96e+05]
 [ 5.50e+05  3.41e+05]
 [ 6.40e+01  2.38e+02]
 [ 2.41e+06  2.52e+06]
 [ 1.85e+06  2.25e+06]
 [ 3.94e+05  4.31e+05]
 [ 2.13e+06  1.42e+06]
 [ 2.50e+05  4.58e+02]
 [ 1.93e+06  1.17e+06]
 [ 1.69e+06  2.19e+06]
 [ 2.03e+06  1.01e+06]
 [ 1.84e+06  1.69e+06]
 [ 2.29e+05  2.10e+05]
 [ 7.70e+04  1.40e+05]
 [ 1.70e+05  9.00e+04]
 [ 5.21e+05  2.68e+05]
 [ 2.49e+06  3.82e+06]
 [-3.34e+04  1.32e+05]
 [ 8.62e+04  2.40e+05]
 [ 1.80e+06  2.35e+06]
 [ 2.89e+05  2.03e+05]
 [ 1.00e+14  9.18e+02]
 [ 4.75e+05  2.20e+05]
 [-8.93e+05  0.00e+00]
 [ 1.58e+05

## Evaluating the Model Performance

In [12]:
from sklearn.model_selection import cross_val_score
accuracies = cross_val_score(estimator = regressor, X = X_train, y = y_train, cv = 10)
print("Accuracy: {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation: {:.2f} %".format(accuracies.std()*100))

Accuracy: 71.88 %
Standard Deviation: 5.44 %
