# Polynomial Regression

## Importing Needed Packages

In [34]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error , r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Read data with pandas

In [2]:
# Load the fuel-consumption CSV from the local ./data directory and preview the first rows.
df = pd.read_csv('./data/FuelConsumption.csv')
df.head()

Unnamed: 0,MODELYEAR,MAKE,MODEL,VEHICLECLASS,ENGINESIZE,CYLINDERS,TRANSMISSION,FUELTYPE,FUELCONSUMPTION_CITY,FUELCONSUMPTION_HWY,FUELCONSUMPTION_COMB,FUELCONSUMPTION_COMB_MPG,CO2EMISSIONS
0,2014,ACURA,ILX,COMPACT,2.0,4,AS5,Z,9.9,6.7,8.5,33,196
1,2014,ACURA,ILX,COMPACT,2.4,4,M6,Z,11.2,7.7,9.6,29,221
2,2014,ACURA,ILX HYBRID,COMPACT,1.5,4,AV7,Z,6.0,5.8,5.9,48,136
3,2014,ACURA,MDX 4WD,SUV - SMALL,3.5,6,AS6,Z,12.7,9.1,11.1,25,255
4,2014,ACURA,RDX AWD,SUV - SMALL,3.5,6,AS6,Z,12.1,8.7,10.6,27,244


#### Select These Features
##### Data exploration was done in other notebooks, so it is not repeated here; we directly select the required features.

In [3]:
# Keep only the columns used for modelling: five predictors plus the CO2EMISSIONS target.
cdf = df.loc[:, [
    'ENGINESIZE',
    'CYLINDERS',
    'FUELCONSUMPTION_CITY',
    'FUELCONSUMPTION_HWY',
    'FUELCONSUMPTION_COMB',
    'CO2EMISSIONS',
]]
cdf.head()

Unnamed: 0,ENGINESIZE,CYLINDERS,FUELCONSUMPTION_CITY,FUELCONSUMPTION_HWY,FUELCONSUMPTION_COMB,CO2EMISSIONS
0,2.0,4,9.9,6.7,8.5,196
1,2.4,4,11.2,7.7,9.6,221
2,1.5,4,6.0,5.8,5.9,136
3,3.5,6,12.7,9.1,11.1,255
4,3.5,6,12.1,8.7,10.6,244


## Model
#### Select the variables, then split the data into train and test sets

In [4]:
# Predictors: every selected column except the target.
X = cdf[[
    'ENGINESIZE',
    'CYLINDERS',
    'FUELCONSUMPTION_CITY',
    'FUELCONSUMPTION_HWY',
    'FUELCONSUMPTION_COMB',
]]
# Target kept as a one-column DataFrame (double brackets), so its shape is (n, 1).
y = cdf[['CO2EMISSIONS']]

# Hold out 20% of the rows for testing and train on the remaining 80%;
# the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Report the shapes of the full, train and test frames (features first, then target).
print(X.shape, X_train.shape, X_test.shape, sep="\n")
print("------------------------")
print(y.shape, y_train.shape, y_test.shape, sep="\n")

(1067, 5)
(853, 5)
(214, 5)
------------------------
(1067, 1)
(853, 1)
(214, 1)


### Modeling
#### Create regression model

In [5]:
# Plain linear regression estimator; it is fitted on the polynomial-expanded features further down.
model = LinearRegression()

#### Create the polynomial feature transformer and choose its degree

In [35]:
# Scores recorded from earlier trial runs with different polynomial degrees
# (presumably R-squared on the test split: the degree = 4 value matches the
# R-Squared printed at the end of this notebook):
# degree = 2 -------> 0.91
# degree = 3 -------> 0.93
# degree = 4 -------> 0.94
# degree = 5 -------> 0.31
# The score collapses at degree 5, so degree 4 is used.
# NOTE(review): choosing the degree from test-set scores makes the reported
# test score optimistic; consider a validation split or cross-validation.

pf = PolynomialFeatures(degree=4)

#### Create the polynomial feature matrices for the train and test sets

In [36]:
# Fit the polynomial expansion on the training features only, then reuse the
# already-fitted transformer for the test features. Calling fit_transform on
# the test split would refit the transformer on test data, which is the
# pattern that leaks test information once a stateful transformer is involved.
X_train_poly = pf.fit_transform(X_train)
X_test_poly = pf.transform(X_test)

#### Training the model with train poly

In [37]:
# Fit the linear model on the polynomial-expanded training features.
model.fit(X_train_poly , y_train)

#### Using the model to predict the test data

In [38]:
# Predict the target for the polynomial-expanded test features.
y_pred = model.predict(X_test_poly)

#### Calculating the Mean Squared Error and R-Squared

In [39]:
# Score the test-set predictions with mean squared error and R-squared.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

# Print both metrics, one per line.
print(f"mean squared error: {mse}", f"R-Squared: {r2}", sep="\n")

mean squared error: 240.85066289207268
R-Squared: 0.9417523972100343
