# Polynomial Regression

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Load the housing CSV and discard every row that contains a missing value.
dataset = pd.read_csv('CA_housing.csv').dropna(axis=0)

In [3]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# Target: the second-to-last column, sliced so that it stays 2-D (n_samples, 1).
y = dataset.iloc[:, -2:-1].values

# Features: every column except the target; the last column stays last.
leading_columns = dataset.iloc[:, :-2]
trailing_column = dataset.iloc[:, -1]
X = pd.concat([leading_columns, trailing_column], axis=1).values

# One-hot encode the last feature column; all other columns pass through
# unchanged and are placed after the encoded columns in the result.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), [-1])],
    remainder='passthrough',
)
X = np.array(ct.fit_transform(X))


In [4]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

## Training the Linear Regression model on the Training set

In [5]:
from sklearn.linear_model import LinearRegression

# Baseline model: ordinary least squares on the encoded training features.
lin_reg = LinearRegression()
lin_reg.fit(X=X_train, y=y_train)

LinearRegression()

## Training the Polynomial Regression model on the Training set

In [6]:
from sklearn.preprocessing import PolynomialFeatures

# Expand the training features into every polynomial term up to degree 4,
# then fit a linear model on the expanded matrix.
poly_reg = PolynomialFeatures(degree=4)
X_train_poly = poly_reg.fit(X_train).transform(X_train)
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X=X_train_poly, y=y_train)

LinearRegression()

## Visualising the Linear Regression results

In [7]:
# plt.scatter(X, y, color = 'red')
# plt.plot(X, lin_reg.predict(X), color = 'blue')
# plt.title('Truth or Bluff (Linear Regression)')
# plt.xlabel('Position Level')
# plt.ylabel('Salary')
# plt.show()

## Visualising the Polynomial Regression results

In [8]:
# plt.scatter(X, y, color = 'red')
# plt.plot(X, lin_reg_2.predict(poly_reg.fit_transform(X)), color = 'blue')
# plt.title('Truth or Bluff (Polynomial Regression)')
# plt.xlabel('Position level')
# plt.ylabel('Salary')
# plt.show()

## Visualising the Polynomial Regression results (for higher resolution and smoother curve)

In [9]:
# X_grid = np.arange(min(X), max(X), 0.1)
# X_grid = X_grid.reshape((len(X_grid), 1))
# plt.scatter(X, y, color = 'red')
# plt.plot(X_grid, lin_reg_2.predict(poly_reg.fit_transform(X_grid)), color = 'blue')
# plt.title('Truth or Bluff (Polynomial Regression)')
# plt.xlabel('Position level')
# plt.ylabel('Salary')
# plt.show()

## Predicting a new result with Linear Regression

In [10]:
# Predict the target for one hand-written sample that is already in the
# encoded layout (encoder output columns first, passthrough columns after).
lin_reg.predict([[
    0.0, 1.0, 0.0, 0.0, 0.0,                # one-hot block
    -121.24, 39.37, 16.0, 2785.0, 616.0,    # passthrough features
    1387.0, 530.0, 2.3886,
]])

array([[66946.23662034]])

## Predicting a new result with Polynomial Regression

In [None]:
# Predict the same hand-written sample with the polynomial model.
# The expansion was already fitted on the training set, so only `transform`
# is applied here; `fit_transform` would re-fit the transformer on the very
# sample being predicted.
lin_reg_2.predict(poly_reg.transform([[
    0.0, 1.0, 0.0, 0.0, 0.0,
    -121.24, 39.37, 16.0, 2785.0, 616.0,
    1387.0, 530.0, 2.3886,
]]))

In [None]:
# Show the target value of the last row in the dataset.
y[-1]

In [None]:
# Predict the held-out test set. The polynomial expansion fitted on the
# training data is only applied (`transform`), never re-fitted on test data.
y_pred = lin_reg_2.predict(poly_reg.transform(X_test))

# Print predictions (left column) next to the true values (right column),
# rounded to two decimals for display.
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(-1, 1), y_test.reshape(-1, 1)), axis=1))

In [None]:
# Two-column table: model prediction next to the held-out true value.
predicted_column = y_pred.reshape(-1, 1)
actual_column = y_test.reshape(-1, 1)
df = pd.DataFrame(
    data=np.concatenate((predicted_column, actual_column), axis=1),
    columns=['Predicted ($)', 'Actual ($)'],
)
df


In [None]:
# import dataframe_image as dfi
# dfi.export(df, 'act_pred_poly_4.png', max_rows=5)

In [None]:
# Endpoints of the y = x reference line drawn on the predicted-vs-true plot.
# np.max reduces the 2-D target array to a scalar; the builtin max() returns a
# one-element array, and calling int() on such an array is deprecated since
# NumPy 1.25. A straight line only needs its two endpoints, so there is no
# reason to allocate one sample per unit of the target value.
upper = float(np.max(y_test))
px = np.linspace(0, upper, 2)
py = np.linspace(0, upper, 2)

In [None]:
# Scatter the predictions against the true values and overlay the px/py
# reference line.
plt.figure(figsize=(10, 6))
import seaborn as sns
# NOTE: the seaborn theme is applied after the figure already exists; moving
# this call above plt.figure() may change how the figure is rendered.
sns.set()
predicted = y_pred.reshape(-1, 1)
actual = y_test.reshape(-1, 1)
plt.scatter(predicted, actual, color='red')
plt.plot(px, py, color='blue')
plt.title('True vs Predicted Median Home Values (Polynomial Regression)')
plt.xlabel('Predicted Values')
plt.ylabel('True Values')
plt.show()

In [None]:
# Dump the fitted polynomial model's parameters: coefficients first, then the
# intercept on its own line.
print(lin_reg_2.coef_, lin_reg_2.intercept_, sep='\n')

In [None]:
from sklearn.metrics import r2_score

# Compute R2 once and reuse it for the adjusted value.
r2 = r2_score(y_test, y_pred)
print('R2: ', r2)

# Adjusted R2 penalises by the number of predictors the evaluated model
# actually uses. y_pred comes from the polynomial model, so that is the number
# of polynomial terms (minus the constant bias column, which only duplicates
# the intercept), not the number of raw columns in X_test.
n_samples = len(X_test)
n_predictors = poly_reg.n_output_features_ - int(poly_reg.include_bias)
residual_dof = n_samples - n_predictors - 1
# With no residual degrees of freedom the adjusted value is undefined.
adjusted_r2 = (
    1 - (1 - r2) * (n_samples - 1) / residual_dof
    if residual_dof > 0
    else float('nan')
)
print('Adjusted R2: ', adjusted_r2)

In [None]:
from sklearn.metrics import mean_squared_error
import math

# Root-mean-squared error on the test set. The `squared=False` flag was
# deprecated in scikit-learn 1.4 and removed in 1.6, so the square root is
# taken explicitly; this works on every scikit-learn version.
math.sqrt(mean_squared_error(y_test, y_pred))