# Polynomial Regression

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Read the raw table. Every column except the last is taken as a feature,
# and the final column is the regression target.
dataset = pd.read_csv('insurance.csv')
feature_frame = dataset.iloc[:, :-1]
target_column = dataset.iloc[:, -1]
X = feature_frame.to_numpy()
y = target_column.to_numpy()

## Encoding categorical data

In [3]:
from sklearn.compose import ColumnTransformer
# LabelEncoder is no longer used in this cell; the import is kept only in case
# other cells rely on it being in the namespace.
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder

# Map the categorical columns 1 and 4 to numeric codes.
# OrdinalEncoder is the encoder meant for feature columns (LabelEncoder is
# documented for target values only). It fits each column independently and
# orders categories by sorted value, so the codes are the same ones
# LabelEncoder produced.
X[:, [1, 4]] = OrdinalEncoder().fit_transform(X[:, [1, 4]])

# One-hot encode column 5. The indicator columns are placed first in the
# result, followed by the passthrough columns in their original order.
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [5])], remainder='passthrough')

# Cast to float so later steps receive a numeric array instead of an
# object-dtype array of mixed Python values.
X = np.array(ct.fit_transform(X), dtype=float)
print(X)

[[0.0 0.0 0.0 ... 27.9 0 1]
 [0.0 0.0 1.0 ... 33.77 1 0]
 [0.0 0.0 1.0 ... 33.0 3 0]
 ...
 [0.0 0.0 1.0 ... 36.85 0 0]
 [0.0 0.0 0.0 ... 25.8 0 0]
 [0.0 1.0 0.0 ... 29.07 0 1]]


## Splitting the dataset into the Training set and Test set

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; the fixed random_state makes the
# split repeatable from run to run.
split_parts = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split_parts

## Training the Polynomial Regression model on the Training set

In [5]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Expand the training features into polynomial terms (degree 4), learning the
# expansion from the training rows only.
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(X_train)

# Fit a linear model on the expanded features. fit() returns the estimator,
# and ending the cell with it keeps the notebook's display of the fitted model.
regressor = LinearRegression().fit(X_poly, y_train)
regressor

## Predicting the Test set results

In [6]:
# Apply the expansion fitted on the training set to the test rows, then predict.
X_test_poly = poly_reg.transform(X_test)
y_pred = regressor.predict(X_test_poly)

# Print predictions (left column) next to the true targets (right column),
# shown with two decimal places.
np.set_printoptions(precision=2)
print(np.column_stack((y_pred, y_test)))

[[12191.9   9724.53]
 [ 9346.97  8547.69]
 [61741.81 45702.02]
 [10029.23 12950.07]
 [18605.01  9644.25]
 [ 2283.76  4500.34]
 [ 3423.56  2198.19]
 [12203.02 11436.74]
 [ 7866.38  7537.16]
 [ 9684.33  5425.02]
 [ 6233.28  6753.04]
 [10849.86 10493.95]
 [ 8878.65  7337.75]
 [ 7923.99  4185.1 ]
 [20792.77 18310.74]
 [15313.06 10702.64]
 [13913.07 12523.6 ]
 [ 8475.47  3490.55]
 [ 8178.59  6457.84]
 [29303.89 33475.82]
 [21370.18 23967.38]
 [15085.47 12643.38]
 [11310.79 23045.57]
 [29562.2  23065.42]
 [ 3847.32  1674.63]
 [ 9345.74  4667.61]
 [ 5436.56  3732.63]
 [ 9183.9   7682.67]
 [ 6850.84  3756.62]
 [10631.33  8413.46]
 [ 7520.36  8059.68]
 [48483.65 48970.25]
 [13570.45 12979.36]
 [10774.75 20630.28]
 [15835.93 14571.89]
 [ 5034.82  4137.52]
 [ 8507.52  8347.16]
 [40485.93 51194.56]
 [41458.73 40003.33]
 [ 3811.82  1880.49]
 [ 6625.05  5458.05]
 [ 4008.56  2867.12]
 [28821.02 20149.32]
 [57655.91 47496.49]
 [37395.84 36149.48]
 [ 8135.38 26018.95]
 [15284.74 19749.38]
 [ 8250.07  6

## Evaluating the Model Performance

In [7]:
from sklearn.metrics import r2_score

# R^2 of the predictions against the held-out targets. Leaving the value as
# the cell's last expression makes the notebook display it.
test_r2 = r2_score(y_true=y_test, y_pred=y_pred)
test_r2

0.8471969167426787