# Possum head length prediction with Polynomial Linear Regression

## Importing the libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Load dataset

In [2]:
# Read the possum measurements from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='possum.csv')

# Preview the first five rows to check the columns that were loaded.
df.head(n=5)

Unnamed: 0,case,site,Pop,sex,age,hdlngth,skullw,totlngth,taill,footlgth,earconch,eye,chest,belly
0,1,1,Vic,m,8.0,94.1,60.4,89.0,36.0,74.5,54.5,15.2,28.0,36.0
1,2,1,Vic,f,6.0,92.5,57.6,91.5,36.5,72.5,51.2,16.0,28.5,33.0
2,3,1,Vic,f,6.0,94.0,60.0,95.5,39.0,75.4,51.9,15.5,30.0,34.0
3,4,1,Vic,f,6.0,93.2,57.1,92.0,38.0,76.1,52.2,15.2,28.0,34.0
4,5,1,Vic,f,2.0,91.5,56.3,85.5,36.0,71.0,53.2,15.1,28.5,33.0


In [3]:
# Build the predictor and the target as 2-D column arrays of shape (n_samples, 1).
#
# Columns are selected by name instead of by position (previously iloc 6 and 5),
# so the cell keeps selecting the same data if the CSV's columns are reordered
# or extended. Selecting with a list of names ([['col']]) yields a one-column
# DataFrame, so .to_numpy() is already 2-D and no manual reshape is needed.
X = df[['skullw']].to_numpy()   # predictor: 'skullw' (presumably skull width -- confirm against the data dictionary)
y = df[['hdlngth']].to_numpy()  # target: 'hdlngth', the head length this notebook predicts
print(X)
print(y)

[[60.4]
 [57.6]
 [60. ]
 [57.1]
 [56.3]
 [54.8]
 [58.2]
 [57.6]
 [56.3]
 [58. ]
 [57.2]
 [55.6]
 [59.9]
 [57.6]
 [57.6]
 [56. ]
 [67.7]
 [55.7]
 [55.4]
 [56.3]
 [58.1]
 [58.5]
 [56.1]
 [54.9]
 [58.5]
 [59. ]
 [54.5]
 [56.8]
 [56. ]
 [54.4]
 [54.1]
 [56.7]
 [54.6]
 [55.7]
 [57.9]
 [59.3]
 [54.8]
 [56. ]
 [51.5]
 [55. ]
 [57. ]
 [54.1]
 [55.5]
 [51.5]
 [55.9]
 [54.4]
 [54.8]
 [63.2]
 [59.2]
 [56.4]
 [59.6]
 [61. ]
 [58.1]
 [63. ]
 [63.2]
 [61.5]
 [59.4]
 [64.2]
 [62.8]
 [57.7]
 [59. ]
 [58. ]
 [56.4]
 [56.5]
 [57.4]
 [55.8]
 [57.6]
 [56. ]
 [55.6]
 [56.4]
 [57.6]
 [52.4]
 [52.3]
 [52. ]
 [58.1]
 [56.8]
 [56.2]
 [51. ]
 [50. ]
 [52.6]
 [56. ]
 [54. ]
 [53.8]
 [54.6]
 [56.2]
 [53.2]
 [60.7]
 [58. ]
 [58.4]
 [54.6]
 [59.6]
 [56.3]
 [54. ]
 [57.6]
 [56.6]
 [55.7]
 [53.1]
 [68.6]
 [56.2]
 [56. ]
 [54.7]
 [55. ]
 [55.2]
 [59.9]]
[[ 94.1]
 [ 92.5]
 [ 94. ]
 [ 93.2]
 [ 91.5]
 [ 93.1]
 [ 95.3]
 [ 94.8]
 [ 93.4]
 [ 91.8]
 [ 93.3]
 [ 94.9]
 [ 95.1]
 [ 95.4]
 [ 92.9]
 [ 91.6]
 [ 94.7]
 [ 93.5]
 [ 94

## Splitting the dataset into training and test sets

In [4]:
from sklearn.model_selection import train_test_split

# Split predictor and target together so the rows stay aligned.
# No test_size is passed, so scikit-learn's default proportions apply;
# random_state=0 fixes the shuffle so the split is repeatable.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=0)

## Training the Polynomial Regression model on the training dataset

In [5]:
from sklearn.preprocessing import PolynomialFeatures

# Expand the single predictor x into the columns [1, x, x^2, x^3, x^4].
poly_reg = PolynomialFeatures(degree=4)

# Fit the transformer on the training data only, then apply it.
poly_reg.fit(X_train)
X_poly_train = poly_reg.transform(X_train)
print(X_poly_train)

[[1.00000000e+00 5.82000000e+01 3.38724000e+03 1.97137368e+05
  1.14733948e+07]
 [1.00000000e+00 6.07000000e+01 3.68449000e+03 2.23648543e+05
  1.35754666e+07]
 [1.00000000e+00 5.62000000e+01 3.15844000e+03 1.77504328e+05
  9.97574323e+06]
 [1.00000000e+00 5.26000000e+01 2.76676000e+03 1.45531576e+05
  7.65496090e+06]
 [1.00000000e+00 5.65000000e+01 3.19225000e+03 1.80362125e+05
  1.01904601e+07]
 [1.00000000e+00 6.86000000e+01 4.70596000e+03 3.22828856e+05
  2.21460595e+07]
 [1.00000000e+00 5.96000000e+01 3.55216000e+03 2.11708736e+05
  1.26178407e+07]
 [1.00000000e+00 5.24000000e+01 2.74576000e+03 1.43877824e+05
  7.53919798e+06]
 [1.00000000e+00 6.30000000e+01 3.96900000e+03 2.50047000e+05
  1.57529610e+07]
 [1.00000000e+00 5.68000000e+01 3.22624000e+03 1.83250432e+05
  1.04086245e+07]
 [1.00000000e+00 5.54000000e+01 3.06916000e+03 1.70031464e+05
  9.41974311e+06]
 [1.00000000e+00 5.77000000e+01 3.32929000e+03 1.92100033e+05
  1.10841719e+07]
 [1.00000000e+00 5.46000000e+01 2.981160

In [6]:
from sklearn.linear_model import LinearRegression

# Ordinary least squares on the polynomial features; fit() returns the
# fitted estimator itself, so the call can be chained onto the constructor.
regressor = LinearRegression().fit(X_poly_train, y_train)

# Show the fitted estimator as the cell output.
regressor

LinearRegression()

## Predicting on the test set

In [7]:
# Reuse the transformer fitted on the training data (transform, not
# fit_transform) to expand the test predictor, then predict from it.
y_pred = regressor.predict(
    poly_reg.transform(X_test)
)

## Calculating the R² score

In [8]:
from sklearn.metrics import r2_score

# Coefficient of determination of the predictions on the held-out test set.
r2_score(y_true=y_test, y_pred=y_pred)

0.539168267714365