# Apply linear and polynomial regression to the dataset below and compare the models' accuracy (R² score) and mean squared error

In [1]:
import pandas as pd
import numpy as np
# Load the head-size / brain-weight dataset straight from its raw GitHub URL.
df = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/AP-State-Skill-Development-Corporation/Datasets/master/Regression/headbrain.csv'
)
# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,Gender,Age Range,Head Size(cm^3),Brain Weight(grams)
0,1,1,4512,1530
1,1,1,3738,1297
2,1,1,4261,1335
3,1,1,3777,1282
4,1,1,4177,1590
...,...,...,...,...
232,2,2,3214,1110
233,2,2,3394,1215
234,2,2,3233,1104
235,2,2,3352,1170


In [2]:
# Count missing values per column (isna is the same check as isnull).
df.isna().sum()

Gender                 0
Age Range              0
Head Size(cm^3)        0
Brain Weight(grams)    0
dtype: int64

In [3]:
# Flag each row that repeats an earlier row (first occurrence stays False).
# NOTE(review): the per-row display is truncated, so it cannot confirm that no
# duplicates exist; `.sum()` or `.any()` would answer that directly.
df.duplicated(keep='first')

0      False
1      False
2      False
3      False
4      False
       ...  
232    False
233    False
234    False
235    False
236    False
Length: 237, dtype: bool

In [4]:
# Target: brain weight. Features: every column except the last one
# (the last column is the target in the column listing shown above).
y = df['Brain Weight(grams)']
x = df.iloc[:, :-1]

# Linear Regression

In [5]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures

# Plain least-squares model on the raw feature columns.
model = LinearRegression()

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Kept as the last expression so the fitted estimator is displayed.
model.fit(X=x_train, y=y_train)

LinearRegression()

In [6]:
# Predictions of the linear model for the held-out rows.
predicted = model.predict(X=x_test)
predicted

array([1242.28994947, 1359.42439666, 1247.9247648 , 1215.51057307,
       1336.86029246, 1163.71797457, 1385.70785868, 1070.38741752,
       1260.14286707, 1198.5812842 , 1376.96115705, 1202.64725323,
       1337.35620684, 1274.45618401, 1265.84588242, 1255.77829168,
       1575.39510821, 1275.69596996, 1195.17808357, 1495.30493619,
       1203.78838517, 1217.49423058, 1259.18149231, 1161.88362024,
       1252.05893385, 1170.56212185, 1269.34773705, 1473.13809236,
       1216.25444463, 1495.056979  , 1379.16231776, 1286.95269747,
       1191.39052572, 1165.7698321 , 1473.13809236, 1333.29023781,
       1239.59287438, 1120.72272688, 1213.52691556, 1380.65006089,
       1180.30065224, 1398.60163251, 1265.44862205, 1341.47282504,
       1284.47312558, 1216.53285581, 1225.03160026, 1149.30600362])

In [7]:
# R² of the linear model on the held-out rows, scaled to a percentage.
print(r2_score(y_true=y_test, y_pred=predicted) * 100)

73.45676369778609


In [8]:
# Square root of the mean squared error (i.e. RMSE, not the raw MSE) of the
# linear model on the held-out rows.
print(
    np.sqrt(
        mean_squared_error(y_true=y_test, y_pred=predicted)
    )
)

65.9545256771439


# Polynomial Regression

In [9]:
# Expand the feature columns into all polynomial terms up to degree 5.
# No feature scaling is applied before the expansion.
poly = PolynomialFeatures(degree=5)
poly.fit(x)
x_poly = poly.transform(x)

# Same test_size and random_state as the linear-regression split.
x_train1, x_test1, y_train1, y_test1 = train_test_split(
    x_poly,
    y,
    test_size=0.2,
    random_state=42,
)

In [10]:
# Least-squares fit on the expanded polynomial features.
poly_model = LinearRegression()
# Kept as the last expression so the fitted estimator is displayed.
poly_model.fit(X=x_train1, y=y_train1)

LinearRegression()

In [11]:
# Predictions of the polynomial model for the held-out rows.
predicted1 = poly_model.predict(X=x_test1)
predicted1

array([1234.84487393, 1350.16623986, 1260.49548473, 1200.58549201,
       1331.53879237, 1150.35201861, 1375.72394465, 1050.51465161,
       1257.55928873, 1200.24853768, 1353.16097445, 1204.11064187,
       1331.9155361 , 1292.93695052, 1264.79693515, 1285.87629809,
       1571.63750539, 1294.42597281, 1174.23675001, 1510.57303126,
       1206.48828213, 1203.13560043, 1268.72065598, 1157.30914566,
       1284.29308568, 1167.31076868, 1278.94764585, 1404.64800094,
       1201.54210557, 1510.27280885, 1394.53315078, 1295.65643961,
       1191.6931714 , 1241.55960468, 1404.64800094, 1347.71026904,
       1247.85684534, 1110.45724515, 1198.03252586, 1395.78330167,
       1154.58820847, 1389.71119216, 1290.0964759 , 1356.98030339,
       1293.38278752, 1221.27439129, 1272.96675347, 1111.85535641])

In [12]:
# R² of the polynomial model on the held-out rows, scaled to a percentage.
print(r2_score(y_true=y_test1, y_pred=predicted1) * 100)

72.06163714286187


In [13]:
# Square root of the mean squared error (i.e. RMSE, not the raw MSE) of the
# polynomial model on the held-out rows.
print(
    np.sqrt(
        mean_squared_error(y_true=y_test1, y_pred=predicted1)
    )
)

67.66563183874067
