# **Polynomial Regression**

## *Data Import*

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the dataset from a CSV given by a relative path (resolved against the
# current working directory).
# NOTE(review): the filename is spelled "healty" — confirm it matches the file on disk.
df = pd.read_csv("community_healty_with_bmi.csv")

In [3]:
# Preview the first rows to check which columns were read from the CSV.
df.head()

Unnamed: 0.1,Unnamed: 0,age,gender,race/ethnicity,weight,height,visits,bmi
0,0,47,0,3,111.0,165.0,28,40.77
1,1,26,1,2,127.0,168.0,23,45.0
2,2,26,1,1,90.0,173.0,14,30.07
3,3,39,1,2,107.0,185.0,21,31.26
4,4,55,1,0,90.0,155.0,27,37.46


In [4]:
# Remove the "Unnamed: 0" column (presumably a row index written out by an
# earlier to_csv — verify against the data source); it is not a predictor.
# Reassigning instead of inplace=True, together with errors="ignore", keeps the
# cell idempotent: re-running it after the column is gone is a no-op rather
# than a KeyError.
df = df.drop(columns="Unnamed: 0", errors="ignore")

In [5]:
# Preview again to confirm the column drop took effect.
df.head()

Unnamed: 0,age,gender,race/ethnicity,weight,height,visits,bmi
0,47,0,3,111.0,165.0,28,40.77
1,26,1,2,127.0,168.0,23,45.0
2,26,1,1,90.0,173.0,14,30.07
3,39,1,2,107.0,185.0,21,31.26
4,55,1,0,90.0,155.0,27,37.46


## *Polynomial Features*

In [6]:
# Regression target: the "visits" column.
y = df["visits"]
# Predictors: every remaining column of df.
X = df.drop(columns=["visits"])

In [7]:
from sklearn.preprocessing import PolynomialFeatures

In [8]:
# Degree-2 expansion: the original columns plus their squares and pairwise
# products. include_bias=False omits the constant column from the output.
poly_conv = PolynomialFeatures(
    include_bias=False,
    degree=2,
)

In [9]:
# Display the transformer's configuration (it has not been fitted at this point).
poly_conv

In [10]:
# (rows, predictor columns) before the polynomial expansion.
X.shape

(1000, 6)

In [11]:
# Preview the predictor frame; "visits" should no longer be among the columns.
X.head()

Unnamed: 0,age,gender,race/ethnicity,weight,height,bmi
0,47,0,3,111.0,165.0,40.77
1,26,1,2,127.0,168.0,45.0
2,26,1,1,90.0,173.0,30.07
3,39,1,2,107.0,185.0,31.26
4,55,1,0,90.0,155.0,37.46


In [12]:
# Fit the transformer on the predictors, then expand those same predictors
# into their degree-2 polynomial feature matrix.
poly_feat = poly_conv.fit(X).transform(X)

In [13]:
# Shape after expansion: 6 original columns + 6 squares + 15 pairwise
# products = 27 feature columns.
poly_feat.shape

(1000, 27)

## *Train Test Split*

In [14]:
from sklearn.model_selection import train_test_split

In [15]:
# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    poly_feat,
    y,
    random_state=99,
    test_size=0.3,
)

In [16]:
from sklearn.linear_model import LinearRegression

In [17]:
# Linear model over the polynomial features; the intercept is estimated by the
# model itself (fit_intercept=True).
model = LinearRegression(fit_intercept=True)

In [18]:
# Fit the regression on the training split only.
model.fit(X_train, y_train)

In [19]:
# Inspect the training feature matrix (NumPy abbreviates the repr of large arrays).
X_train

array([[4.5000000e+01, 0.0000000e+00, 3.0000000e+00, ..., 3.8416000e+04,
        5.1018800e+03, 6.7756090e+02],
       [2.1000000e+01, 1.0000000e+00, 4.0000000e+00, ..., 3.1684000e+04,
        4.7187800e+03, 7.0278010e+02],
       [4.7000000e+01, 1.0000000e+00, 1.0000000e+00, ..., 3.1684000e+04,
        3.4834600e+03, 3.8298490e+02],
       ...,
       [5.0000000e+01, 1.0000000e+00, 0.0000000e+00, ..., 3.6100000e+04,
        3.4732000e+03, 3.3415840e+02],
       [5.0000000e+01, 1.0000000e+00, 3.0000000e+00, ..., 2.8224000e+04,
        5.3575200e+03, 1.0169721e+03],
       [3.8000000e+01, 0.0000000e+00, 0.0000000e+00, ..., 2.8224000e+04,
        4.6435200e+03, 7.6396960e+02]])

## *Prediction*

In [20]:
# Predict the target for the held-out test rows.
poly_pred = model.predict(X_test)

In [21]:
# Show the predicted values for the test split.
poly_pred

array([25.61683382, 16.27359252, 15.73894567, 11.79983937, 28.74592571,
       12.14550334, 18.35431868, 15.38342566, 26.19541461, 15.95964819,
       13.4385745 , 16.89027665, 17.55579011, 13.36310463, 27.58492887,
       11.69459269, 17.59860679, 21.84236479, 23.87240349, 12.58662041,
       11.69258705, 31.31004233, 19.30644428, 16.61708477, 21.45832677,
       23.09012148, 21.9281036 , 14.19670367, 33.41658689, 16.2922813 ,
       17.85668496, 28.44592584, 22.62646723, 30.70465824, 27.27376183,
       13.22935259, 18.22171876, 20.75881083, 10.99763181, 24.24861097,
       19.16884439, 18.1976085 , 25.29840328, 20.55176945, 13.06493573,
       16.74001776, 24.7864396 ,  8.70533077, 19.7524904 , 27.51318698,
       20.40450035, 23.6819784 , 22.55857128, 16.19712699,  8.04153129,
       17.82577471, 14.84196168, 16.91151952, 30.83271261, 21.74636434,
       21.51133895, 18.47106883, 12.71183781, 24.76097857, 17.41973259,
       14.43011828, 19.49130452, 13.93871557, 15.85693948, 37.02

## *Performance*

In [22]:
from sklearn.metrics import mean_absolute_error, mean_squared_error,r2_score

In [23]:
# Score the test-split predictions against the true targets.
# MAE and RMSE are in the units of the target; RMSE is the square root of the MSE.
mae = mean_absolute_error(y_true=y_test, y_pred=poly_pred)
rmse = np.sqrt(mean_squared_error(y_true=y_test, y_pred=poly_pred))
r2 = r2_score(y_true=y_test, y_pred=poly_pred)

In [24]:
# Print each metric rounded to two decimals, one per line.
for label, value in (("MAE  : ", mae), ("RMSE : ", rmse), ("R^2  : ", r2)):
    print(label, round(value, 2))

MAE  :  2.15
RMSE :  2.52
R^2  :  0.85


**Linear Prediction is much better than Polynomial Prediction for this dataset.**