### 1. Importing Libraries and Dataset

In [400]:
import pandas as pd

In [401]:
df = pd.read_csv('weight-height.csv')
df.head()

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.04247
4,Male,69.881796,206.349801


In [402]:
df = pd.get_dummies(df, columns=['Gender'], drop_first=True)
df.head()

Unnamed: 0,Height,Weight,Gender_Male
0,73.847017,241.893563,True
1,68.781904,162.310473,True
2,74.110105,212.740856,True
3,71.730978,220.04247,True
4,69.881796,206.349801,True


### 2. Separating X (Gender, Height) and Y (Weight)

In [403]:
x = df.drop('Weight', axis = 1)
x.head()

Unnamed: 0,Height,Gender_Male
0,73.847017,True
1,68.781904,True
2,74.110105,True
3,71.730978,True
4,69.881796,True


In [404]:
y = df[['Weight']]
y.head()

Unnamed: 0,Weight
0,241.893563
1,162.310473
2,212.740856
3,220.04247
4,206.349801


### 3. Splitting the dataset into the Training set and Test set
Train = 70%, Test = 30%

In [405]:
from sklearn.model_selection import train_test_split

In [406]:
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.30)

In [407]:
df.shape

(8555, 3)

In [408]:
xtrain.shape #70% data

(5988, 2)

In [409]:
ytest

Unnamed: 0,Weight
1463,161.952431
4556,186.879193
1241,185.469881
3663,190.740873
5228,113.060720
...,...
6543,118.325883
1891,197.679446
7231,126.584642
8242,101.173339


### 4. Applying Linear Regression Model

In [410]:
from sklearn.linear_model import LinearRegression

In [411]:
reg = LinearRegression()

In [412]:
reg.fit(xtrain, ytrain) # training ml model with train data

In [413]:
reg.predict(xtest)

array([[161.34136769],
       [175.5828118 ],
       [196.16265672],
       ...,
       [140.30350809],
       [118.5112069 ],
       [195.01995599]])

In [414]:
ytest['Predicted_Weight'] = reg.predict(xtest)
ytest

Unnamed: 0,Weight,Predicted_Weight
1463,161.952431,161.341368
4556,186.879193,175.582812
1241,185.469881,196.162657
3663,190.740873,178.648485
5228,113.060720,101.752038
...,...,...
6543,118.325883,124.239137
1891,197.679446,185.676240
7231,126.584642,140.303508
8242,101.173339,118.511207


In [415]:
ytest

Unnamed: 0,Weight,Predicted_Weight
1463,161.952431,161.341368
4556,186.879193,175.582812
1241,185.469881,196.162657
3663,190.740873,178.648485
5228,113.060720,101.752038
...,...,...
6543,118.325883,124.239137
1891,197.679446,185.676240
7231,126.584642,140.303508
8242,101.173339,118.511207


In [416]:
ytest.drop('Predicted_Weight', axis=1, inplace=True)

### 5. Evaluating the Model (Testing and training Accuracy, MSE for testing)

In [417]:
#training accuracy
reg.score(xtrain, ytrain)

0.902929608667613

In [418]:
#testing accuracy
reg.score(xtest, ytest)

0.8927855764357167

In [419]:
#MSE
from sklearn.metrics import mean_squared_error
mse = mean_squared_error(ytest, reg.predict(xtest))
mse

108.12293439457049

### 6. Applying KNN Regressor

In [420]:
from sklearn.neighbors import KNeighborsRegressor
knn = KNeighborsRegressor(n_neighbors=3)

knn.fit(xtrain, ytrain)

In [421]:
ytest['Predicted_Weight'] = knn.predict(xtest)

In [422]:
ytest

Unnamed: 0,Weight,Predicted_Weight
1463,161.952431,170.954424
4556,186.879193,173.145823
1241,185.469881,205.028281
3663,190.740873,173.208510
5228,113.060720,102.535955
...,...,...
6543,118.325883,128.722304
1891,197.679446,183.563649
7231,126.584642,142.604996
8242,101.173339,113.099819


In [423]:
ytest.drop('Predicted_Weight', axis=1, inplace=True)

### 7. Evaluating the Model (Testing and training Accuracy, MSE for testing)

In [424]:
#training accuracy
knn.score(xtrain, ytrain)

0.9331946316674564

In [425]:
#testing accuracy
knn.score(xtest, ytest)

0.8611326761248492

In [426]:
#mse
from sklearn.metrics import mean_squared_error
mse = mean_squared_error(ytest, knn.predict(xtest))
mse

140.04405423958664

### 8. Comparing KNN & Linear Regression Model

In [427]:
compare_df = ytest.copy()

In [428]:
compare_df['knn_weight'] = knn.predict(xtest)
compare_df['lr_weight'] = reg.predict(xtest)


In [429]:
compare_df.head()

Unnamed: 0,Weight,knn_weight,lr_weight
1463,161.952431,170.954424,161.341368
4556,186.879193,173.145823,175.582812
1241,185.469881,205.028281,196.162657
3663,190.740873,173.20851,178.648485
5228,113.06072,102.535955,101.752038


In [430]:
compare_df['Difference'] = compare_df['lr_weight'] - compare_df['knn_weight']

compare_df.head()

Unnamed: 0,Weight,knn_weight,lr_weight,Difference
1463,161.952431,170.954424,161.341368,-9.613057
4556,186.879193,173.145823,175.582812,2.436989
1241,185.469881,205.028281,196.162657,-8.865625
3663,190.740873,173.20851,178.648485,5.439975
5228,113.06072,102.535955,101.752038,-0.783917
