# Part 1: KNN for Regression

In [1]:
import pandas as pd

# Height/weight measurements are read from a CSV in the working directory.
DATA_FILE = 'weight-height.csv'
df = pd.read_csv(DATA_FILE)

# Preview the first rows to confirm which columns were read.
df.head()

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.04247
4,Male,69.881796,206.349801


In [2]:
# Encode Gender numerically (Male -> 0, Female -> 1) so it can be used as a
# model feature and, further down, as the classification target.
#
# Why not Series.replace: going from strings to ints with replace() relies on
# pandas silently downcasting the object column to int64. That downcast is
# deprecated (FutureWarning in pandas 2.2+), and without it the column would
# stay object-typed. Mapping and casting explicitly pins the int64 dtype.
gender_codes = {'Male': 0, 'Female': 1}

# Guard keeps the cell re-runnable: once the column is integer-coded, mapping
# it again would turn every value into NaN.
if not pd.api.types.is_integer_dtype(df['Gender']):
    # astype raises if a label missing from gender_codes produced NaN, rather
    # than letting an unencoded value flow into the models.
    df['Gender'] = df['Gender'].map(gender_codes).astype('int64')
df.head()

Unnamed: 0,Gender,Height,Weight
0,0,73.847017,241.893563
1,0,68.781904,162.310473
2,0,74.110105,212.740856
3,0,71.730978,220.04247
4,0,69.881796,206.349801


In [3]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(8555, 3)

In [4]:
# Regression setup: Weight is the target, every remaining column is a feature.
y = df['Weight']
x = df.drop(columns='Weight')
x.head()

Unnamed: 0,Gender,Height
0,0,73.847017
1,0,68.781904
2,0,74.110105
3,0,71.730978
4,0,69.881796


In [5]:
# Preview the regression target (the Weight column).
y.head()

0    241.893563
1    162.310473
2    212.740856
3    220.042470
4    206.349801
Name: Weight, dtype: float64

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing. The fixed random_state makes the
# shuffle, and therefore the split, repeatable from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
)
x_train.head()

Unnamed: 0,Gender,Height
553,0,67.594031
1397,0,71.601697
7934,1,62.625985
8367,1,66.136131
3320,0,71.843308


In [7]:
# Preview the held-out feature rows; row labels are the original frame indices.
x_test.head()

Unnamed: 0,Gender,Height
6006,1,64.846644
1197,0,68.886367
2862,0,70.963369
6497,1,62.737189
2860,0,69.57803


In [8]:
# Preview the training targets; their row labels line up with x_train.
y_train.head()

553     186.751417
1397    211.031652
7934    143.768451
8367    151.814648
3320    196.505814
Name: Weight, dtype: float64

In [9]:
# Preview the held-out targets; their row labels line up with x_test.
y_test.head()

6006    149.668369
1197    197.642244
2862    178.551191
6497    141.343095
2860    195.322675
Name: Weight, dtype: float64

# Linear Regression

In [10]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Fit a linear regression on the training split (fit() returns the estimator).
lr = LinearRegression().fit(x_train, y_train)

# Predictions for both splits.
y_train_pred = lr.predict(x_train)
y_test_pred = lr.predict(x_test)

# Evaluation. A regressor's score() is the R^2 of its own predictions, so the
# predictions above are reused through r2_score instead of predicting again.
# The "*_accuracy" names are kept because the comparison cell reads them; the
# values are R^2 scores, not classification accuracy.
linear_reg_train_accuracy = r2_score(y_train, y_train_pred)
linear_reg_test_accuracy = r2_score(y_test, y_test_pred)
linear_reg_mse = mean_squared_error(y_test, y_test_pred)


# KNN Model

In [11]:
from sklearn.neighbors import KNeighborsRegressor

# Fit a K-nearest-neighbours regressor with the library's default
# hyperparameters on the same training split used for the linear model.
knn = KNeighborsRegressor().fit(x_train, y_train)

# Predictions for both splits.
y_train_pred_knn = knn.predict(x_train)
y_test_pred_knn = knn.predict(x_test)

# Evaluation. As with any regressor, score() is the R^2 of the model's own
# predictions, so the predictions above are reused through r2_score.
# The "*_accuracy" names are kept because the comparison cell reads them.
knn_reg_train_accuracy = r2_score(y_train, y_train_pred_knn)
knn_reg_test_accuracy = r2_score(y_test, y_test_pred_knn)
knn_reg_mse = mean_squared_error(y_test, y_test_pred_knn)

# Comparison of KNN & Linear Regression

In [12]:
# Side-by-side report for the two regressors. The values labelled "Accuracy"
# are the regression scores (R^2) computed in the model cells, not
# classification accuracy; MSE is measured on the test split.
report_rows = (
    ("Linear Regression:", linear_reg_train_accuracy, linear_reg_test_accuracy, linear_reg_mse),
    # Leading newline puts a blank line between the two sections.
    ("\nKNN Regressor:", knn_reg_train_accuracy, knn_reg_test_accuracy, knn_reg_mse),
)
for heading, train_score, test_score, test_mse in report_rows:
    print(heading)
    print(f"Training Accuracy: {train_score:.2f}")
    print(f"Testing Accuracy: {test_score:.2f}")
    print(f"Mean Squared Error (MSE): {test_mse:.2f}")

Linear Regression:
Training Accuracy: 0.90
Testing Accuracy: 0.91
Mean Squared Error (MSE): 96.84

KNN Regressor:
Training Accuracy: 0.92
Testing Accuracy: 0.88
Mean Squared Error (MSE): 121.34


# Part 2: KNN for Classification

In [13]:
# Re-inspect the frame before the classification part; Gender is already
# encoded as 0/1 by the earlier encoding cell.
df.head()

Unnamed: 0,Gender,Height,Weight
0,0,73.847017,241.893563
1,0,68.781904,162.310473
2,0,74.110105,212.740856
3,0,71.730978,220.04247
4,0,69.881796,206.349801


In [14]:
# Classification setup: Gender is the label, every remaining column is a feature.
y1 = df['Gender']
x1 = df.drop(columns='Gender')
x1.head()

Unnamed: 0,Height,Weight
0,73.847017,241.893563
1,68.781904,162.310473
2,74.110105,212.740856
3,71.730978,220.04247
4,69.881796,206.349801


In [15]:
# Preview the classification labels (the encoded Gender column).
y1.head()

0    0
1    0
2    0
3    0
4    0
Name: Gender, dtype: int64

In [16]:
from sklearn.model_selection import train_test_split

# Same 70/30 split settings as the regression part: with the identical
# random_state the same row labels land in train and test.
x1_train, x1_test, y1_train, y1_test = train_test_split(
    x1,
    y1,
    test_size=0.3,
    random_state=42,
)
x1_train.head()

Unnamed: 0,Height,Weight
553,67.594031,186.751417
1397,71.601697,211.031652
7934,62.625985,143.768451
8367,66.136131,151.814648
3320,71.843308,196.505814


In [17]:
# Preview the held-out Height/Weight feature rows.
x1_test.head()

Unnamed: 0,Height,Weight
6006,64.846644,149.668369
1197,68.886367,197.642244
2862,70.963369,178.551191
6497,62.737189,141.343095
2860,69.57803,195.322675


In [18]:
# Preview the training labels; their row labels line up with x1_train.
y1_train.head()

553     0
1397    0
7934    1
8367    1
3320    0
Name: Gender, dtype: int64

In [19]:
# Preview the held-out labels; their row labels line up with x1_test.
y1_test.head()

6006    1
1197    0
2862    0
6497    1
2860    0
Name: Gender, dtype: int64

In [20]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Build a 3-nearest-neighbour classifier and train it on the training split
# (fit() returns the estimator itself).
# NOTE(review): KNN is distance-based and Height/Weight are fed in unscaled,
# so the larger-valued Weight column likely dominates the distance. Consider
# standardising the features; confirm whether the raw scale is intentional.
knn_classifier = KNeighborsClassifier(n_neighbors=3).fit(x1_train, y1_train)

# Predict Gender for the held-out rows.
y1_pred = knn_classifier.predict(x1_test)

# Fraction of held-out rows whose predicted label matches the true label.
accuracy = accuracy_score(y_true=y1_test, y_pred=y1_pred)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.90
