# KNN for Regression – Implementation Guidelines
1. Import data set
2. Separate X (Gender, Height) and Y (y=Weight).
3. Train = 70%, Test = 30%
4. Apply Linear Regression
5. Evaluate the Model (Testing and training Accuracy, MSE for testing)
6. Apply KNN Regressor: Scikit-Learn Link
7. Evaluate the Model (Testing and training Accuracy, MSE for testing)
8. Compare KNN & Linear Regression with the KNN Model and Linear regression as well.

In [1]:
import pandas as pd
dataset = pd.read_csv("weight-height.csv")

In [2]:
dataset

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.042470
4,Male,69.881796,206.349801
...,...,...,...
8550,Female,60.483946,110.565497
8551,Female,63.423372,129.921671
8552,Female,65.584057,155.942671
8553,Female,67.429971,151.678405


In [3]:
df = pd.DataFrame(dataset)
df.head()

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.04247
4,Male,69.881796,206.349801


# Separate X (Gender, Height) and Y (y=Weight).

In [4]:
x = df[['Gender', 'Height']]
y = df[['Weight']]

In [5]:
x.head()

Unnamed: 0,Gender,Height
0,Male,73.847017
1,Male,68.781904
2,Male,74.110105
3,Male,71.730978
4,Male,69.881796


In [6]:
y.head()

Unnamed: 0,Weight
0,241.893563
1,162.310473
2,212.740856
3,220.04247
4,206.349801


# Converting 'Gender' to numerical values (using one-hot encoding)


In [7]:
x = pd.get_dummies(x, columns=['Gender'], drop_first=True)

In [8]:
from sklearn.model_selection import train_test_split

# Split the data into training and testing sets


In [9]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=27)

# Apply Linear Regression

In [10]:
from sklearn.linear_model import LinearRegression
li_reg = LinearRegression()

In [11]:
li_reg.fit(x_train, y_train)

# Evaluate the Model (Testing and training Accuracy, MSE for testing)

In [12]:
y_test_pred =li_reg.predict(x_test)
y_train_pred = li_reg.predict(x_train)

In [13]:
from sklearn.metrics import r2_score

In [14]:
test_r2 = r2_score(y_test, y_test_pred)
train_r2 = r2_score(y_train, y_train_pred)

In [15]:
test_r2

0.902720043351082

In [16]:
train_r2

0.8986749314083868

In [17]:
from sklearn.metrics import mean_squared_error

In [30]:
mse_test_li_reg = mean_squared_error(y_test, y_test_pred)

In [31]:
mse_test_li_reg

101.74460876168067

# Apply KNN Regressor

In [20]:
from sklearn.neighbors import KNeighborsRegressor

In [21]:
knn_reg = KNeighborsRegressor(n_neighbors=3)

In [22]:
knn_reg.fit(x_train, y_train)

In [24]:
y_test_pred_knn = knn_reg.predict(x_test)
y_train_pred_knn = knn_reg.predict(x_train)

In [25]:
test_r2_knn = r2_score(y_test, y_test_pred_knn)
train_r2_knn = r2_score(y_train, y_train_pred_knn)

In [26]:
test_r2_knn

0.8697926864193555

In [27]:
train_r2_knn

0.9315469552706966

In [32]:
mse_test_knn_reg = mean_squared_error(y_test, y_test_pred_knn)

In [33]:
mse_test_knn_reg

136.1831628480635

# Compare KNN & Linear Regression with the KNN Model and Linear regression as well.

In [36]:
if mse_test_knn_reg < mse_test_li_reg:
    print("KNN performs better than Linear Regression on this dataset.")
else:
    print("Linear Regression performs better than KNN on this dataset.")

Linear Regression performs better than KNN on this dataset.


# KNN for Classification – Implementation Guidelines

1. Import data set
2. Separate X and Y. (y=Gender)
3. Train = 70%, Test = 30%
4. Apply KNN Classifier
5. Evaluate the Model by only Accuracy.

In [72]:
import pandas as pd
dataset2 = pd.read_csv("weight-height.csv")

In [73]:
dataset2

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.042470
4,Male,69.881796,206.349801
...,...,...,...
8550,Female,60.483946,110.565497
8551,Female,63.423372,129.921671
8552,Female,65.584057,155.942671
8553,Female,67.429971,151.678405


In [74]:
df = pd.DataFrame(dataset2)
df.head()

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.04247
4,Male,69.881796,206.349801


In [75]:
df.tail()

Unnamed: 0,Gender,Height,Weight
8550,Female,60.483946,110.565497
8551,Female,63.423372,129.921671
8552,Female,65.584057,155.942671
8553,Female,67.429971,151.678405
8554,Female,60.921791,131.253738


In [76]:
x2 = df[['Height', 'Weight']]
y2 = df[['Gender']]

In [77]:
x2.head()

Unnamed: 0,Height,Weight
0,73.847017,241.893563
1,68.781904,162.310473
2,74.110105,212.740856
3,71.730978,220.04247
4,69.881796,206.349801


In [78]:
y2.head()

Unnamed: 0,Gender
0,Male
1,Male
2,Male
3,Male
4,Male


In [85]:
from sklearn.preprocessing import LabelEncoder

In [86]:
encoder = LabelEncoder()
y2_encoded = encoder.fit_transform(y2)

  y = column_or_1d(y, warn=True)


In [87]:
y2_encoded

array([1, 1, 1, ..., 0, 0, 0])

# Split the data into training and testing sets

In [88]:
x2_train, x2_test, y2_train, y2_test = train_test_split(x2, y2_encoded, test_size=0.3, random_state=27)

# Apply KNN Classifier.

In [89]:
from sklearn.neighbors import KNeighborsClassifier

In [90]:
knn_clasf = KNeighborsRegressor(n_neighbors=3)

In [91]:
knn_clasf.fit(x2_train, y2_train)

In [92]:
predictions = knn_clasf.predict(x2_test)

In [93]:
predictions

array([1.        , 0.33333333, 1.        , ..., 0.66666667, 0.        ,
       0.66666667])

# Evaluate the Model by only Accuracy.

In [94]:
from sklearn.metrics import accuracy_score

In [96]:
threshold = 0.5
predictions_binary = (predictions > threshold).astype(int)

In [97]:
accuracy = accuracy_score(y2_test, predictions_binary)

In [98]:
accuracy

0.9068952084144917