In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score

# 1 & 2
df = pd.read_csv("weight-height.csv")
X = df[["Height"]]
y = df["Weight"]

# 3
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4
knn = KNeighborsRegressor(n_neighbors=5)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
r2_no_scale = r2_score(y_test, y_pred)

# 5
scaler_norm = MinMaxScaler()
X_train_norm = scaler_norm.fit_transform(X_train)
X_test_norm = scaler_norm.transform(X_test)
knn.fit(X_train_norm, y_train)
y_pred_norm = knn.predict(X_test_norm)
r2_norm = r2_score(y_test, y_pred_norm)

# 6
scaler_std = StandardScaler()
X_train_std = scaler_std.fit_transform(X_train)
X_test_std = scaler_std.transform(X_test)
knn.fit(X_train_std, y_train)
y_pred_std = knn.predict(X_test_std)
r2_std = r2_score(y_test, y_pred_std)

# 7
print(f"R^2 without scaling: {r2_no_scale:.4f}")
print(f"R^2 with normalization: {r2_norm:.4f}")
print(f"R^2 with standardization: {r2_std:.4f}")
print("\nConclusion: Scaling usually improves KNN performance as it relies on distances.")


R^2 without scaling: 0.8346
R^2 with normalization: 0.8346
R^2 with standardization: 0.8346

Conclusion: Scaling usually improves KNN performance as it relies on distances.
