In [1]:
import pandas as pd

df = pd.read_csv('advertising.csv')
X, y = df.iloc[:, :3], df.iloc[:, 3]

In [2]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import KFold
import numpy as np

def nmse(true, pred):
    return np.mean((true - pred)**2) / np.var(true)

raw_error = 0.0
kf = KFold(shuffle=True, random_state=42)

for train_idx, test_idx in kf.split(X, y):
    X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
    X_test, y_test = X.iloc[test_idx], y.iloc[test_idx]
    
    knn = KNeighborsRegressor().fit(X_train, y_train)
    raw_error += nmse(knn.predict(X_test), y_test.to_numpy())

In [3]:
from sklearn.preprocessing import StandardScaler

scaled_error = 0.0
kf = KFold(shuffle=True, random_state=42)

for train_idx, test_idx in kf.split(X, y):
    X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
    X_test, y_test = X.iloc[test_idx], y.iloc[test_idx]
    
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    knn = KNeighborsRegressor().fit(X_train, y_train)
    scaled_error += nmse(knn.predict(X_test), y_test.to_numpy())

In [4]:
print(f"Raw NMSE: {raw_error / 5:.4f}")
print(f"Scaled NMSE: {scaled_error / 5:.4f}")

Raw NMSE: 0.1078
Scaled NMSE: 0.1435


In [8]:
from sklearn.preprocessing import StandardScaler

X_new = X.drop('Newspaper', axis=1)
kf = KFold(shuffle=True, random_state=42)

raw_error_wn = 0.0
for train_idx, test_idx in kf.split(X, y):
    X_train, y_train = X_new.iloc[train_idx], y.iloc[train_idx]
    X_test, y_test = X_new.iloc[test_idx], y.iloc[test_idx]
    
    knn = KNeighborsRegressor().fit(X_train, y_train)
    raw_error_wn += nmse(knn.predict(X_test), y_test.to_numpy())

scaled_error_wn = 0.0
for train_idx, test_idx in kf.split(X, y):
    X_train, y_train = X_new.iloc[train_idx], y.iloc[train_idx]
    X_test, y_test = X_new.iloc[test_idx], y.iloc[test_idx]
    
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    knn = KNeighborsRegressor().fit(X_train, y_train)
    scaled_error_wn += nmse(knn.predict(X_test), y_test.to_numpy())
    
print(f"Raw NMSE (without newspaper): {raw_error_wn / 5:.4f}")
print(f"Scaled NMSE (without newspaper): {scaled_error_wn / 5:.4f}")

Raw NMSE (without newspaper): 0.0774
Scaled NMSE (without newspaper): 0.0746
