In [33]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import GridSearchCV, KFold, train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
import numpy as np

In [34]:
# Load dataset
data = fetch_california_housing()
X = data.data
y = data.target       


In [35]:
# Split into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [36]:
# Create KNN regressor
knn = KNeighborsRegressor()

# GridSearchCV

In [37]:
# Define hyperparameter grid
param_grid = {
    'n_neighbors': [3, 5, 7, 9],
    'weights': ['uniform', 'distance']
}

In [38]:
# Set up K-Fold cross-validation
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

In [39]:
# Grid Search with KFold
grid_search = GridSearchCV(knn, param_grid, cv=kfold, scoring='neg_mean_squared_error', n_jobs=-1)

In [40]:
# Fit the model
grid_search.fit(X_train, y_train)

In [41]:
# Best parameters and score
print("Best Parameters:", grid_search.best_params_)
print("Best CV Score (Negative MSE):", grid_search.best_score_)

Best Parameters: {'n_neighbors': 9, 'weights': 'distance'}
Best CV Score (Negative MSE): -1.1147009722946977


In [42]:
# Evaluate on test set
best_knn = grid_search.best_estimator_
y_pred = best_knn.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print("Test Set MSE:", mse)

Test Set MSE: 1.0656306203166723


# RandomizedSearchCV

In [43]:
from sklearn.model_selection import RandomizedSearchCV, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from scipy.stats import randint

In [44]:
# Define a pipeline with scaling and KNN
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('knn', KNeighborsRegressor())
])

In [45]:
# Define hyperparameter distribution
param_dist = {
    'knn__n_neighbors': randint(1, 20)
}

In [46]:
# RandomizedSearchCV
random_search = RandomizedSearchCV(pipeline, param_distributions=param_dist,
                                   n_iter=10, cv=kfold, scoring='neg_mean_squared_error',
                                   random_state=42)

In [47]:
# Fit the model
random_search.fit(X, y)

In [48]:
# Best params and score
print("Best Parameters:", random_search.best_params_)
print("Best MSE:", -random_search.best_score_)

Best Parameters: {'knn__n_neighbors': 8}
Best MSE: 0.3986036156937751
