In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import (
    mean_squared_error,
    r2_score,
    mean_absolute_error,
    explained_variance_score,
)
from joblib import dump
from math import sqrt
import os

def perform_knn_regression(X, y, n_neighbors, *, results_path="knn_results.txt"):
    """Fit a KNN regressor on a fixed 80/20 split and log its test metrics.

    The data is split with ``test_size=0.2`` and ``random_state=42``, a
    ``KNeighborsRegressor`` using the Euclidean metric is fitted on the
    training part, and the following scores on the held-out part are
    appended to ``results_path``: MAE, MSE, RMSE, R^2 and explained variance.

    Args:
        X: Feature data accepted by ``train_test_split`` and the estimator.
        y: Target values aligned with ``X``.
        n_neighbors: Number of neighbours used by the regressor.
        results_path: Text file the metrics are appended to. Defaults to
            ``"knn_results.txt"`` in the current working directory.

    Returns:
        The fitted ``KNeighborsRegressor``.

    Exceptions raised by scikit-learn or while opening/writing the results
    file are not caught here and propagate to the caller.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    knn_model = KNeighborsRegressor(n_neighbors=n_neighbors, metric="euclidean")
    knn_model.fit(X_train, y_train)
    y_pred = knn_model.predict(X_test)

    # Compute every metric before the results file is opened, so the handle
    # is held only for the write itself and a scoring failure never touches
    # the file.
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    rmse = sqrt(mse)
    r2 = r2_score(y_test, y_pred)
    explained_variance = explained_variance_score(y_test, y_pred)

    report = (
        f"\nn: {n_neighbors}\n"
        f"Mean Absolute Error: {mae:.4f}\n"
        f"Mean Squared Error: {mse:.4f}\n"
        f"Root Mean Squared Error: {rmse:.4f}\n"
        f"R^2 Score: {r2:.4f}\n"
        f"Explained Variance Score: {explained_variance:.4f}\n"
    )

    # Append mode keeps the entries of earlier runs / other values of n.
    with open(results_path, "a", encoding="utf-8") as file:
        file.write(report)

    return knn_model


# Input data location and modelling configuration.
csv_file = os.path.join("..", "data", "climate_change_AQI.csv")
target_var = "Ozone"
features = [
    "Longitude",
    "Latitude",
    "Sine",
    "Cosine",
    "Land_Surface_Temp(C)",
    "Sea_Ice_Extent(10^6 sq km)",
    "Sea_Surface_Temp(C)",
]
df = pd.read_csv(csv_file)
# Work on a reproducible 1% random sample of the rows.
df = df.sample(frac=0.01, random_state=42)

X = df[features]
# Use the configured target column instead of repeating the literal name.
y = df[target_var]

# Evaluate k = 3, 7, 11, ..., 27.
for i in range(3, 29, 4):
    model = perform_knn_regression(X, y, i)
    # Persist the fitted model so the message below is accurate; previously
    # the returned model was discarded and nothing but the metrics was saved.
    dump(model, f"knn_model_{i}.joblib")
    print(f"KNN model and results saved for {i}")

KNN model and results saved for 3
KNN model and results saved for 7
KNN model and results saved for 11
KNN model and results saved for 15
KNN model and results saved for 19
KNN model and results saved for 23
KNN model and results saved for 27
