In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, explained_variance_score
from joblib import dump
from math import sqrt

def perform_knn_regression(X, y, cityname, n_neighbors=5):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    knn_model = KNeighborsRegressor(n_neighbors=n_neighbors, metric='euclidean')
    knn_model.fit(X_train, y_train)
    y_pred = knn_model.predict(X_test)

    with open('knn_results_' + cityname + '.txt', 'w') as file:
        mae = mean_absolute_error(y_test, y_pred)
        mse = mean_squared_error(y_test, y_pred)
        rmse = sqrt(mse)
        r2 = r2_score(y_test, y_pred)
        explained_variance = explained_variance_score(y_test, y_pred)

        file.write(f"Mean Absolute Error: {mae:.2f}\n")
        file.write(f"Mean Squared Error: {mse:.2f}\n")
        file.write(f"Root Mean Squared Error: {rmse:.4f}\n")
        file.write(f"R^2 Score: {r2:.4f}\n")
        file.write(f"Explained Variance Score: {explained_variance:.4f}\n")

    return knn_model

def removeMissingRows(df, column_names):
    df = df.dropna(subset=column_names)
    return df

cities = ['Bakersfield', 'Los_Angeles', 'New_York', 'Phoenix', 'Reno', 'Visalia', 'Denver', 'Boston']
for city in cities:
    df = pd.read_csv('../csv/final_merged_data/merged_data_' + city + '.csv')
    numerical_features = ['temp', 'visibility', 'dew_point', 'feels_like', 'pressure', 'humidity', 'wind_speed', 'wind_deg', 'wind_gust', 'rain_1h', 'snow_1h', 'clouds_all', 'weather_id', 'year', 'month', 'day']
    df = removeMissingRows(df, numerical_features + ['aqi'])  # Include 'aqi' in the check for NaNs
    X = df[numerical_features]
    y = df['aqi']
    knn_model = perform_knn_regression(X, y, city)
    dump(knn_model, 'knn_model_' + city + '.joblib')
    print(f'KNN model and results saved for {city}')


  df = pd.read_csv('../csv/final_merged_data/merged_data_' + city + '.csv')


KNN model and results saved for Bakersfield


  df = pd.read_csv('../csv/final_merged_data/merged_data_' + city + '.csv')


KNN model and results saved for Los_Angeles


  df = pd.read_csv('../csv/final_merged_data/merged_data_' + city + '.csv')


KNN model and results saved for New_York


  df = pd.read_csv('../csv/final_merged_data/merged_data_' + city + '.csv')


KNN model and results saved for Phoenix


  df = pd.read_csv('../csv/final_merged_data/merged_data_' + city + '.csv')


KNN model and results saved for Reno
KNN model and results saved for Visalia


  df = pd.read_csv('../csv/final_merged_data/merged_data_' + city + '.csv')


KNN model and results saved for Denver


  df = pd.read_csv('../csv/final_merged_data/merged_data_' + city + '.csv')


KNN model and results saved for Boston
