In [1]:
from src.data_preprocessor import DataProcessor
from src.cross_validation import CrossValidation
from src.evaluation import Evaluation
from models.knn import KNN
from models.null_model import NullModelClassification, NullModelRegression
from data_configs.configs import *
import statistics
import numpy as np

config = albalone_config
data_processor = DataProcessor(config=config)
cross_validator = CrossValidation(config=config)
classification_nullmodel = NullModelClassification(config=config)
regression_nullmodel = NullModelRegression(config=config)
knn_model = KNN(config)
null_model = NullModelRegression(config=config)

### Data Load and Preprocessing ###

In [2]:
raw_data = data_processor.load_data()

data_1 = data_processor.impute_missing_values(raw_data)

data_2 = data_processor.encode_nominal_features(data_1)

data_3 = data_processor.encode_ordinal_features(data_2)

In [3]:
data_3

Unnamed: 0,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings,Sex_F,Sex_I,Sex_M
0,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,15,0,0,1
1,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7,0,0,1
2,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9,1,0,0
3,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10,0,0,1
4,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...
4172,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11,1,0,0
4173,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10,0,0,1
4174,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9,0,0,1
4175,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10,1,0,0


## KNN Model ##

### Hyperparameter Tuning ###

In [4]:
data_train, data_val = cross_validator.random_partition(data_3, random_state=42)

In [5]:
features=['Length', 'Diameter', 'Height', 'Whole weight','Shucked weight', 'Viscera weight', 'Shell weight']

In [6]:
gamma = 1/(statistics.stdev(data_train[config['target_column']]))
gamma

0.31178449743328734

#### Tuning k ####

In [7]:
hyperparameters = np.arange(1,15,1)
scores_dict = {}

for k in hyperparameters: 
    scores = []
    for i, (train_set_1, train_set_2) in enumerate(cross_validator.cross_validation(data_train, n_splits=2, n_repeats=5, random_state=42, stratify=False)):
        
        data_train_standardized = data_processor.standardize_data(train_set_1, train_set_1, features=features)
        data_val_standardized = data_processor.standardize_data(train_set_1,data_val,features=features)  

        predictions_1 = knn_model.knn_regression(data_val_standardized, data_train_standardized, k=k, gamma=gamma)['Predicted Value']
        score_1 = Evaluation().mean_squared_error(data_val_standardized[config['target_column']], predictions_1)
        scores.append(score_1)

    average_score = sum(scores) / len(scores)
    print(f"Average MSE score with k={k}: {average_score}")
    scores_dict[k] = average_score

best_k = min(scores_dict, key=scores_dict.get)
print(f"Best k is {best_k} with the lowest average MSE score of {scores_dict[best_k]}")


Average MSE score with k=1: 9.011244019138756
Average MSE score with k=2: 6.6930261481913735
Average MSE score with k=3: 6.01618916449827
Average MSE score with k=4: 5.730333611942119
Average MSE score with k=5: 5.561518517526219
Average MSE score with k=6: 5.4809541314959125
Average MSE score with k=7: 5.427254608054996
Average MSE score with k=8: 5.416797042781171
Average MSE score with k=9: 5.367373734824553
Average MSE score with k=10: 5.36501849792748
Average MSE score with k=11: 5.346401256003911
Average MSE score with k=12: 5.3394929506791495
Average MSE score with k=13: 5.3320039310301794
Average MSE score with k=14: 5.326054590335817
Best k is 14 with the lowest average MSE score of 5.326054590335817


#### Tuning Gamma ####

In [8]:
hyperparameters = np.arange(0.4,1.6,0.2)
scores_dict = {}

for g in hyperparameters: 
    scores = []
    for i, (train_set_1, train_set_2) in enumerate(cross_validator.cross_validation(data_train, n_splits=2, n_repeats=5, random_state=42, stratify=False)):
        
        data_train_standardized = data_processor.standardize_data(train_set_1, train_set_1, features=features)
        data_val_standardized = data_processor.standardize_data(train_set_1,data_val,features=features)  

        predictions_1 = knn_model.knn_regression(data_val_standardized, data_train_standardized, k=best_k, gamma=gamma*g)['Predicted Value']
        score_1 = Evaluation().mean_squared_error(data_val_standardized[config['target_column']], predictions_1)
        scores.append(score_1)

    average_score = sum(scores) / len(scores)
    print(f"Average MSE score with g={round(g,2)}: {average_score}")
    scores_dict[g] = average_score

best_g = min(scores_dict, key=scores_dict.get)
print(f"Best g is {round(best_g,2)} with the lowest average MSE score of {scores_dict[best_g]}")


Average MSE score with g=0.4: 5.335759776073148
Average MSE score with g=0.6: 5.332348222206654
Average MSE score with g=0.8: 5.329107017743394
Average MSE score with g=1.0: 5.326054590335817
Average MSE score with g=1.2: 5.323209126002804
Average MSE score with g=1.4: 5.320587877144758
Average MSE score with g=1.6: 5.31820631072449
Best g is 1.6 with the lowest average MSE score of 5.31820631072449


### Model Performance ###

In [9]:
mse_scores =[]
mae_scores = []
r2_scores = []
pearson_scores = []
null_model_scores = []

# Lists for storing null model metrics
null_model_mae_scores = []
null_model_r2_scores = []
null_model_pearson_scores = []

for i, (train_set, test_set) in enumerate(cross_validator.cross_validation(data_train, n_splits=2, n_repeats=5, random_state=42, stratify=False)):

    data_train_standardized = data_processor.standardize_data(train_set, train_set, features=features)
    data_test_standardized = data_processor.standardize_data(train_set,test_set,features=features)  

    # Train and evaluate 
    predictions_1 = knn_model.knn_regression(data_test_standardized, data_train_standardized, k=best_k, gamma=best_g*gamma)['Predicted Value']
    
    mse_score = Evaluation.mean_squared_error(data_test_standardized[config['target_column']], predictions_1)
    mae_score = Evaluation.mean_absolute_error(data_test_standardized[config['target_column']], predictions_1)
    r2_score = Evaluation.r2_coefficient(data_test_standardized[config['target_column']], predictions_1)
    pearson_score = Evaluation.pearsons_correlation(data_test_standardized[config['target_column']], predictions_1)
    
    mse_scores.append(mse_score)
    mae_scores.append(mae_score)
    r2_scores.append(r2_score)
    pearson_scores.append(pearson_score)

    # Evaluate null model
    null_model_prediction = null_model.naive_regression(test_set)
    null_model_mse = Evaluation.mean_squared_error(test_set[config['target_column']], null_model_prediction)
    null_model_mae = Evaluation.mean_absolute_error(test_set[config['target_column']], null_model_prediction)
    null_model_r2 = Evaluation.r2_coefficient(test_set[config['target_column']], null_model_prediction)
    null_model_pearson = Evaluation.pearsons_correlation(test_set[config['target_column']], null_model_prediction)
    
    null_model_scores.append(null_model_mse)
    null_model_mae_scores.append(null_model_mae)
    null_model_r2_scores.append(null_model_r2)
    null_model_pearson_scores.append(null_model_pearson)


average_mse_score = sum(mse_scores) / len(mse_scores)
average_mae_score = sum(mae_scores) / len(mae_scores)
average_r2_score = sum(r2_scores) / len(r2_scores)
average_pearson_score = sum(pearson_scores) / len(pearson_scores)
average_null_model_mse = sum(null_model_scores) / len(null_model_scores)
average_null_model_mae = sum(null_model_mae_scores) / len(null_model_mae_scores)
average_null_model_r2 = sum(null_model_r2_scores) / len(null_model_r2_scores)
average_null_model_pearson = sum(null_model_pearson_scores) / len(null_model_pearson_scores)

print(f"Average null model MSE score: {average_null_model_mse}")
print(f"Average null model MAE score: {average_null_model_mae}")
print(f"Average null model r2 score: {average_null_model_r2}")
print(f"Average null model pearson score: {average_null_model_pearson}")
print(f"Average MSE score for k={best_k}, g={round(best_g,2)}: {average_mse_score}")
print(f"Average MAE score for k={best_k}, g={round(best_g,2)}: {average_mae_score}")
print(f"Average r2 score for k={best_k}, g={round(best_g,2)}: {average_r2_score}")
print(f"Average pearson score for k={best_k}, g={round(best_g,2)}: {average_pearson_score}")



Average null model MSE score: 10.280610937065877
Average null model MAE score: 2.357776660660252
Average null model r2 score: 0.0
Average null model pearson score: nan
Average MSE score for k=14, g=1.6: 5.1089882327866265
Average MAE score for k=14, g=1.6: 1.5702938978324343
Average r2 score for k=14, g=1.6: 0.5031724623897341
Average pearson score for k=14, g=1.6: 0.713623735959675




## Edited KNN ##

### Hyperparameter Tuning ###

In [10]:
epsilon = statistics.stdev(data_3[config['target_column']])
epsilon

3.224169032068128

#### Tuning Epsilon ####

In [11]:
hyperparameters = np.arange(0.1,2,0.2)
scores_dict = {}

for e in hyperparameters: 
    scores = []
    for i, (train_set_1, train_set_2) in enumerate(cross_validator.cross_validation(data_train, n_splits=2, n_repeats=5, random_state=42, stratify=False)):
        
        edited_train_set = knn_model.edited_knn_regression(train_set_1, train_set_2, epsilon=epsilon*e, gamma=gamma)

        data_train_standardized = data_processor.standardize_data(edited_train_set, edited_train_set, features=features)
        data_val_standardized = data_processor.standardize_data(edited_train_set,data_val,features=features)  

        predictions_1 = knn_model.knn_regression(data_val_standardized, data_train_standardized, k=best_k, gamma=gamma*best_g)['Predicted Value']
        score_1 = Evaluation().mean_squared_error(data_val_standardized[config['target_column']], predictions_1)
        scores.append(score_1)

    average_score = sum(scores) / len(scores)
    print(f"Average MSE score with e={round(e,2)}: {average_score}")
    scores_dict[e] = average_score

best_e = min(scores_dict, key=scores_dict.get)
print(f"Best e is {round(best_e,2)} with the lowest average MSE score of {scores_dict[best_e]}")


Average MSE score with e=0.1: 7.019759180527191
Average MSE score with e=0.3: 7.019759180527191
Average MSE score with e=0.5: 6.437103273868982
Average MSE score with e=0.7: 6.1841592565606645
Average MSE score with e=0.9: 6.1841592565606645
Average MSE score with e=1.1: 5.8807633841670635
Average MSE score with e=1.3: 5.725964286549791
Average MSE score with e=1.5: 5.725964286549791
Average MSE score with e=1.7: 5.5846453107327765
Average MSE score with e=1.9: 5.500619411995783
Best e is 1.9 with the lowest average MSE score of 5.500619411995783


#### Tuning k ####

In [12]:
hyperparameters = np.arange(1,10,1)
scores_dict = {}

for k in hyperparameters: 
    scores = []
    for i, (train_set_1, train_set_2) in enumerate(cross_validator.cross_validation(data_train, n_splits=2, n_repeats=5, random_state=42, stratify=False)):
        
        edited_train_set = knn_model.edited_knn_regression(train_set_1, train_set_2, epsilon=epsilon*best_e, gamma=gamma)

        data_train_standardized = data_processor.standardize_data(edited_train_set, edited_train_set, features=features)
        data_val_standardized = data_processor.standardize_data(edited_train_set,data_val,features=features)  

        predictions_1 = knn_model.knn_regression(data_val_standardized, data_train_standardized, k=k, gamma=gamma*best_g)['Predicted Value']
        score_1 = Evaluation().mean_squared_error(data_val_standardized[config['target_column']], predictions_1)
        scores.append(score_1)

    average_score = sum(scores) / len(scores)
    print(f"Average MSE score with k={k}: {average_score}")
    scores_dict[k] = average_score

best_k = min(scores_dict, key=scores_dict.get)
print(f"Best k is {best_k} with the lowest average MSE score of {scores_dict[best_k]}")


Average MSE score with k=1: 8.020933014354068
Average MSE score with k=2: 6.350068183753953
Average MSE score with k=3: 5.83803300687887
Average MSE score with k=4: 5.6754306068453415
Average MSE score with k=5: 5.5643224475117155
Average MSE score with k=6: 5.531634711156073
Average MSE score with k=7: 5.523789248455772
Average MSE score with k=8: 5.509620763446195
Average MSE score with k=9: 5.489818282949789
Best k is 9 with the lowest average MSE score of 5.489818282949789


#### Tuning gamma ####

In [13]:
hyperparameters = np.arange(0.4,1.6,0.2)
scores_dict = {}

for g in hyperparameters: 
    scores = []
    for i, (train_set_1, train_set_2) in enumerate(cross_validator.cross_validation(data_train, n_splits=2, n_repeats=5, random_state=42, stratify=False)):
        
        edited_train_set = knn_model.edited_knn_regression(train_set_1, train_set_2, epsilon=epsilon*best_e, gamma=gamma)

        data_train_standardized = data_processor.standardize_data(edited_train_set, edited_train_set, features=features)
        data_val_standardized = data_processor.standardize_data(edited_train_set,data_val,features=features)  

        predictions_1 = knn_model.knn_regression(data_val_standardized, data_train_standardized, k=best_k, gamma=g*gamma)['Predicted Value']
        score_1 = Evaluation().mean_squared_error(data_val_standardized[config['target_column']], predictions_1)
        scores.append(score_1)

    average_score = sum(scores) / len(scores)
    print(f"Average MSE score with g={round(g,2)}: {average_score}")
    scores_dict[g] = average_score

best_g = min(scores_dict, key=scores_dict.get)
print(f"Best g is {round(best_g,2)} with the lowest average MSE score of {scores_dict[best_g]}")


Average MSE score with g=0.4: 5.502603329803368
Average MSE score with g=0.6: 5.50007590463503
Average MSE score with g=0.8: 5.49769890532044
Average MSE score with g=1.0: 5.495480588657418
Average MSE score with g=1.2: 5.493426624677425
Average MSE score with g=1.4: 5.491539527782172
Average MSE score with g=1.6: 5.489818282949789
Best g is 1.6 with the lowest average MSE score of 5.489818282949789


In [14]:
mse_scores = []
mae_scores = []
r2_scores = []
pearson_scores = []
null_model_scores = []

# Lists to store null model metrics for MAE, R^2, and Pearson
null_model_mae_scores = []
null_model_r2_scores = []
null_model_pearson_scores = []

for i, (train_set, test_set) in enumerate(cross_validator.cross_validation(data_train, n_splits=2, n_repeats=5, random_state=42, stratify=False)):

    edited_train_set = knn_model.edited_knn_regression(train_set, data_val, epsilon=epsilon*best_e, gamma=gamma)

    data_train_standardized = data_processor.standardize_data(edited_train_set, edited_train_set, features=features)
    data_test_standardized = data_processor.standardize_data(edited_train_set,test_set,features=features)  

    predictions_1 = knn_model.knn_regression(data_test_standardized, data_train_standardized, k=best_k, gamma=best_g*gamma)['Predicted Value']

    mse_score = Evaluation.mean_squared_error(data_test_standardized[config['target_column']], predictions_1)
    mae_score = Evaluation.mean_absolute_error(data_test_standardized[config['target_column']], predictions_1)
    r2_score = Evaluation.r2_coefficient(data_test_standardized[config['target_column']], predictions_1)
    pearson_score = Evaluation.pearsons_correlation(data_test_standardized[config['target_column']], predictions_1)

    mse_scores.append(mse_score)
    mae_scores.append(mae_score)
    r2_scores.append(r2_score)
    pearson_scores.append(pearson_score)
    
    # Null model evaluation
    null_model_prediction = null_model.naive_regression(test_set)
    null_model_mse = Evaluation.mean_squared_error(test_set[config['target_column']], null_model_prediction)
    null_model_mae = Evaluation.mean_absolute_error(test_set[config['target_column']], null_model_prediction)
    null_model_r2 = Evaluation.r2_coefficient(test_set[config['target_column']], null_model_prediction)
    null_model_pearson = Evaluation.pearsons_correlation(test_set[config['target_column']], null_model_prediction)

    null_model_scores.append(null_model_mse)
    null_model_mae_scores.append(null_model_mae)
    null_model_r2_scores.append(null_model_r2)
    null_model_pearson_scores.append(null_model_pearson)

average_mse_score = sum(mse_scores) / len(mse_scores)
average_mae_score = sum(mae_scores) / len(mae_scores)
average_r2_score = sum(r2_scores) / len(r2_scores)
average_pearson_score = sum(pearson_scores) / len(pearson_scores)
average_null_model_mse = sum(null_model_scores) / len(null_model_scores)
average_null_model_mae = sum(null_model_mae_scores) / len(null_model_mae_scores)
average_null_model_r2 = sum(null_model_r2_scores) / len(null_model_r2_scores)
average_null_model_pearson = sum(null_model_pearson_scores) / len(null_model_pearson_scores)

print(f"Average null model MSE score: {average_null_model_mse}")
print(f"Average null model MAE score: {average_null_model_mae}")
print(f"Average null model R2 score: {average_null_model_r2}")
print(f"Average null model Pearson score: {average_null_model_pearson}")
print(f"Average MSE score for k={best_k}, g={round(best_g,2)}: {average_mse_score}")
print(f"Average MAE score for k={best_k}, g={round(best_g,2)}: {average_mae_score}")
print(f"Average R2 score for k={best_k}, g={round(best_g,2)}: {average_r2_score}")
print(f"Average Pearson score for k={best_k}, g={round(best_g,2)}: {average_pearson_score}")




Average null model MSE score: 10.280610937065877
Average null model MAE score: 2.357776660660252
Average null model R2 score: 0.0
Average null model Pearson score: nan
Average MSE score for k=9, g=1.6: 5.283134708476315
Average MAE score for k=9, g=1.6: 1.5763915123066348
Average R2 score for k=9, g=1.6: 0.48625131612526795
Average Pearson score for k=9, g=1.6: 0.7069709024182674




## Condensed Knn ##

#### Tuning Epsilon ####

In [15]:
hyperparameters = np.arange(0.1,1.5,0.1)
scores_dict = {}

for e in hyperparameters: 
    scores = []
    for i, (train_set_1, train_set_2) in enumerate(cross_validator.cross_validation(data_train, n_splits=2, n_repeats=5, random_state=42, stratify=False)):
        
        condesed_train_set = knn_model.condensed_knn_regression(train_set_1,epsilon=epsilon*e)

        data_train_standardized = data_processor.standardize_data(condesed_train_set, condesed_train_set, features=features)
        data_val_standardized = data_processor.standardize_data(condesed_train_set,data_val,features=features)

        predictions_1 = knn_model.knn_regression(data_val_standardized, data_train_standardized, k=best_k, gamma=gamma*best_g)['Predicted Value']
        score_1 = Evaluation().mean_squared_error(data_val_standardized[config['target_column']], predictions_1)
        scores.append(score_1)

    average_score = sum(scores) / len(scores)
    print(f"Average MSE score with e={round(e,2)}: {average_score}")
    scores_dict[e] = average_score

best_e = min(scores_dict, key=scores_dict.get)
print(f"Best e is {round(best_e,2)} with the lowest average MSE score of {scores_dict[best_e]}")


Average MSE score with e=0.1: 5.363871157313764
Average MSE score with e=0.2: 5.3636450283162524
Average MSE score with e=0.3: 5.365356633991318
Average MSE score with e=0.4: 5.470011141323902
Average MSE score with e=0.5: 5.463391054714149
Average MSE score with e=0.6: 5.4652651988622045
Average MSE score with e=0.7: 5.71327257684247
Average MSE score with e=0.8: 5.712733942048611
Average MSE score with e=0.9: 5.713396478881825
Average MSE score with e=1.0: 5.981877926876671
Average MSE score with e=1.1: 5.988532079011377
Average MSE score with e=1.2: 6.011768147162297
Average MSE score with e=1.3: 6.269499103256668
Average MSE score with e=1.4: 6.335307124091774
Best e is 0.2 with the lowest average MSE score of 5.3636450283162524


#### Tuning k ####

In [16]:
hyperparameters = np.arange(1,10,1)
scores_dict = {}

for k in hyperparameters: 
    scores = []
    for i, (train_set_1, train_set_2) in enumerate(cross_validator.cross_validation(data_train, n_splits=2, n_repeats=5, random_state=42, stratify=False)):
        
        condesed_train_set = knn_model.condensed_knn_regression(train_set_1,epsilon=epsilon*best_e)

        data_train_standardized = data_processor.standardize_data(condesed_train_set, condesed_train_set, features=features)
        data_val_standardized = data_processor.standardize_data(condesed_train_set,data_val,features=features)

        predictions_1 = knn_model.knn_regression(data_val_standardized, data_train_standardized, k=k, gamma=gamma*best_g)['Predicted Value']
        score_1 = Evaluation().mean_squared_error(data_val_standardized[config['target_column']], predictions_1)
        scores.append(score_1)

    average_score = sum(scores) / len(scores)
    print(f"Average MSE score with k={k}: {average_score}")
    scores_dict[k] = average_score

best_k = min(scores_dict, key=scores_dict.get)
print(f"Best k is {best_k} with the lowest average MSE score of {scores_dict[best_k]}")


Average MSE score with k=1: 9.176076555023922
Average MSE score with k=2: 6.763807494787376
Average MSE score with k=3: 6.057933035967641
Average MSE score with k=4: 5.7698409180039025
Average MSE score with k=5: 5.588689898856752
Average MSE score with k=6: 5.509740948595235
Average MSE score with k=7: 5.452569515604357
Average MSE score with k=8: 5.40876597250405
Average MSE score with k=9: 5.364262796288996
Best k is 9 with the lowest average MSE score of 5.364262796288996


#### Tuning Gamma ####

In [17]:
hyperparameters = np.arange(0.4,1.6,0.2)
scores_dict = {}

for g in hyperparameters: 
    scores = []
    for i, (train_set_1, train_set_2) in enumerate(cross_validator.cross_validation(data_train, n_splits=2, n_repeats=5, random_state=42, stratify=False)):
        
        condesed_train_set = knn_model.condensed_knn_regression(train_set_1,epsilon=epsilon*best_e)

        data_train_standardized = data_processor.standardize_data(condesed_train_set, condesed_train_set, features=features)
        data_val_standardized = data_processor.standardize_data(condesed_train_set,data_val,features=features)  

        predictions_1 = knn_model.knn_regression(data_val_standardized, data_train_standardized, k=best_k, gamma=g*gamma)['Predicted Value']
        score_1 = Evaluation().mean_squared_error(data_val_standardized[config['target_column']], predictions_1)
        scores.append(score_1)

    average_score = sum(scores) / len(scores)
    print(f"Average MSE score with g={round(g,2)}: {average_score}")
    scores_dict[g] = average_score

best_g = min(scores_dict, key=scores_dict.get)
print(f"Best g is {round(best_g,2)} with the lowest average MSE score of {scores_dict[best_g]}")


Average MSE score with g=0.4: 5.374235778263465
Average MSE score with g=0.6: 5.370587960283816
Average MSE score with g=0.8: 5.370217536397965
Average MSE score with g=1.0: 5.369253859382793
Average MSE score with g=1.2: 5.366226276836631
Average MSE score with g=1.4: 5.365978528240339
Average MSE score with g=1.6: 5.362337614264318
Best g is 1.6 with the lowest average MSE score of 5.362337614264318


### Model Performance ###

In [18]:
mse_scores = []
mae_scores = []
r2_scores = []
pearson_scores = []
null_model_scores = []

# Lists to store null model metrics for MAE, R^2, and Pearson
null_model_mae_scores = []
null_model_r2_scores = []
null_model_pearson_scores = []

for i, (train_set, test_set) in enumerate(cross_validator.cross_validation(data_train, n_splits=2, n_repeats=5, random_state=42, stratify=False)):

    condesed_train_set = knn_model.condensed_knn_regression(train_set,epsilon=epsilon*best_e)

    data_train_standardized = data_processor.standardize_data(condesed_train_set, condesed_train_set, features=features)
    data_test_standardized = data_processor.standardize_data(edited_train_set,test_set,features=features)  

    predictions_1 = knn_model.knn_regression(data_test_standardized, data_train_standardized, k=best_k, gamma=best_g*gamma)['Predicted Value']

    mse_score = Evaluation.mean_squared_error(data_test_standardized[config['target_column']], predictions_1)
    mae_score = Evaluation.mean_absolute_error(data_test_standardized[config['target_column']], predictions_1)
    r2_score = Evaluation.r2_coefficient(data_test_standardized[config['target_column']], predictions_1)
    pearson_score = Evaluation.pearsons_correlation(data_test_standardized[config['target_column']], predictions_1)

    mse_scores.append(mse_score)
    mae_scores.append(mae_score)
    r2_scores.append(r2_score)
    pearson_scores.append(pearson_score)
    
    # Null model evaluation
    null_model_prediction = null_model.naive_regression(test_set)
    null_model_mse = Evaluation.mean_squared_error(test_set[config['target_column']], null_model_prediction)
    null_model_mae = Evaluation.mean_absolute_error(test_set[config['target_column']], null_model_prediction)
    null_model_r2 = Evaluation.r2_coefficient(test_set[config['target_column']], null_model_prediction)
    null_model_pearson = Evaluation.pearsons_correlation(test_set[config['target_column']], null_model_prediction)

    null_model_scores.append(null_model_mse)
    null_model_mae_scores.append(null_model_mae)
    null_model_r2_scores.append(null_model_r2)
    null_model_pearson_scores.append(null_model_pearson)


average_mse_score = sum(mse_scores) / len(mse_scores)
average_mae_score = sum(mae_scores) / len(mae_scores)
average_r2_score = sum(r2_scores) / len(r2_scores)
average_pearson_score = sum(pearson_scores) / len(pearson_scores)
average_null_model_mse = sum(null_model_scores) / len(null_model_scores)
average_null_model_mae = sum(null_model_mae_scores) / len(null_model_mae_scores)
average_null_model_r2 = sum(null_model_r2_scores) / len(null_model_r2_scores)
average_null_model_pearson = sum(null_model_pearson_scores) / len(null_model_pearson_scores)


print(f"Average null model MSE score: {average_null_model_mse}")
print(f"Average null model MAE score: {average_null_model_mae}")
print(f"Average null model R2 score: {average_null_model_r2}")
print(f"Average null model Pearson score: {average_null_model_pearson}")
print(f"Average MSE score for k={best_k}, g={round(best_g,2)}: {average_mse_score}")
print(f"Average MAE score for k={best_k}, g={round(best_g,2)}: {average_mae_score}")
print(f"Average R2 score for k={best_k}, g={round(best_g,2)}: {average_r2_score}")
print(f"Average Pearson score for k={best_k}, g={round(best_g,2)}: {average_pearson_score}")



Average null model MSE score: 10.280610937065877
Average null model MAE score: 2.357776660660252
Average null model R2 score: 0.0
Average null model Pearson score: nan
Average MSE score for k=9, g=1.6: 5.187008768531422
Average MAE score for k=9, g=1.6: 1.6263701693198604
Average R2 score for k=9, g=1.6: 0.49533757134630657
Average Pearson score for k=9, g=1.6: 0.7051203958427629


