In [12]:
from src.data_preprocessor import DataProcessor
from src.cross_validation import CrossValidation
from src.evaluation import Evaluation
from models.knn import KNN
from models.null_model import NullModelClassification, NullModelRegression
from data_configs.configs import *
import statistics
import numpy as np

config = machine_config
data_processor = DataProcessor(config=config)
cross_validator = CrossValidation(config=config)
classification_nullmodel = NullModelClassification(config=config)
regression_nullmodel = NullModelRegression(config=config)
knn_model = KNN(config)
null_model = NullModelRegression(config=config)

### Data Load and Preprocessing ###

In [13]:
raw_data = data_processor.load_data()

raw_data_2 = raw_data.drop(columns=['vendor_name', 'model_name', 'ERP'])

data_1 = data_processor.impute_missing_values(raw_data_2)

data_2 = data_processor.encode_nominal_features(data_1)

data_3 = data_processor.encode_ordinal_features(data_2)

## KNN Model ##

### Hyperparameter Tuning ###

In [14]:
data_train, data_val = cross_validator.random_partition(data_3, random_state=42)

In [15]:
features=['MYCT', 'MMIN', 'MMAX', 'CACH', 'CHMIN', 'CHMAX']

In [16]:
gamma = 1/(statistics.stdev(data_train[config['target_column']]))
gamma

0.0071906613411797165

#### Tuning k ####

In [17]:
hyperparameters = np.arange(1,15,1)
scores_dict = {}

for k in hyperparameters: 
    scores = []
    for i, (train_set_1, train_set_2) in enumerate(cross_validator.cross_validation(data_train, n_splits=2, n_repeats=5, random_state=42, stratify=False)):

        data_train_standardized = data_processor.standardize_data(train_set_1, train_set_1, features=features)
        data_val_standardized = data_processor.standardize_data(train_set_1,data_val,features=features)  

        predictions_1 = knn_model.knn_regression(data_val_standardized, data_train_standardized, k=k, gamma=gamma)['Predicted Value']
        score_1 = Evaluation().mean_squared_error(data_val_standardized[config['target_column']], predictions_1)
        scores.append(score_1)

    average_score = sum(scores) / len(scores)
    print(f"Average MSE score with k={k}: {average_score}")
    scores_dict[k] = average_score

best_k = min(scores_dict, key=scores_dict.get)
print(f"Best k is {best_k} with the lowest average MSE score of {scores_dict[best_k]}")


Average MSE score with k=1: 18692.819047619043
Average MSE score with k=2: 17661.420809542098
Average MSE score with k=3: 18995.569669901513
Average MSE score with k=4: 19611.49917804948
Average MSE score with k=5: 20429.088741870484
Average MSE score with k=6: 21261.43363838865
Average MSE score with k=7: 21813.683190323467
Average MSE score with k=8: 22263.245673862275
Average MSE score with k=9: 23109.50510701653
Average MSE score with k=10: 24044.911264984657
Average MSE score with k=11: 24791.148339346157
Average MSE score with k=12: 25524.400740610767
Average MSE score with k=13: 26372.642408441294
Average MSE score with k=14: 27191.311096247235
Best k is 2 with the lowest average MSE score of 17661.420809542098


#### Tuning Gamma ####

In [18]:
hyperparameters = np.arange(0.4,1.6,0.2)
scores_dict = {}

for g in hyperparameters: 
    scores = []
    for i, (train_set_1, train_set_2) in enumerate(cross_validator.cross_validation(data_train, n_splits=2, n_repeats=5, random_state=42, stratify=False)):

        data_train_standardized = data_processor.standardize_data(train_set_1, train_set_1, features=features)
        data_val_standardized = data_processor.standardize_data(train_set_1,data_val,features=features)  

        predictions_1 = knn_model.knn_regression(data_val_standardized, data_train_standardized, k=best_k, gamma=gamma*g)['Predicted Value']
        score_1 = Evaluation().mean_squared_error(data_val_standardized[config['target_column']], predictions_1)
        scores.append(score_1)

    average_score = sum(scores) / len(scores)
    print(f"Average MSE score with g={round(g,2)}: {average_score}")
    scores_dict[g] = average_score

best_g = min(scores_dict, key=scores_dict.get)
print(f"Best g is {round(best_g,2)} with the lowest average MSE score of {scores_dict[best_g]}")


Average MSE score with g=0.4: 17774.013336162112
Average MSE score with g=0.6: 17734.10464995688
Average MSE score with g=0.8: 17696.562832538002
Average MSE score with g=1.0: 17661.420809542098
Average MSE score with g=1.2: 17628.695996424074
Average MSE score with g=1.4: 17598.390491571034
Average MSE score with g=1.6: 17570.491502459376
Best g is 1.6 with the lowest average MSE score of 17570.491502459376


### Model Performance ###

In [19]:
mse_scores = []
mae_scores = []
r2_scores = []
pearson_scores = []
null_model_scores = []

# Lists for storing null model metrics
null_model_mae_scores = []
null_model_r2_scores = []
null_model_pearson_scores = []

for i, (train_set, test_set) in enumerate(cross_validator.cross_validation(data_train, n_splits=2, n_repeats=5, random_state=42, stratify=False)):

    data_train_standardized = data_processor.standardize_data(train_set, train_set, features=features)
    data_test_standardized = data_processor.standardize_data(train_set,test_set,features=features)  

    # Train and evaluate 
    predictions_1 = knn_model.knn_regression(data_test_standardized, data_train_standardized, k=best_k, gamma=best_g*gamma)['Predicted Value']
    
    mse_score = Evaluation.mean_squared_error(data_test_standardized[config['target_column']], predictions_1)
    mae_score = Evaluation.mean_absolute_error(data_test_standardized[config['target_column']], predictions_1)
    r2_score = Evaluation.r2_coefficient(data_test_standardized[config['target_column']], predictions_1)
    pearson_score = Evaluation.pearsons_correlation(data_test_standardized[config['target_column']], predictions_1)
    
    mse_scores.append(mse_score)
    mae_scores.append(mae_score)
    r2_scores.append(r2_score)
    pearson_scores.append(pearson_score)

    # Evaluate null model
    null_model_prediction = null_model.naive_regression(test_set)
    null_model_mse = Evaluation.mean_squared_error(test_set[config['target_column']], null_model_prediction)
    null_model_mae = Evaluation.mean_absolute_error(test_set[config['target_column']], null_model_prediction)
    null_model_r2 = Evaluation.r2_coefficient(test_set[config['target_column']], null_model_prediction)
    null_model_pearson = Evaluation.pearsons_correlation(test_set[config['target_column']], null_model_prediction)
    
    null_model_scores.append(null_model_mse)
    null_model_mae_scores.append(null_model_mae)
    null_model_r2_scores.append(null_model_r2)
    null_model_pearson_scores.append(null_model_pearson)


average_mse_score = sum(mse_scores) / len(mse_scores)
average_mae_score = sum(mae_scores) / len(mae_scores)
average_r2_score = sum(r2_scores) / len(r2_scores)
average_pearson_score = sum(pearson_scores) / len(pearson_scores)
average_null_model_mse = sum(null_model_scores) / len(null_model_scores)
average_null_model_mae = sum(null_model_mae_scores) / len(null_model_mae_scores)
average_null_model_r2 = sum(null_model_r2_scores) / len(null_model_r2_scores)
average_null_model_pearson = sum(null_model_pearson_scores) / len(null_model_pearson_scores)

print(f"Average null model MSE score: {average_null_model_mse}")
print(f"Average null model MAE score: {average_null_model_mae}")
print(f"Average null model r2 score: {average_null_model_r2}")
print(f"Average null model pearson score: {average_null_model_pearson}")
print(f"Average MSE score for k={best_k}, g={round(best_g,2)}: {average_mse_score}")
print(f"Average MAE score for k={best_k}, g={round(best_g,2)}: {average_mae_score}")
print(f"Average r2 score for k={best_k}, g={round(best_g,2)}: {average_r2_score}")
print(f"Average pearson score for k={best_k}, g={round(best_g,2)}: {average_pearson_score}")




Average null model MSE score: 19073.865381296106
Average null model MAE score: 86.17113664476773
Average null model r2 score: 0.0
Average null model pearson score: nan
Average MSE score for k=2, g=1.6: 7512.193936429954
Average MAE score for k=2, g=1.6: 34.97747282263274
Average r2 score for k=2, g=1.6: 0.6523904298418604
Average pearson score for k=2, g=1.6: 0.8250440267334402




## Edited KNN ##

### Hyperparameter Tuning ###

In [20]:
epsilon = statistics.stdev(data_3[config['target_column']])
epsilon

160.83073308779512

#### Tuning Epsilon ####

In [21]:
hyperparameters = np.arange(0.1,2,0.2)
scores_dict = {}

for e in hyperparameters: 
    scores = []
    for i, (train_set_1, train_set_2) in enumerate(cross_validator.cross_validation(data_train, n_splits=2, n_repeats=5, random_state=42, stratify=False)):

        edited_train_set = knn_model.edited_knn_regression(train_set_1, train_set_2, epsilon=epsilon*e, gamma=gamma)

        data_train_standardized = data_processor.standardize_data(edited_train_set, edited_train_set, features=features)
        data_val_standardized = data_processor.standardize_data(edited_train_set,data_val,features=features)  

        predictions_1 = knn_model.knn_regression(data_val_standardized, data_train_standardized, k=best_k, gamma=gamma*best_g)['Predicted Value']
        score_1 = Evaluation().mean_squared_error(data_val_standardized[config['target_column']], predictions_1)
        scores.append(score_1)

    average_score = sum(scores) / len(scores)
    print(f"Average score with e={round(e,2)}: {average_score}")
    scores_dict[e] = average_score

best_e = min(scores_dict, key=scores_dict.get)
print(f"Best e is {round(best_e,2)} with the lowest average MSE score of {scores_dict[best_e]}")


Average score with e=0.1: 42999.635327469165
Average score with e=0.3: 30739.736546710512
Average score with e=0.5: 30512.572844300874
Average score with e=0.7: 30095.55748334244
Average score with e=0.9: 28403.882792172655
Average score with e=1.1: 27297.548205825537
Average score with e=1.3: 26649.147142497724
Average score with e=1.5: 25927.40625403726
Average score with e=1.7: 25927.40625403726
Average score with e=1.9: 25550.96864960234
Best e is 1.9 with the lowest average MSE score of 25550.96864960234


#### Tuning k ####

In [22]:
hyperparameters = np.arange(1,10,1)
scores_dict = {}

for k in hyperparameters: 
    scores = []
    for i, (train_set_1, train_set_2) in enumerate(cross_validator.cross_validation(data_train, n_splits=2, n_repeats=5, random_state=42, stratify=False)):

        edited_train_set = knn_model.edited_knn_regression(train_set_1, train_set_2, epsilon=epsilon*best_e, gamma=best_g*gamma)

        data_train_standardized = data_processor.standardize_data(edited_train_set, edited_train_set, features=features)
        data_val_standardized = data_processor.standardize_data(edited_train_set,data_val,features=features)  

        predictions_1 = knn_model.knn_regression(data_val_standardized, data_train_standardized, k=k, gamma=gamma*best_g)['Predicted Value']
        score_1 = Evaluation().mean_squared_error(data_val_standardized[config['target_column']], predictions_1)
        scores.append(score_1)

    average_score = sum(scores) / len(scores)
    print(f"Average score with k={k}: {average_score}")
    scores_dict[k] = average_score

best_k = min(scores_dict, key=scores_dict.get)
print(f"Best k is {best_k} with the lowest average MSE score of {scores_dict[best_k]}")


Average score with k=1: 25886.95476190476
Average score with k=2: 25550.96864960234
Average score with k=3: 25604.431840161058
Average score with k=4: 25594.439567629728
Average score with k=5: 26030.60209764329
Average score with k=6: 26229.53446550515
Average score with k=7: 26460.116329833876
Average score with k=8: 26997.5770240591
Average score with k=9: 27512.966500825092
Best k is 2 with the lowest average MSE score of 25550.96864960234


#### Tuning gamma ####

In [23]:
hyperparameters = np.arange(0.4,1.6,0.2)
scores_dict = {}

for g in hyperparameters: 
    scores = []
    for i, (train_set_1, train_set_2) in enumerate(cross_validator.cross_validation(data_train, n_splits=2, n_repeats=5, random_state=42, stratify=False)):

        edited_train_set = knn_model.edited_knn_regression(train_set_1, train_set_2, epsilon=epsilon*best_e, gamma=gamma)

        data_train_standardized = data_processor.standardize_data(edited_train_set, edited_train_set, features=features)
        data_val_standardized = data_processor.standardize_data(edited_train_set,data_val,features=features)  

        predictions_1 = knn_model.knn_regression(data_val_standardized, data_train_standardized, k=best_k, gamma=g*gamma)['Predicted Value']
        score_1 = Evaluation().mean_squared_error(data_val_standardized[config['target_column']], predictions_1)
        scores.append(score_1)

    average_score = sum(scores) / len(scores)
    print(f"Average MSE score with g={round(g,2)}: {average_score}")
    scores_dict[g] = average_score

best_g = min(scores_dict, key=scores_dict.get)
print(f"Best g is {round(best_g,2)} with the lowest average MSE score of {scores_dict[best_g]}")


Average MSE score with g=0.4: 25757.990160363257
Average MSE score with g=0.6: 25721.248580177926
Average MSE score with g=0.8: 25685.32008919629
Average MSE score with g=1.0: 25650.272930463565
Average MSE score with g=1.2: 25616.16656647792
Average MSE score with g=1.4: 25583.051393640762
Average MSE score with g=1.6: 25550.96864960234
Best g is 1.6 with the lowest average MSE score of 25550.96864960234


In [24]:
mse_scores = []
mae_scores = []
r2_scores = []
pearson_scores = []
null_model_scores = []

# Lists to store null model metrics for MAE, R^2, and Pearson
null_model_mae_scores = []
null_model_r2_scores = []
null_model_pearson_scores = []

for i, (train_set, test_set) in enumerate(cross_validator.cross_validation(data_train, n_splits=2, n_repeats=5, random_state=42, stratify=False)):

    edited_train_set = knn_model.edited_knn_regression(train_set, data_val, epsilon=epsilon*best_e, gamma=gamma*best_g)

    data_train_standardized = data_processor.standardize_data(edited_train_set, edited_train_set, features=features)
    data_test_standardized = data_processor.standardize_data(edited_train_set,test_set,features=features)  

    predictions_1 = knn_model.knn_regression(data_test_standardized, data_train_standardized, k=best_k, gamma=best_g*gamma)['Predicted Value']

    mse_score = Evaluation.mean_squared_error(data_test_standardized[config['target_column']], predictions_1)
    mae_score = Evaluation.mean_absolute_error(data_test_standardized[config['target_column']], predictions_1)
    r2_score = Evaluation.r2_coefficient(data_test_standardized[config['target_column']], predictions_1)
    pearson_score = Evaluation.pearsons_correlation(data_test_standardized[config['target_column']], predictions_1)

    mse_scores.append(mse_score)
    mae_scores.append(mae_score)
    r2_scores.append(r2_score)
    pearson_scores.append(pearson_score)
    
    # Null model evaluation
    null_model_prediction = null_model.naive_regression(test_set)
    null_model_mse = Evaluation.mean_squared_error(test_set[config['target_column']], null_model_prediction)
    null_model_mae = Evaluation.mean_absolute_error(test_set[config['target_column']], null_model_prediction)
    null_model_r2 = Evaluation.r2_coefficient(test_set[config['target_column']], null_model_prediction)
    null_model_pearson = Evaluation.pearsons_correlation(test_set[config['target_column']], null_model_prediction)

    null_model_scores.append(null_model_mse)
    null_model_mae_scores.append(null_model_mae)
    null_model_r2_scores.append(null_model_r2)
    null_model_pearson_scores.append(null_model_pearson)

average_mse_score = sum(mse_scores) / len(mse_scores)
average_mae_score = sum(mae_scores) / len(mae_scores)
average_r2_score = sum(r2_scores) / len(r2_scores)
average_pearson_score = sum(pearson_scores) / len(pearson_scores)
average_null_model_mse = sum(null_model_scores) / len(null_model_scores)
average_null_model_mae = sum(null_model_mae_scores) / len(null_model_mae_scores)
average_null_model_r2 = sum(null_model_r2_scores) / len(null_model_r2_scores)
average_null_model_pearson = sum(null_model_pearson_scores) / len(null_model_pearson_scores)

print(f"Average null model MSE score: {average_null_model_mse}")
print(f"Average null model MAE score: {average_null_model_mae}")
print(f"Average null model R2 score: {average_null_model_r2}")
print(f"Average null model Pearson score: {average_null_model_pearson}")
print(f"Average MSE score for k={best_k}, g={round(best_g,2)}: {average_mse_score}")
print(f"Average MAE score for k={best_k}, g={round(best_g,2)}: {average_mae_score}")
print(f"Average R2 score for k={best_k}, g={round(best_g,2)}: {average_r2_score}")
print(f"Average Pearson score for k={best_k}, g={round(best_g,2)}: {average_pearson_score}")




Average null model MSE score: 19073.865381296106
Average null model MAE score: 86.17113664476773
Average null model R2 score: 0.0
Average null model Pearson score: nan
Average MSE score for k=2, g=1.6: 7120.230084644386
Average MAE score for k=2, g=1.6: 33.98147919156431
Average R2 score for k=2, g=1.6: 0.6787177432238206
Average Pearson score for k=2, g=1.6: 0.8429020253796559




## Condensed Knn ##

#### Tuning Epsilon ####

In [25]:
hyperparameters = np.arange(0.1,1.5,0.1)
scores_dict = {}

for e in hyperparameters: 
    scores = []
    for i, (train_set_1, train_set_2) in enumerate(cross_validator.cross_validation(data_train, n_splits=2, n_repeats=5, random_state=42, stratify=False)):

        condesed_train_set = knn_model.condensed_knn_regression(train_set_1,epsilon=epsilon*e)

        data_train_standardized = data_processor.standardize_data(condesed_train_set, condesed_train_set, features=features)
        data_val_standardized = data_processor.standardize_data(condesed_train_set,data_val,features=features)

        predictions_1 = knn_model.knn_regression(data_val_standardized, data_train_standardized, k=best_k, gamma=gamma*best_g)['Predicted Value']
        score_1 = Evaluation().mean_squared_error(data_val_standardized[config['target_column']], predictions_1)
        scores.append(score_1)

    average_score = sum(scores) / len(scores)
    print(f"Average MSE score with e={round(e,2)}: {average_score}")
    scores_dict[e] = average_score

best_e = min(scores_dict, key=scores_dict.get)
print(f"Best e is {round(best_e,2)} with the lowest average MSE score of {scores_dict[best_e]}")


Average MSE score with e=0.1: 16750.665662590403
Average MSE score with e=0.2: 16217.471721143553
Average MSE score with e=0.3: 16848.734164960755
Average MSE score with e=0.4: 16828.95606619945
Average MSE score with e=0.5: 16794.050588262176
Average MSE score with e=0.6: 17026.950210817155
Average MSE score with e=0.7: 16508.968410857517
Average MSE score with e=0.8: 18013.997775298772
Average MSE score with e=0.9: 18760.35715947352
Average MSE score with e=1.0: 19045.55144025306
Average MSE score with e=1.1: 26807.65003985834
Average MSE score with e=1.2: 21760.168135983105
Average MSE score with e=1.3: 20664.130955020046
Average MSE score with e=1.4: 19421.134063082907
Best e is 0.2 with the lowest average MSE score of 16217.471721143553


#### Tuning k ####

In [26]:
hyperparameters = np.arange(1,10,1)
scores_dict = {}

for k in hyperparameters: 
    scores = []
    for i, (train_set_1, train_set_2) in enumerate(cross_validator.cross_validation(data_train, n_splits=2, n_repeats=5, random_state=42, stratify=False)):

        condesed_train_set = knn_model.condensed_knn_regression(train_set_1,epsilon=epsilon*best_e)

        data_train_standardized = data_processor.standardize_data(condesed_train_set, condesed_train_set, features=features)
        data_val_standardized = data_processor.standardize_data(condesed_train_set,data_val,features=features)

        predictions_1 = knn_model.knn_regression(data_val_standardized, data_train_standardized, k=k, gamma=gamma*best_g)['Predicted Value']
        score_1 = Evaluation().mean_squared_error(data_val_standardized[config['target_column']], predictions_1)
        scores.append(score_1)

    average_score = sum(scores) / len(scores)
    print(f"Average MSE score with k={k}: {average_score}")
    scores_dict[k] = average_score

best_k = min(scores_dict, key=scores_dict.get)
print(f"Best k is {best_k} with the lowest average MSE score of {scores_dict[best_k]}")


Average MSE score with k=1: 16243.657142857142
Average MSE score with k=2: 16650.054883090394
Average MSE score with k=3: 18230.540686202996
Average MSE score with k=4: 18912.69497080522
Average MSE score with k=5: 20037.32504621733
Average MSE score with k=6: 20586.2577013997
Average MSE score with k=7: 21519.141635076903
Average MSE score with k=8: 23087.065715328477
Average MSE score with k=9: 23002.66607474599
Best k is 1 with the lowest average MSE score of 16243.657142857142


#### Tuning Gamma ####

In [27]:
hyperparameters = np.arange(0.4,1.6,0.2)
scores_dict = {}

for g in hyperparameters: 
    scores = []
    for i, (train_set_1, train_set_2) in enumerate(cross_validator.cross_validation(data_train, n_splits=2, n_repeats=5, random_state=42, stratify=False)):

        condesed_train_set = knn_model.condensed_knn_regression(train_set_1,epsilon=epsilon*best_e)

        data_train_standardized = data_processor.standardize_data(condesed_train_set, condesed_train_set, features=features)
        data_val_standardized = data_processor.standardize_data(condesed_train_set,data_val,features=features)  

        predictions_1 = knn_model.knn_regression(data_val_standardized, data_train_standardized, k=best_k, gamma=g*gamma)['Predicted Value']
        score_1 = Evaluation().mean_squared_error(data_val_standardized[config['target_column']], predictions_1)
        scores.append(score_1)

    average_score = sum(scores) / len(scores)
    print(f"Average MSE score with g={round(g,2)}: {average_score}")
    scores_dict[g] = average_score

best_g = min(scores_dict, key=scores_dict.get)
print(f"Best g is {round(best_g,2)} with the lowest average MSE score of {scores_dict[best_g]}")


Average MSE score with g=0.4: 16242.516666666663
Average MSE score with g=0.6: 16231.10238095238
Average MSE score with g=0.8: 16121.114285714288
Average MSE score with g=1.0: 16739.373809523808
Average MSE score with g=1.2: 16480.93571428571
Average MSE score with g=1.4: 16125.6
Average MSE score with g=1.6: 16616.485714285714
Best g is 0.8 with the lowest average MSE score of 16121.114285714288


### Model Performance ###

In [29]:
mse_scores = []
mae_scores = []
r2_scores = []
pearson_scores = []
null_model_scores = []

# Lists to store null model metrics for MAE, R^2, and Pearson
null_model_mae_scores = []
null_model_r2_scores = []
null_model_pearson_scores = []

for i, (train_set, test_set) in enumerate(cross_validator.cross_validation(data_train, n_splits=2, n_repeats=5, random_state=42, stratify=False)):

    condesed_train_set = knn_model.condensed_knn_regression(train_set,epsilon=epsilon*best_e)

    data_train_standardized = data_processor.standardize_data(condesed_train_set, condesed_train_set, features=features)
    data_test_standardized = data_processor.standardize_data(edited_train_set,test_set,features=features)  

    predictions_1 = knn_model.knn_regression(data_test_standardized, data_train_standardized, k=best_k, gamma=best_g*gamma)['Predicted Value']

    mse_score = Evaluation.mean_squared_error(data_test_standardized[config['target_column']], predictions_1)
    mae_score = Evaluation.mean_absolute_error(data_test_standardized[config['target_column']], predictions_1)
    r2_score = Evaluation.r2_coefficient(data_test_standardized[config['target_column']], predictions_1)
    pearson_score = Evaluation.pearsons_correlation(data_test_standardized[config['target_column']], predictions_1)

    mse_scores.append(mse_score)
    mae_scores.append(mae_score)
    r2_scores.append(r2_score)
    pearson_scores.append(pearson_score)
    
    # Null model evaluation
    null_model_prediction = null_model.naive_regression(test_set)
    null_model_mse = Evaluation.mean_squared_error(test_set[config['target_column']], null_model_prediction)
    null_model_mae = Evaluation.mean_absolute_error(test_set[config['target_column']], null_model_prediction)
    null_model_r2 = Evaluation.r2_coefficient(test_set[config['target_column']], null_model_prediction)
    null_model_pearson = Evaluation.pearsons_correlation(test_set[config['target_column']], null_model_prediction)

    null_model_scores.append(null_model_mse)
    null_model_mae_scores.append(null_model_mae)
    null_model_r2_scores.append(null_model_r2)
    null_model_pearson_scores.append(null_model_pearson)


average_mse_score = sum(mse_scores) / len(mse_scores)
average_mae_score = sum(mae_scores) / len(mae_scores)
average_r2_score = sum(r2_scores) / len(r2_scores)
average_pearson_score = sum(pearson_scores) / len(pearson_scores)
average_null_model_mse = sum(null_model_scores) / len(null_model_scores)
average_null_model_mae = sum(null_model_mae_scores) / len(null_model_mae_scores)
average_null_model_r2 = sum(null_model_r2_scores) / len(null_model_r2_scores)
average_null_model_pearson = sum(null_model_pearson_scores) / len(null_model_pearson_scores)


print(f"Average null model MSE score: {average_null_model_mse}")
print(f"Average null model MAE score: {average_null_model_mae}")
print(f"Average null model R2 score: {average_null_model_r2}")
print(f"Average null model Pearson score: {average_null_model_pearson}")
print(f"Average MSE score for k={best_k}, g={round(best_g,2)}: {average_mse_score}")
print(f"Average MAE score for k={best_k}, g={round(best_g,2)}: {average_mae_score}")
print(f"Average R2 score for k={best_k}, g={round(best_g,2)}: {average_r2_score}")
print(f"Average Pearson score for k={best_k}, g={round(best_g,2)}: {average_pearson_score}")




Average null model MSE score: 19073.865381296106
Average null model MAE score: 86.17113664476773
Average null model R2 score: 0.0
Average null model Pearson score: nan
Average MSE score for k=1, g=0.8: 14169.29020367183
Average MAE score for k=1, g=0.8: 67.22534423407917
Average R2 score for k=1, g=0.8: -0.015298191823884966
Average Pearson score for k=1, g=0.8: 0.7645702132719759


