In [1]:
from src.data_preprocessor import DataProcessor
from src.cross_validation import CrossValidation
from src.evaluation import Evaluation
from models.knn import KNN
from models.null_model import NullModelClassification, NullModelRegression
from data_configs.configs import *
import statistics

# Select the dataset configuration. `albalone_config` is not defined in this notebook,
# so it is supplied by the wildcard import from data_configs.configs.
config = albalone_config
# All helpers below are built from the same config object.
data_processor = DataProcessor(config=config)
cross_validator = CrossValidation(config=config)
# NOTE(review): neither null model is referenced again in the cells visible here.
classification_nullmodel = NullModelClassification(config=config)
regression_nullmodel = NullModelRegression(config=config)
# KNN receives the config positionally, unlike the keyword form used above.
knn_model = KNN(config)

In [2]:
# Data Processing
# Linear pipeline: each step consumes the previous step's result, and every
# intermediate is kept under its own name so no earlier stage is overwritten.

# Stage 0: load the dataset through the configured DataProcessor.
raw_data = data_processor.load_data()

# Stage 1: missing-value imputation (strategy is defined in DataProcessor, not shown here).
data_1 = data_processor.impute_missing_values(raw_data)

# Stage 2: encoding of nominal features.
data_2 = data_processor.encode_nominal_features(data_1)

# Stage 3: encoding of ordinal features; `data_3` is what gets partitioned next.
data_3 = data_processor.encode_ordinal_features(data_2)

In [3]:
# Split the fully processed data into two parts: `data_train` feeds the tuning loop,
# `data_val` is the set predictions are scored on. random_state=42 is passed so the
# split can be reproduced. NOTE(review): the split ratio is decided inside
# random_partition and is not visible here.
data_train, data_val = cross_validator.random_partition(data_3, random_state=42)

In [4]:
# gamma is the reciprocal of the sample standard deviation (statistics.stdev) of the
# target column, computed on the training partition only. It is later passed to
# knn_regression as `gamma`.
# NOTE(review): a constant target would make stdev 0 and raise ZeroDivisionError.
gamma = 1/(statistics.stdev(data_train[config['target_column']]))

In [None]:
# Sweep over k for condensed k-NN regression.
# For every candidate k, each split produced by
# cross_validation(n_splits=2, n_repeats=5) contributes one score: the first element of
# the split is condensed, the condensed set is used to predict `data_val`, and the mean
# squared error against the validation targets is recorded. The per-k average of those
# scores is printed and also stored in `average_scores` for later selection.
# NOTE(review): the condensed set does not depend on k, yet it is rebuilt for every k
# on a fresh cross_validation() call. If the splits are reproducible, condensing once
# per split and reusing it across all k would cut the work roughly tenfold — confirm.
hyperparameters = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# Loop-invariant pieces, built once instead of on every inner iteration.
evaluator = Evaluation()
val_targets = data_val[config['target_column']]

average_scores = {}  # k -> mean MSE over all splits
for k in hyperparameters:
    scores = []
    # Only the first element of each split is used; the second is ignored by this sweep.
    for train_set_1, _train_set_2 in cross_validator.cross_validation(data_train, n_splits=2, n_repeats=5, stratify=False):
        # Condense the first element of the split.
        # NOTE(review): meaning of the second argument (5) is not visible here — TODO confirm.
        condensed_train = knn_model.condensed_knn_regression(train_set_1, 5)
        # Predict the validation rows from the condensed set, then score against the
        # validation targets (scoring is on data_val, not on train_set_1).
        predictions_1 = knn_model.knn_regression(data_val, condensed_train, k=k, gamma=gamma)['Predicted Value']
        score = evaluator.mean_squared_error(val_targets, predictions_1)
        scores.append(score)

    average_score = sum(scores) / len(scores)
    average_scores[k] = average_score
    print(f"Average score for k={k}: {average_score}")
