In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.neighbors import KNeighborsRegressor

from sklearn.model_selection import GridSearchCV
import gc  # Garbage collector, used for memory management
import sys


# Add the directory two levels above the working directory to the import path
# so that the `utils` and `Preprocessing` packages imported below resolve
# (presumably the project root -- confirm against the repository layout).
sys.path.append(os.path.abspath("../../"))

from utils.scatter_plot import scatter_prediction
from utils.eval_call import evaluate_model

from Preprocessing.split import split_data
from Preprocessing.DropAll_preprocessing_pipeline import preprocessing_pipeline



# Load the dataset from the CSV file located two directories above the
# working directory.
df = pd.read_csv('../../data.csv')

# df.info()  # uncomment for a quick summary of the loaded frame

In [3]:
# Run the project's DropAll preprocessing pipeline over the loaded frame.
# NOTE(review): `df` is rebound to the processed result, so re-running this
# cell on its own feeds already-processed data back into the pipeline.
df = preprocessing_pipeline(df)

In [4]:
# split_data returns eight values, unpacked here in order: the train/test
# partitions, the full X and y, and the categorical / numeric column lists
# that the ColumnTransformer further down is configured with.
(
    X_train,
    X_test,
    y_train,
    y_test,
    X,
    y,
    categorical_features,
    numeric_features,
) = split_data(df)

In [6]:

# --- Feature preprocessing -------------------------------------------------

# Numeric columns: fill missing values with the column median, then standardise.
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    # KNN is distance-based, so features must be on a comparable scale.
    ('scaler', StandardScaler()),
])

# Categorical columns: fill missing values with the most frequent category,
# then map each category to an integer code. Categories not seen during fit
# are encoded as -1 instead of raising.
# NOTE(review): these integer codes are passed to the distance metric
# unscaled and imply an ordering between categories. OneHotEncoder (already
# imported) may suit KNN better -- confirm against feature cardinality and
# memory budget before switching, since it would change the reported metrics.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('ordinal', OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)),
])

# Route each column group through its own transformer.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ])


# --- Model -----------------------------------------------------------------

# Preprocessing and regressor are bundled in one pipeline so that fitting on
# the training split also fits the imputers / scaler / encoder on that split
# only, and prediction reuses those fitted transformers.
final_knn_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('model', KNeighborsRegressor(
        n_neighbors=4,
        weights='distance',  # closer neighbours contribute more to the prediction
        algorithm='auto',
        # Minkowski power parameter: p=1 is Manhattan (L1) distance,
        # p=2 is Euclidean (L2) distance.
        p=2,
        n_jobs=-1,  # use all available cores for the neighbour search
    ))
])


# --- Train, predict, evaluate ----------------------------------------------

print("\nTraining KNN model ...")
final_knn_pipeline.fit(X_train, y_train)

# Make predictions on the held-out split
y_pred_knn = final_knn_pipeline.predict(X_test)

# Report the metrics via the project's evaluation helper
evaluate_model(y_test, y_pred_knn, "KNN Regression")


# Clean up memory. The trailing semicolon stops the notebook from rendering
# gc.collect()'s integer return value as the cell result.
gc.collect();





Training KNN model ...
KNN Regression Performance Metrics:
MAE: 3842.21
MSE: 150416436.67
RMSE: 12264.44
R²: 0.85
------------------------------
KNN Regression Performance Metrics:
MAE: 3842.21
MSE: 150416436.67
RMSE: 12264.44
R²: 0.85
------------------------------


23