In [1]:
import pandas as pd
import numpy as np
import pandas
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder
from sklearn.compose import ColumnTransformer, TransformedTargetRegressor
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.neighbors import KNeighborsRegressor


import sys
import os

sys.path.append(os.path.abspath("../../.."))

from Preprocessing.imputation import get_imputation_maps, apply_imputation, ContextImputer

from Preprocessing.split_new import split_data

from utils.scatter_plot import scatter_prediction
from utils.eval_call import evaluate_model

In [2]:
def main():


    X_train, X_test, y_train, y_test, categorical_features , numeric_features = split_data('../../../data.csv', description = True)

    # Preprocessing-Pipelines erstellen
    numeric_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler())
    ])

    categorical_transformer = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('onehot', OneHotEncoder(handle_unknown='ignore'))
    ])

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numeric_features),
            ('cat', categorical_transformer, categorical_features)
        ])
    
    knn_pipeline = Pipeline(steps=[
        ('imp_fc', ContextImputer('fuel_consumption_l_100km')),
        ('imp_ps', ContextImputer('power_ps')),
        ('imp_er', ContextImputer('electric_range')),
        ('preprocessor', preprocessor),
        ('model', KNeighborsRegressor())
    ])

    # Modell trainieren
    knn_pipeline.fit(X_train, y_train)

    # Vorhersagen treffen
    y_pred_knn = knn_pipeline.predict(X_test)



    evaluate_model(y_test, y_pred_knn, "KNN Regression")
    scatter_prediction(y_test, y_pred_knn, "KNN Regression")

    # Prüfe, ob fuel_type in X_test enthalten ist
    if 'fuel_type' in X_test.columns:
        results_df = pd.DataFrame({
            'fuel_type': X_test['fuel_type'].values,
            'y_true': y_test.values,
            'y_pred': y_pred_knn
        })


if __name__ == "__main__":
    main()

TypeError: split_data() got an unexpected keyword argument 'description'