# Modell erstellen und trainieren

# 1) Libraries importieren

In [1]:
# Libraries importieren
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# 2) Daten einlesen

In [2]:
# Read the cleaned data set from disk
# NOTE(review): the path is absolute and machine-specific; the notebook only
# runs on a machine where this exact file exists.
clean_data_csv = "/Users/allegratrepte/Desktop/clean_data.csv"
df_clean = pd.read_csv(clean_data_csv)

# Inspect column names, non-null counts and dtypes
df_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 69798 entries, 0 to 69797
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   body_type             69798 non-null  object 
 1   fuel_type             69798 non-null  object 
 2   horsepower            69798 non-null  float64
 3   make_name             69798 non-null  object 
 4   mileage               69798 non-null  float64
 5   model_name            69798 non-null  object 
 6   price                 69798 non-null  float64
 7   wheel_system_display  69798 non-null  object 
 8   average_fuel_economy  69798 non-null  float64
 9   manual                69798 non-null  int64  
 10  age                   69798 non-null  int64  
dtypes: float64(4), int64(2), object(5)
memory usage: 5.9+ MB


# 4) Dummy-Variablen erstellen

In [3]:
# One-hot encode the categorical columns of df_clean. drop_first=True omits the
# first level of each categorical; dtype=float stores the indicators as floats.
df_dummies = pd.get_dummies(data=df_clean, dtype=float, drop_first=True)

In [4]:
# Preview the first five rows of the encoded frame
df_dummies.head(n=5)

Unnamed: 0,horsepower,mileage,price,average_fuel_economy,manual,age,body_type_Coupe,body_type_Hatchback,body_type_Minivan,body_type_Pickup Truck,...,model_name_iA,model_name_iM,model_name_iQ,model_name_tC,model_name_xA,model_name_xD,wheel_system_display_All-Wheel Drive,wheel_system_display_Four-Wheel Drive,wheel_system_display_Front-Wheel Drive,wheel_system_display_Rear-Wheel Drive
0,184.0,25794.0,13000.0,29.5,0,4,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,295.0,15732.0,27300.0,21.5,0,2,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,138.0,4580.0,15724.0,29.0,0,2,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,180.0,25122.0,21000.0,24.0,0,2,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,241.0,61161.0,17300.0,20.5,0,6,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [5]:
# Summarize the encoded frame (number of columns, dtypes, memory usage)
df_dummies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 69798 entries, 0 to 69797
Columns: 744 entries, horsepower to wheel_system_display_Rear-Wheel Drive
dtypes: float64(742), int64(2)
memory usage: 396.2 MB


# 6) Daten für Training und Testing erstellen

 X und y definieren

In [6]:
# Separate the target column from the predictors
target_column = "price"
y = df_dummies[target_column]
X = df_dummies.drop(columns=[target_column])

Training und Testdaten splitten

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 30 % of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)

# 7) Modell Auswahl

## 7.1  Modell 1 - Lineare Regression

7.1.1 Ohne CrossValidation

In [8]:
# Imports
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, mean_absolute_percentage_error
import numpy as np

# Baseline model: ordinary least squares on all numeric and dummy features,
# fitted on the training split only.
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)


# --- Evaluation on the training data ---
y_pred_train = lin_reg.predict(X_train)

mse_train = mean_squared_error(y_train, y_pred_train)
mae_train = mean_absolute_error(y_train, y_pred_train)
r2_train = r2_score(y_train, y_pred_train)
mape_train = mean_absolute_percentage_error(y_train, y_pred_train)

print("MSE Train:", mse_train)
print("MAE Train:", mae_train)
print("RMSE Train:", np.sqrt(mse_train))
print("R²-Wert Train:", r2_train)
print("MAPE Train:", mape_train)


# --- Evaluation on the held-out test data ---
# The test prediction is computed exactly once, here (previously X_test was
# predicted twice and the first result was discarded).
y_pred = lin_reg.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)

print("___________________")

print("MSE:", mse)
print("MAE:", mae)
print("RMSE:", np.sqrt(mse))
print("R²-Wert:", r2)
print("MAPE:", mape)

# NOTE(review): in the recorded run the test R² is strongly negative while the
# train R² is about 0.86. Presumably the very wide one-hot design matrix makes
# the unregularized fit unstable on unseen rows — TODO confirm, e.g. by trying
# a regularized linear model or fewer dummy columns.


MSE Train: 28328899.510255106
MAE Train: 3820.674987357192
RMSE Train: 5322.489972771683
R²-Wert Train: 0.8552735236403478
MAPE Train: 0.19559723031829077
___________________
MSE: 1.443551315359794e+17
MAE: 9548446.542870991
RMSE: 379940957.96054864
R²-Wert: -733154787.6512549
MAPE: 1222.2843581223776


Der Mean Absolute Percentage Error (MAPE) misst die Genauigkeit einer Vorhersage, indem er den durchschnittlichen absoluten Fehler relativ zu den tatsächlichen Werten berechnet. Er zeigt also, wie weit die Vorhersagen im Durchschnitt prozentual von den tatsächlichen Werten abweichen. Dadurch ermöglicht der MAPE einen einfachen Vergleich der Vorhersagegenauigkeit über verschiedene Datensätze hinweg, unabhängig von der Skalierung der Daten. Hinweis: scikit-learn gibt den MAPE als Anteil und nicht in Prozent aus, d. h. ein Wert von 0.196 entspricht einem mittleren Fehler von ca. 19,6 %.

Quelle: https://chatgpt.com/share/a9cfb871-f4a2-46e0-9bdd-7d844967597a 

7.1.2 Lineare Regression mit CrossValidation

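Offene Frage: Die Ergebnisse sind nicht plausibel und müssen noch geklärt werden. Die R²-Werte der Cross-Validation und auf den Testdaten sind extrem negativ, obwohl das Modell auf den Trainingsdaten ein R² von ca. 0,86 erreicht.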

In [9]:
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.datasets import make_regression  # NOTE(review): not used in this cell
# mean_absolute_percentage_error is called below, so it is imported here as
# well; the cell no longer depends on an earlier cell having imported it.
from sklearn.metrics import (
    mean_squared_error,
    mean_absolute_error,
    mean_absolute_percentage_error,
    r2_score,
)



# Create the model
lin_reg = LinearRegression()

# Apply cross-validation
# 5-fold cross-validation on the training split, scored with R^2
cv_scores = cross_val_score(lin_reg, X_train, y_train, cv=5, scoring='r2')
print("Cross-Validation R^2 Scores: ", cv_scores)
print("Durchschnittlicher R^2 Score: ", np.mean(cv_scores))
print("Standardabweichung der R^2 Scores: ", np.std(cv_scores))

# cross_val_score fits clones of the estimator, so lin_reg itself is still
# unfitted here; train it on the full training split.
lin_reg.fit(X_train, y_train)

# --- Evaluation on the training data ---
y_pred_train = lin_reg.predict(X_train)

mse_train = mean_squared_error(y_train, y_pred_train)
mae_train = mean_absolute_error(y_train, y_pred_train)
r2_train = r2_score(y_train, y_pred_train)
mape_train = mean_absolute_percentage_error(y_train, y_pred_train)

print("MSE Train:", mse_train)
print("MAE Train:", mae_train)
print("RMSE Train:", np.sqrt(mse_train))
print("R²-Wert Train:", r2_train)
print("MAPE Train:", mape_train)


# --- Evaluation on the held-out test data ---
y_pred = lin_reg.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)

print("___________________")

print("MSE:", mse)
print("MAE:", mae)
print("RMSE:", np.sqrt(mse))
print("R²-Wert:", r2)
print("MAPE:", mape)


Cross-Validation R^2 Scores:  [-2.24582128e+07 -6.01441760e+08 -3.43515744e+06 -1.11395497e+06
 -5.66553809e+08]
Durchschnittlicher R^2 Score:  -239000578.74805745
Standardabweichung der R^2 Scores:  282002282.95532537
MSE Train: 28328899.510255106
MAE Train: 3820.674987357192
RMSE Train: 5322.489972771683
R²-Wert Train: 0.8552735236403478
MAPE Train: 0.19559723031829077
___________________
MSE: 1.443551315359794e+17
MAE: 9548446.542870991
RMSE: 379940957.96054864
R²-Wert: -733154787.6512549
MAPE: 1222.2843581223776


Quelle: https://chatgpt.com/share/45ad46bc-c945-4c74-8aa2-deadc4f621b9

## 7.2) Decision Tree Regressor

7.2.1 Ohne CrossValidation

In [10]:
# Imports
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

# Build the decision tree regressor (fixed seed) and fit it on the training split
tree_model = DecisionTreeRegressor(random_state=42)
tree_model.fit(X_train, y_train)


# --- Evaluation on the training data ---
y_pred_train = tree_model.predict(X_train)

mse_train = mean_squared_error(y_train, y_pred_train)
mae_train = mean_absolute_error(y_train, y_pred_train)
r2_train = r2_score(y_train, y_pred_train)
mape_train = mean_absolute_percentage_error(y_train, y_pred_train)

for _label, _value in (
    ("MSE Train:", mse_train),
    ("MAE Train:", mae_train),
    ("RMSE Train:", np.sqrt(mse_train)),
    ("R²-Wert Train:", r2_train),
    ("MAPE Train:", mape_train),
):
    print(_label, _value)


# --- Evaluation on the held-out test data ---
y_pred = tree_model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)

print("___________________")

for _label, _value in (
    ("MSE:", mse),
    ("MAE:", mae),
    ("RMSE:", np.sqrt(mse)),
    ("R²-Wert:", r2),
    ("MAPE:", mape),
):
    print(_label, _value)

MSE Train: 4570015.718991546
MAE Train: 892.1108191003916
RMSE Train: 2137.759509157086
R²-Wert Train: 0.9766527368393382
MAPE Train: 0.026414838809876526
___________________
MSE: 21388439.54460423
MAE: 3163.9208235249157
RMSE: 4624.763728516758
R²-Wert: 0.8913718084902574
MAPE: 0.12980205052142674


Eventuell noch löschen: 7.2.2 Mit CrossValidation: Variante (ChatGPT-Vorschlag)

In [11]:
# Required imports
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import cross_val_score, cross_validate, KFold
import numpy as np

# Create the decision tree regressor
tree_model = DecisionTreeRegressor(random_state=42)

# Cross-validation setup: 5 shuffled folds with a fixed seed
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Evaluate all three metrics in a single cross-validation pass. Each fold is
# fitted once instead of three times; the splits are the same ones the three
# separate cross_val_score runs used.
cv_results = cross_validate(
    tree_model,
    X_train,
    y_train,
    cv=kf,
    scoring=("neg_mean_squared_error", "neg_mean_absolute_error", "r2"),
)

# scikit-learn reports error metrics negated (higher is better), so flip the sign
mse_scores = -cv_results["test_neg_mean_squared_error"]
mae_scores = -cv_results["test_neg_mean_absolute_error"]
r2_scores = cv_results["test_r2"]

# Averages over the folds
mean_mse = mse_scores.mean()
mean_mae = mae_scores.mean()
# Square root of the mean MSE (not the mean of the per-fold RMSE values)
mean_rmse = np.sqrt(mean_mse)
mean_r2 = r2_scores.mean()

print("Cross-Validation MSE:", mean_mse)
print("Cross-Validation MAE:", mean_mae)
print("Cross-Validation RMSE:", mean_rmse)
print("Cross-Validation R²-Wert:", mean_r2)

Cross-Validation MSE: 21252685.77627711
Cross-Validation MAE: 3159.2481806813457
Cross-Validation RMSE: 4610.063532780987
Cross-Validation R²-Wert: 0.8914015997553569


Anderer: ChatGPT Vorschlag: https://chatgpt.com/share/a0dac765-c550-4949-8887-a07e0b21a0f0

7.2.3 Mit CrossValidation: Variante Vorlesung

In [12]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

# Hyperparameter grid for GridSearchCV
# NOTE(review): max_depth lists only the two values 1 and 30, not a range — confirm this is intended.
parameters = dict(
    max_depth=[1, 30],
    min_samples_leaf=[1, 10, 20, 30, 50, 100],
)

# Decision tree with the squared-error criterion, wrapped in a 5-fold grid search
tree_model = DecisionTreeRegressor(random_state=1, criterion="squared_error")
tree_model_CV = GridSearchCV(estimator=tree_model, param_grid=parameters, cv=5)

# Run the search on the training data
tree_model_CV.fit(X_train, y_train)


# --- Evaluation on the training data ---
y_pred_train = tree_model_CV.predict(X_train)

mse_train, mae_train, r2_train, mape_train = (
    metric(y_train, y_pred_train)
    for metric in (
        mean_squared_error,
        mean_absolute_error,
        r2_score,
        mean_absolute_percentage_error,
    )
)

print("MSE Train:", mse_train)
print("MAE Train:", mae_train)
print("RMSE Train:", np.sqrt(mse_train))
print("R²-Wert Train:", r2_train)
print("MAPE Train:", mape_train)


# --- Evaluation on the held-out test data ---
y_pred = tree_model_CV.predict(X_test)

mse, mae, r2, mape = (
    metric(y_test, y_pred)
    for metric in (
        mean_squared_error,
        mean_absolute_error,
        r2_score,
        mean_absolute_percentage_error,
    )
)

print("___________________")

print("MSE:", mse)
print("MAE:", mae)
print("RMSE:", np.sqrt(mse))
print("R²-Wert:", r2)
print("MAPE:", mape)




MSE Train: 13117654.676177952
MAE Train: 2484.904276477114
RMSE Train: 3621.83029367445
R²-Wert Train: 0.9329846209275197
MAPE Train: 0.09983002221841651
___________________
MSE: 18684683.20614332
MAE: 2944.9475296701344
RMSE: 4322.578305380172
R²-Wert: 0.9051037201015515
MAPE: 0.1208133394688803


## 7.3) Random Forest Regressor

In [13]:
# Imports
from sklearn.ensemble import RandomForestRegressor

# Random forest with 100 trees and a fixed seed, fitted on the training split
# (fit returns the estimator itself, so the call can be chained)
forest_model = RandomForestRegressor(random_state=42, n_estimators=100).fit(X_train, y_train)

# EVALUATION

# --- Training data ---
y_pred_train = forest_model.predict(X_train)

mse_train = mean_squared_error(y_train, y_pred_train)
mae_train = mean_absolute_error(y_train, y_pred_train)
r2_train = r2_score(y_train, y_pred_train)
mape_train = mean_absolute_percentage_error(y_train, y_pred_train)

for _label, _value in (
    ("MSE Train:", mse_train),
    ("MAE Train:", mae_train),
    ("RMSE Train:", np.sqrt(mse_train)),
    ("R²-Wert Train:", r2_train),
    ("MAPE Train:", mape_train),
):
    print(_label, _value)


# --- Held-out test data ---
y_pred = forest_model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)

print("___________________")

for _label, _value in (
    ("MSE:", mse),
    ("MAE:", mae),
    ("RMSE:", np.sqrt(mse)),
    ("R²-Wert:", r2),
    ("MAPE:", mape),
):
    print(_label, _value)





MSE Train: 5778766.312023349
MAE Train: 1482.4931942546452
RMSE Train: 2403.906469067245
R²-Wert Train: 0.9704774805762488
MAPE Train: 0.05422443576281316
___________________
MSE: 15675809.47545541
MAE: 2701.1468394560166
RMSE: 3959.268805657859
R²-Wert: 0.9203852702662644
MAPE: 0.10754129026290545


Modell Exportieren

In [14]:
import pickle

# Target file for the serialized model (a local path, not Google Drive)
# NOTE(review): the path is absolute and machine-specific.
filename = '/Users/allegratrepte/Desktop/new_model.sav'

# Export the fitted random forest. The context manager closes the file handle
# even if pickling raises, so no handle is leaked.
with open(filename, 'wb') as model_file:
    pickle.dump(forest_model, model_file)

print(f"Das Modell wurde erfolgreich in der Datei '{filename}' gespeichert.")


Das Modell wurde erfolgreich in der Datei '/Users/allegratrepte/Desktop/new_model.sav' gespeichert.


In [15]:
# Recompute the test-set MAE for the predictions currently held in y_pred
# and show it as the cell output
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
mae

2701.1468394560166

Modell mit CrossValidation und Hyperparametern

Noch löschen:

In [16]:
# Re-import of train_test_split; only the commented-out line below would use it.
from sklearn.model_selection import train_test_split

# Disabled experiment: an alternative split with test_size=0.7. It is commented
# out, so the split created in the earlier cell stays in effect.
##X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.7, random_state=42, shuffle=True)

In [None]:
# Required imports
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold, GridSearchCV
# mean_absolute_percentage_error is called below, so it is imported here too
from sklearn.metrics import (
    mean_squared_error,
    mean_absolute_error,
    mean_absolute_percentage_error,
    r2_score,
)
import numpy as np

# Hyperparameter grid to search
# NOTE(review): max_depth=[1] limits every tree to a single split — presumably
# a placeholder to keep the search fast; widen it before relying on the result.
parameters = {
    'n_estimators': [100],
    'max_depth': [1],
    'min_samples_split': [10],
    'min_samples_leaf': [10, 100],
}

# Shuffled 5-fold splitter with a fixed seed
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Build the grid search and actually pass the configured splitter. Previously
# `kf` was created but `cv=5` was passed, unlike the Gradient Boosting search.
grid_search = GridSearchCV(RandomForestRegressor(random_state=42), parameters, cv=kf)

# Run the grid search on the training data
grid_search.fit(X_train, y_train)

# Best hyperparameters
print("Beste Hyperparameter:", grid_search.best_params_)

# GridSearchCV refits the best configuration on the whole training split by
# default, so best_estimator_ is already the final trained model.
best_forest_model = grid_search.best_estimator_

# EVALUATION

# --- Training data ---
y_pred_train = best_forest_model.predict(X_train)

mse_train = mean_squared_error(y_train, y_pred_train)
mae_train = mean_absolute_error(y_train, y_pred_train)
r2_train = r2_score(y_train, y_pred_train)
mape_train = mean_absolute_percentage_error(y_train, y_pred_train)

print("MSE Train:", mse_train)
print("MAE Train:", mae_train)
print("RMSE Train:", np.sqrt(mse_train))
print("R²-Wert Train:", r2_train)
print("MAPE Train:", mape_train)


# --- Held-out test data ---
y_pred = best_forest_model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)

print("___________________")

print("MSE:", mse)
print("MAE:", mae)
print("RMSE:", np.sqrt(mse))
print("R²-Wert:", r2)
print("MAPE:", mape)



## 10) Modell 4 - Gradient Boosting Regressor

In [18]:
# Import
from sklearn.ensemble import GradientBoostingRegressor

# Gradient boosting with 100 depth-3 trees and a learning rate of 0.1,
# fitted on the training split (fit returns the estimator itself)
gb_model = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
).fit(X_train, y_train)


# EVALUATION

# --- Training data ---
y_pred_train = gb_model.predict(X_train)

mse_train, mae_train, r2_train, mape_train = (
    metric(y_train, y_pred_train)
    for metric in (
        mean_squared_error,
        mean_absolute_error,
        r2_score,
        mean_absolute_percentage_error,
    )
)

print("MSE Train:", mse_train)
print("MAE Train:", mae_train)
print("RMSE Train:", np.sqrt(mse_train))
print("R²-Wert Train:", r2_train)
print("MAPE Train:", mape_train)


# --- Held-out test data ---
y_pred = gb_model.predict(X_test)

mse, mae, r2, mape = (
    metric(y_test, y_pred)
    for metric in (
        mean_squared_error,
        mean_absolute_error,
        r2_score,
        mean_absolute_percentage_error,
    )
)

print("___________________")

print("MSE:", mse)
print("MAE:", mae)
print("RMSE:", np.sqrt(mse))
print("R²-Wert:", r2)
print("MAPE:", mape)

MSE Train: 22529953.310120787
MAE Train: 3382.2748925057226
RMSE Train: 4746.572796252132
R²-Wert Train: 0.8848991379301235
MAPE Train: 0.13908377355748117
___________________
MSE: 23353135.797304146
MAE: 3445.6320052584156
RMSE: 4832.508230443498
R²-Wert: 0.8813934554481065
MAPE: 0.14122276385120555


Mit CrossValidation und Hyperparametern

In [19]:
# Required imports
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

# Hyperparameter grid: 3 x 3 x 3 = 27 candidate configurations
param_grid = dict(
    n_estimators=[100, 200, 300],
    learning_rate=[0.05, 0.1, 0.2],
    max_depth=[3, 4, 5],
)

# Shuffled 5-fold splitter with a fixed seed
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Grid search that ranks candidates by (negated) mean squared error
grid_search = GridSearchCV(
    estimator=GradientBoostingRegressor(random_state=42),
    param_grid=param_grid,
    cv=kf,
    scoring='neg_mean_squared_error',
    verbose=1,
)

# Run the search on the training data
grid_search.fit(X_train, y_train)

# Best hyperparameters
print("Beste Hyperparameter:", grid_search.best_params_)

# The best configuration, already refitted by GridSearchCV (default refit behaviour)
best_gb_model = grid_search.best_estimator_

# EVALUATION

# --- Training data ---
y_pred_train = best_gb_model.predict(X_train)

mse_train = mean_squared_error(y_train, y_pred_train)
mae_train = mean_absolute_error(y_train, y_pred_train)
r2_train = r2_score(y_train, y_pred_train)
mape_train = mean_absolute_percentage_error(y_train, y_pred_train)

for _label, _value in (
    ("MSE Train:", mse_train),
    ("MAE Train:", mae_train),
    ("RMSE Train:", np.sqrt(mse_train)),
    ("R²-Wert Train:", r2_train),
    ("MAPE Train:", mape_train),
):
    print(_label, _value)


# --- Held-out test data ---
y_pred = best_gb_model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)

print("___________________")

for _label, _value in (
    ("MSE:", mse),
    ("MAE:", mae),
    ("RMSE:", np.sqrt(mse)),
    ("R²-Wert:", r2),
    ("MAPE:", mape),
):
    print(_label, _value)

Fitting 5 folds for each of 27 candidates, totalling 135 fits
Beste Hyperparameter: {'learning_rate': 0.2, 'max_depth': 5, 'n_estimators': 300}
MSE Train: 11315652.460905135
MAE Train: 2411.571766370248
RMSE Train: 3363.874620271263
R²-Wert Train: 0.9421906767756938
MAPE Train: 0.0980257787679142
___________________
MSE: 13488030.8564886
MAE: 2597.9225179983273
RMSE: 3672.6054588654906
R²-Wert: 0.9314966201291861
MAPE: 0.10464618778283413


Quelle CrossValidation: https://chatgpt.com/share/6fcf8984-041d-4b38-8beb-e93c2e32ccd0

## Notizen, die noch gelöscht werden

In [20]:
# Libraries
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

# Hyperparameter grid
param_grid = dict(
    n_estimators=[100, 200, 300],
    learning_rate=[0.05, 0.1, 0.2],
    max_depth=[3, 4, 5],
)

# Shuffled 5-fold splitter with a fixed seed
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Grid search scored by negated mean squared error
grid_search = GridSearchCV(
    estimator=GradientBoostingRegressor(random_state=42),
    param_grid=param_grid,
    cv=kf,
    scoring='neg_mean_squared_error',
    verbose=1,
)

# Run the search on the training data
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 27 candidates, totalling 135 fits


In [None]:
# Libraries
from sklearn.model_selection import GridSearchCV, KFold

# Hyperparameter grid
param_grid = dict(
    n_estimators=[100, 200, 300],
    learning_rate=[0.05, 0.1, 0.2],
    max_depth=[3, 4, 5],
)

# Shuffled 5-fold splitter with a fixed seed
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Grid search scored by negated mean squared error
grid_search = GridSearchCV(
    estimator=GradientBoostingRegressor(random_state=42),
    param_grid=param_grid,
    cv=kf,
    scoring='neg_mean_squared_error',
    verbose=1,
)

# Run the search on the training data
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 27 candidates, totalling 135 fits


In [None]:
# Shuffled 5-fold splitter with a fixed seed
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Grid search over the previously defined param_grid, scored by negated MSE
grid_search = GridSearchCV(
    estimator=GradientBoostingRegressor(random_state=42),
    param_grid=param_grid,
    cv=kf,
    scoring='neg_mean_squared_error',
    verbose=1,
)

# Run the search on the training data
grid_search.fit(X_train, y_train)

## 12) Das beste Modell auswählen und auf den ganzen Daten trainieren