In [None]:
from xgboost import XGBRegressor
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_pinball_loss, make_scorer

In [None]:
def get_feature_importance(model, features):
  """Return the model's feature importances as a table, most important first.

  Args:
    model: Fitted estimator exposing a ``feature_importances_`` attribute.
    features: Feature names, in the same order as the training columns.

  Returns:
    DataFrame with columns ``Feature`` and ``Importance``, sorted by
    ``Importance`` in descending order. The original positional index is kept,
    so it still shows each feature's column position.
  """
  importance_table = pd.DataFrame(
      {'Feature': features, 'Importance': model.feature_importances_}
  )
  return importance_table.sort_values(by='Importance', ascending=False)

In [None]:
def pinball_score(y_true, y_pred, quantiles=None):
    """Mean pinball loss averaged over all predicted quantiles.

    Args:
        y_true: Observed target values, one row per sample.
        y_pred: 2-D array-like of shape (n_samples, n_quantiles); column ``i``
            holds the prediction for the ``i``-th quantile.
        quantiles: Quantile level of each prediction column. Defaults to
            0.1, 0.2, ... (i.e. ``(i + 1) / 10`` for column ``i``), which
            matches the ``quantile_alpha`` used by the models in this notebook.

    Returns:
        The unweighted mean of the per-quantile pinball losses (lower is better).

    Raises:
        ValueError: If ``quantiles`` is given and its length does not match the
            number of prediction columns.
    """
    # Work on a plain array so that iterating over `.T` yields columns of values.
    y_pred = np.asarray(y_pred)
    if quantiles is None:
        quantiles = [(i + 1) / 10 for i in range(len(y_pred.T))]
    elif len(quantiles) != len(y_pred.T):
        raise ValueError(
            f"Expected {len(y_pred.T)} quantile levels, got {len(quantiles)}"
        )
    return np.array([mean_pinball_loss(y_true, pred, alpha=alpha)
                     for alpha, pred in zip(quantiles, y_pred.T)]).mean()

In [None]:
# Load the solar feature tables (the "_ohe" files) and the solar targets for the
# train and test split from parquet files in the current working directory.
x_solar_train = pd.read_parquet("x_solar_train_ohe.parquet")
x_solar_test = pd.read_parquet("x_solar_test_ohe.parquet")
y_solar_train = pd.read_parquet("y_solar_train.parquet")
y_solar_test = pd.read_parquet("y_solar_test.parquet")

In [None]:
# Preview the first rows of the solar training features.
x_solar_train.head()

Unnamed: 0,CloudCover,SolarDownwardRadiation,temp_solar,year,month,day,hour,hours_after,SolarDownwardRadiation_standardScaled,SolarDownwardRadiation_minMaxScaled,SolarDownwardRadiation_normalized,time_of_day_afternoon,time_of_day_morning,time_of_day_night,season_autumn,season_spring,season_summer,season_winter
0,0.450405,0.0,13.646173,2020,9,20,0,0.0,-0.669951,0.000261,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
1,0.472211,0.0,13.658508,2020,9,20,0,0.5,-0.669951,0.000261,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
2,0.494018,0.0,13.670843,2020,9,20,1,1.0,-0.669951,0.000261,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
3,0.520214,0.0,13.732101,2020,9,20,1,1.5,-0.669951,0.000261,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
4,0.54641,0.0,13.79336,2020,9,20,2,2.0,-0.669951,0.000261,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0


In [None]:
# Preview the first rows of the solar training target.
y_solar_train.head()

Unnamed: 0,Solar_MWh_credit
0,0.0
1,0.0
2,0.0
3,0.0
4,0.0


In [None]:
# Experiment: raw (unscaled) SolarDownwardRadiation as the only feature.
# Double brackets select a one-column DataFrame, so to_numpy() yields 2-D arrays.
x_solarSDR_train_unscaled = x_solar_train[["SolarDownwardRadiation"]].to_numpy()
x_solarSDR_test_unscaled = x_solar_test[["SolarDownwardRadiation"]].to_numpy()
y_solarSDR_train_unscaled = y_solar_train[["Solar_MWh_credit"]].to_numpy()
y_solarSDR_test_unscaled = y_solar_test[["Solar_MWh_credit"]].to_numpy()

In [None]:
# Quantile-regression model that is fitted for the nine quantile levels 0.1 ... 0.9 at once.
# NOTE(review): device="cuda" presumably requires a GPU runtime — confirm before running elsewhere.
model_solarSDR_unscaled = XGBRegressor(device="cuda", max_depth=2, n_estimators=1000, objective="reg:quantileerror",
                     quantile_alpha=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
model_solarSDR_unscaled.fit(x_solarSDR_train_unscaled, y_solarSDR_train_unscaled)

In [None]:
# Predict the quantiles for the training and the test rows.
prediction_solarSDR_unscaled_train = model_solarSDR_unscaled.predict(x_solarSDR_train_unscaled)
prediction_solarSDR_unscaled_test = model_solarSDR_unscaled.predict(x_solarSDR_test_unscaled)

In [None]:
# Report the pinball loss averaged over all quantiles for train and test.
print(f"Pinballscore train = {pinball_score(y_solarSDR_train_unscaled, prediction_solarSDR_unscaled_train)}")
print(f"Pinballscore test = {pinball_score(y_solarSDR_test_unscaled, prediction_solarSDR_unscaled_test)}")

Pinballscore train = 16.53075008814621
Pinballscore test = 18.398989970341912


In [None]:
# Experiment: the pre-computed SolarDownwardRadiation_standardScaled column as the only feature.
# The target is the same unscaled Solar_MWh_credit column as in the other experiments.
x_solarSDR_train_standardScaled = x_solar_train[["SolarDownwardRadiation_standardScaled"]].to_numpy()
x_solarSDR_test_standardScaled = x_solar_test[["SolarDownwardRadiation_standardScaled"]].to_numpy()
y_solarSDR_train_standardScaled = y_solar_train[["Solar_MWh_credit"]].to_numpy()
y_solarSDR_test_standardScaled = y_solar_test[["Solar_MWh_credit"]].to_numpy()

In [None]:
# Same quantile-regression setup as the unscaled baseline, fitted on the standard-scaled feature.
# NOTE(review): max_depth is 5 here but 2 in the unscaled baseline, so a score difference
# between the two experiments cannot be attributed to the scaling alone.
model_solarSDR_standardScaled = XGBRegressor(device="cuda", max_depth=5, n_estimators=1000, objective="reg:quantileerror",
                                             quantile_alpha=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
model_solarSDR_standardScaled.fit(x_solarSDR_train_standardScaled, y_solarSDR_train_standardScaled)

In [None]:
# Predict the quantiles for the training and the test rows.
prediction_solarSDR_standardScaled_train = model_solarSDR_standardScaled.predict(x_solarSDR_train_standardScaled)
prediction_solarSDR_standardScaled_test = model_solarSDR_standardScaled.predict(x_solarSDR_test_standardScaled)

In [None]:
# Report the pinball loss averaged over all quantiles for train and test.
print(f"Pinballscore train = {pinball_score(y_solarSDR_train_standardScaled, prediction_solarSDR_standardScaled_train)}")
print(f"Pinballscore test = {pinball_score(y_solarSDR_test_standardScaled, prediction_solarSDR_standardScaled_test)}")

Pinballscore train = 16.39388274510491
Pinballscore test = 18.204264219848476


In [None]:
# Experiment: the pre-computed SolarDownwardRadiation_minMaxScaled column as the only feature.
x_solarSDR_train_minMaxScaled = x_solar_train[["SolarDownwardRadiation_minMaxScaled"]].to_numpy()
x_solarSDR_test_minMaxScaled = x_solar_test[["SolarDownwardRadiation_minMaxScaled"]].to_numpy()
y_solarSDR_train_minMaxScaled = y_solar_train[["Solar_MWh_credit"]].to_numpy()
y_solarSDR_test_minMaxScaled = y_solar_test[["Solar_MWh_credit"]].to_numpy()

In [None]:
# Same quantile-regression setup, fitted on the min-max-scaled feature.
# NOTE(review): max_depth=5 differs from the unscaled baseline (max_depth=2).
model_solarSDR_minMaxScaled = XGBRegressor(device="cuda", max_depth=5, n_estimators=1000, objective="reg:quantileerror",
                                           quantile_alpha=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
model_solarSDR_minMaxScaled.fit(x_solarSDR_train_minMaxScaled, y_solarSDR_train_minMaxScaled)

In [None]:
# Predict the quantiles for the training and the test rows.
prediction_solarSDR_minMaxScaled_train = model_solarSDR_minMaxScaled.predict(x_solarSDR_train_minMaxScaled)
prediction_solarSDR_minMaxScaled_test = model_solarSDR_minMaxScaled.predict(x_solarSDR_test_minMaxScaled)

In [None]:
# Report the pinball loss averaged over all quantiles for train and test.
print(f"Pinballscore train = {pinball_score(y_solarSDR_train_minMaxScaled, prediction_solarSDR_minMaxScaled_train)}")
print(f"Pinballscore test = {pinball_score(y_solarSDR_test_minMaxScaled, prediction_solarSDR_minMaxScaled_test)}")

Pinballscore train = 16.393654125371782
Pinballscore test = 18.20289984614799


In [None]:
# Experiment: the pre-computed SolarDownwardRadiation_normalized column as the only feature.
x_solarSDR_train_normalized = x_solar_train[["SolarDownwardRadiation_normalized"]].to_numpy()
x_solarSDR_test_normalized = x_solar_test[["SolarDownwardRadiation_normalized"]].to_numpy()
y_solarSDR_train_normalized = y_solar_train[["Solar_MWh_credit"]].to_numpy()
y_solarSDR_test_normalized = y_solar_test[["Solar_MWh_credit"]].to_numpy()

In [None]:
# Same quantile-regression setup, fitted on the "normalized" feature column.
# NOTE(review): max_depth=5 differs from the unscaled baseline (max_depth=2).
model_solarSDR_normalized = XGBRegressor(device="cuda", max_depth=5, n_estimators=1000, objective="reg:quantileerror",
                                           quantile_alpha=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
model_solarSDR_normalized.fit(x_solarSDR_train_normalized, y_solarSDR_train_normalized)

In [None]:
# Predict the quantiles for the training and the test rows.
prediction_solarSDR_normalized_train = model_solarSDR_normalized.predict(x_solarSDR_train_normalized)
prediction_solarSDR_normalized_test = model_solarSDR_normalized.predict(x_solarSDR_test_normalized)

In [None]:
# Report the pinball loss averaged over all quantiles for train and test.
print(f"Pinballscore train = {pinball_score(y_solarSDR_train_normalized, prediction_solarSDR_normalized_train)}")
print(f"Pinballscore test = {pinball_score(y_solarSDR_test_normalized, prediction_solarSDR_normalized_test)}")

Pinballscore train = 43.673154336832425
Pinballscore test = 49.23400086387605


In [None]:
# Experiment: raw SolarDownwardRadiation plus the one-hot time-of-day and season columns.
x_solarSDR_train_unscaled_withOhe = x_solar_train[["SolarDownwardRadiation", 'time_of_day_afternoon', 'time_of_day_morning', 'time_of_day_night',
                                                   'season_autumn', 'season_spring', 'season_summer', 'season_winter']].to_numpy()
x_solarSDR_test_unscaled_withOhe = x_solar_test[["SolarDownwardRadiation", 'time_of_day_afternoon', 'time_of_day_morning', 'time_of_day_night',
                                                   'season_autumn', 'season_spring', 'season_summer', 'season_winter']].to_numpy()
y_solarSDR_train_unscaled_withOhe = y_solar_train[["Solar_MWh_credit"]].to_numpy()
y_solarSDR_test_unscaled_withOhe = y_solar_test[["Solar_MWh_credit"]].to_numpy()

In [None]:
# Same quantile-regression setup, fitted on the unscaled feature plus the one-hot columns.
# NOTE(review): max_depth=5 differs from the unscaled single-feature baseline (max_depth=2).
model_solarSDR_unscaled_withOhe = XGBRegressor(device="cuda", max_depth=5, n_estimators=1000, objective="reg:quantileerror",
                                           quantile_alpha=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
model_solarSDR_unscaled_withOhe.fit(x_solarSDR_train_unscaled_withOhe, y_solarSDR_train_unscaled_withOhe)

In [None]:
# Predict the quantiles for the training and the test rows.
prediction_solarSDR_unscaled_withOhe_train = model_solarSDR_unscaled_withOhe.predict(x_solarSDR_train_unscaled_withOhe)
prediction_solarSDR_unscaled_withOhe_test = model_solarSDR_unscaled_withOhe.predict(x_solarSDR_test_unscaled_withOhe)

In [None]:
# Report the pinball loss averaged over all quantiles for train and test.
print(f"Pinballscore train = {pinball_score(y_solarSDR_train_unscaled_withOhe, prediction_solarSDR_unscaled_withOhe_train)}")
print(f"Pinballscore test = {pinball_score(y_solarSDR_test_unscaled_withOhe, prediction_solarSDR_unscaled_withOhe_test)}")

Pinballscore train = 14.339490310678393
Pinballscore test = 16.03308297458021


## Wind-Modell mit WindSpeedPCA, hours_after und Tages/Jahreszeit

In [None]:
# Wind feature set 1: WindSpeedPCA, hours_after and the one-hot time-of-day / season columns.
# The order of this list is the column order of the model input and of the importance table.
wind_featues_1 = ["WindSpeedPCA", "hours_after", "time_of_day_afternoon", "time_of_day_morning",
                "time_of_day_night", "season_autumn", "season_spring", "season_summer", "season_winter"]

In [None]:
# Load the wind features and targets (train/test) from parquet files.
# NOTE(review): absolute paths under /content/drive — presumably a mounted Google Drive in
# Colab; the notebook will not run elsewhere without adjusting them.
x_wind_train = pd.read_parquet("/content/drive/MyDrive/Colab Notebooks/Daten/Train/x_wind_train.parquet")
x_wind_test = pd.read_parquet("/content/drive/MyDrive/Colab Notebooks/Daten/Test/x_wind_test.parquet")
y_wind_train = pd.read_parquet("/content/drive/MyDrive/Colab Notebooks/Daten/Train/y_wind_train.parquet")
y_wind_test = pd.read_parquet("/content/drive/MyDrive/Colab Notebooks/Daten/Test/y_wind_test.parquet")

In [None]:
# Wind experiment 1: feature set 1 on all rows (no filter on hours_after), as NumPy arrays.
x_wind_train_1 = x_wind_train[wind_featues_1].to_numpy()
x_wind_test_1 = x_wind_test[wind_featues_1].to_numpy()
y_wind_train_1 = y_wind_train[["Wind_MWh_credit"]].to_numpy()
y_wind_test_1 = y_wind_test[["Wind_MWh_credit"]].to_numpy()

In [None]:
# Quantile-regression wind model for the quantile levels 0.1 ... 0.9 on feature set 1.
model_wind_1 = XGBRegressor(device="cuda", max_depth=5, n_estimators=1000, objective="reg:quantileerror",
                            quantile_alpha=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
model_wind_1.fit(x_wind_train_1, y_wind_train_1)

# Recorded scores from manual runs with different max_depth values:

# --> with max_depth=2, n_estimators=1000
# Pinballscore train = 36.45833513951377
# Pinballscore test = 36.510687025119715

# --> with max_depth=5, n_estimators=1000
# Pinballscore train = 35.48570153209384
# Pinballscore test = 36.1215360160826

# --> with max_depth=8, n_estimators=1000
# Pinballscore train = 34.765369554680156
# Pinballscore test = 36.478808455842994

In [None]:
# Predict the quantiles for the training and the test rows.
# NOTE(review): the recorded output below contains an XGBoost hint about matching the data's
# device to the booster's device — presumably because NumPy (CPU) input is passed to a model
# configured with device="cuda"; confirm whether this matters for runtime.
prediction_wind_train_1 = model_wind_1.predict(x_wind_train_1)
prediction_wind_test_1 = model_wind_1.predict(x_wind_test_1)

Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.




In [None]:
# Report the pinball loss averaged over all quantiles for train and test.
print(f"Pinballscore train = {pinball_score(y_wind_train_1, prediction_wind_train_1)}")
print(f"Pinballscore test = {pinball_score(y_wind_test_1, prediction_wind_test_1)}")

Pinballscore train = 35.48570153209384
Pinballscore test = 36.1215360160826


In [None]:
# Show the feature importances of wind model 1, most important first.
get_feature_importance(model_wind_1, wind_featues_1)

Unnamed: 0,Feature,Importance
0,WindSpeedPCA,0.677364
1,hours_after,0.211001
5,season_autumn,0.032687
8,season_winter,0.025473
6,season_spring,0.023041
7,season_summer,0.01181
2,time_of_day_afternoon,0.007709
3,time_of_day_morning,0.006385
4,time_of_day_night,0.00453


### und hours_after nur bis 51.5

In [None]:
# Wind experiment 2: feature set 1, restricted to rows with hours_after <= 51.5.
x_wind_train_2 = x_wind_train[wind_featues_1]
x_wind_train_2 = x_wind_train_2[x_wind_train_2["hours_after"] <= 51.5]
x_wind_test_2 = x_wind_test[wind_featues_1]
x_wind_test_2 = x_wind_test_2[x_wind_test_2["hours_after"] <= 51.5]
# Filter the targets with a positional boolean mask computed on the row-aligned features.
# The previous `.iloc[x.index]` fed index *labels* to a positional indexer, which is only
# correct while the index is exactly 0..n-1 and breaks once the frames have been filtered.
y_wind_train_2 = y_wind_train[["Wind_MWh_credit"]]
y_wind_train_2 = y_wind_train_2[(x_wind_train["hours_after"] <= 51.5).to_numpy()]
y_wind_test_2 = y_wind_test[["Wind_MWh_credit"]]
y_wind_test_2 = y_wind_test_2[(x_wind_test["hours_after"] <= 51.5).to_numpy()]

In [None]:
# Quantile-regression wind model on the hours_after <= 51.5 subset (feature set 1).
model_wind_2 = XGBRegressor(device="cuda", max_depth=7, n_estimators=1000, objective="reg:quantileerror",
                            quantile_alpha=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
model_wind_2.fit(x_wind_train_2.to_numpy(), y_wind_train_2.to_numpy())

# Recorded scores from manual runs with different max_depth values:

# --> with max_depth=5, n_estimators=1000
# Pinballscore train = 26.599748467573832
# Pinballscore test = 24.04590892199395

# --> with max_depth=3, n_estimators=1000
# Pinballscore train = 27.09914364644392
# Pinballscore test = 23.985873543956767

# --> with max_depth=7, n_estimators=1000
# Pinballscore train = 26.20058535495109
# Pinballscore test = 24.250295550877045

In [None]:
# Predict the quantiles for the filtered training and test rows.
prediction_wind_train_2 = model_wind_2.predict(x_wind_train_2.to_numpy())
prediction_wind_test_2 = model_wind_2.predict(x_wind_test_2.to_numpy())

In [None]:
# Report the pinball loss averaged over all quantiles for train and test.
print(f"Pinballscore train = {pinball_score(y_wind_train_2, prediction_wind_train_2)}")
print(f"Pinballscore test = {pinball_score(y_wind_test_2, prediction_wind_test_2)}")

Pinballscore train = 26.20058535495109
Pinballscore test = 24.250295550877045


In [None]:
# Show the feature importances of wind model 2, most important first.
get_feature_importance(model_wind_2, wind_featues_1)

Unnamed: 0,Feature,Importance
0,WindSpeedPCA,0.786802
5,season_autumn,0.045566
8,season_winter,0.04441
1,hours_after,0.042062
7,season_summer,0.023054
6,season_spring,0.021039
2,time_of_day_afternoon,0.013063
3,time_of_day_morning,0.012894
4,time_of_day_night,0.011111


## Wind-Modell mit WindSpeedPCA, hours_after, Jahreszeit und WindDirections

### und mit hours_after bis 51.5

In [None]:
# Wind feature set 2: WindSpeedPCA, hours_after, the one-hot season columns and the one-hot
# wind-direction categories (no time-of-day columns).
wind_featues_2 = ["WindSpeedPCA", "hours_after", "season_autumn", "season_spring", "season_summer", "season_winter",
                  "wind_dir_cat_E", "wind_dir_cat_N", "wind_dir_cat_NE", "wind_dir_cat_NW", "wind_dir_cat_S",
                  "wind_dir_cat_SE", "wind_dir_cat_SW", "wind_dir_cat_W"]

In [None]:
# List the columns available in the wind feature table.
x_wind_train.columns

Index(['RelativeHumidity', 'temp_hornsea', 'temp_solar', 'WindDirection',
       'WindDirection:100', 'year', 'month', 'day', 'hour', 'hours_after',
       'wind_interaction', 'wind_interaction_100', 'humidity_wind_interaction',
       'wind_gradient', 'time_of_day_afternoon', 'time_of_day_morning',
       'time_of_day_night', 'season_autumn', 'season_spring', 'season_summer',
       'season_winter', 'wind_dir_cat_E', 'wind_dir_cat_N', 'wind_dir_cat_NE',
       'wind_dir_cat_NW', 'wind_dir_cat_S', 'wind_dir_cat_SE',
       'wind_dir_cat_SW', 'wind_dir_cat_W', 'WindSpeedPCA'],
      dtype='object')

In [None]:
# Wind experiment 3: feature set 2, restricted to rows with hours_after <= 51.5.
x_wind_train_3 = x_wind_train[wind_featues_2]
x_wind_train_3 = x_wind_train_3[x_wind_train_3["hours_after"] <= 51.5]
x_wind_test_3 = x_wind_test[wind_featues_2]
x_wind_test_3 = x_wind_test_3[x_wind_test_3["hours_after"] <= 51.5]
# Filter the targets with a positional boolean mask computed on the row-aligned features.
# The previous `.iloc[x.index]` fed index *labels* to a positional indexer, which is only
# correct while the index is exactly 0..n-1 and breaks once the frames have been filtered.
y_wind_train_3 = y_wind_train[["Wind_MWh_credit"]]
y_wind_train_3 = y_wind_train_3[(x_wind_train["hours_after"] <= 51.5).to_numpy()]
y_wind_test_3 = y_wind_test[["Wind_MWh_credit"]]
y_wind_test_3 = y_wind_test_3[(x_wind_test["hours_after"] <= 51.5).to_numpy()]

In [None]:
# Quantile-regression wind model on the hours_after <= 51.5 subset with feature set 2.
model_wind_3 = XGBRegressor(device="cuda", max_depth=5, n_estimators=1000, objective="reg:quantileerror",
                            quantile_alpha=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
model_wind_3.fit(x_wind_train_3.to_numpy(), y_wind_train_3.to_numpy())

In [None]:
# Predict the quantiles for the filtered training and test rows.
prediction_wind_train_3 = model_wind_3.predict(x_wind_train_3.to_numpy())
prediction_wind_test_3 = model_wind_3.predict(x_wind_test_3.to_numpy())

In [None]:
# Report the pinball loss averaged over all quantiles for train and test.
print(f"Pinballscore train = {pinball_score(y_wind_train_3, prediction_wind_train_3)}")
print(f"Pinballscore test = {pinball_score(y_wind_test_3, prediction_wind_test_3)}")

Pinballscore train = 25.37604252635143
Pinballscore test = 23.51769042374634


In [None]:
# Show the feature importances of wind model 3, most important first.
get_feature_importance(model_wind_3, wind_featues_2)

Unnamed: 0,Feature,Importance
0,WindSpeedPCA,0.727229
1,hours_after,0.082648
2,season_autumn,0.032575
5,season_winter,0.028133
3,season_spring,0.022751
10,wind_dir_cat_S,0.018571
9,wind_dir_cat_NW,0.016815
4,season_summer,0.01346
12,wind_dir_cat_SW,0.011782
7,wind_dir_cat_N,0.011158


### Grid Search für das Wind Modell

In [None]:
# Best parameters (recorded result): {'n_estimators': 1000, 'max_depth': 4, 'eta': 0.1}

# Manual grid search over tree depth, number of trees and learning rate for the wind model
# (feature set 2, hours_after <= 51.5).
# NOTE(review): every candidate is scored on the *test* set and the best one is kept, so the
# reported test score of the selected model is optimistically biased; a separate validation
# split or cross-validation would avoid this.
params = {
    "max_depth": [1, 2, 3, 4, 5],
    "n_estimators": [200, 500, 1000],
    "eta": [0.01, 0.1, 0.3]
}

# Initialise the holders for the best parameters and the best (lowest) score
best_params = None
best_score = float('inf')

# Run the manual grid search
for n_estimators in params['n_estimators']:
    for max_depth in params['max_depth']:
        for eta in params['eta']:
            # Train a model with the current parameter combination
            grid_model = XGBRegressor(device="cuda", objective="reg:quantileerror",
                          quantile_alpha=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
                          n_estimators=n_estimators, max_depth=max_depth, eta=eta)
            grid_model.fit(x_wind_train_3.to_numpy(), y_wind_train_3.to_numpy())

            # Predict and compute the score
            y_pred = grid_model.predict(x_wind_test_3.to_numpy())
            score = pinball_score(y_wind_test_3, y_pred)
            print(f"params: ({n_estimators}, {max_depth}, {eta}): {score}")

            # Keep the best parameters seen so far (lower score is better)
            if score < best_score:
                best_score = score
                best_params = {
                    'n_estimators': n_estimators,
                    'max_depth': max_depth,
                    'eta': eta
                }

print(f"Beste Parameter: {best_params}")

params: (200, 1, 0.01): 36.091014639345424
params: (200, 1, 0.1): 25.537612146558782
params: (200, 1, 0.3): 25.577426671302902
params: (200, 2, 0.01): 30.86302362291386
params: (200, 2, 0.1): 24.101959106129712
params: (200, 2, 0.3): 23.895595824210105
params: (200, 3, 0.01): 29.894237849632354
params: (200, 3, 0.1): 23.58385606258305
params: (200, 3, 0.3): 23.528862835504725
params: (200, 4, 0.01): 29.269809326157638
params: (200, 4, 0.1): 23.2909243853893
params: (200, 4, 0.3): 23.398992535186085
params: (200, 5, 0.01): 29.087999849232954
params: (200, 5, 0.1): 23.303282597830876
params: (200, 5, 0.3): 23.41075175164137
params: (500, 1, 0.01): 26.545414169072636
params: (500, 1, 0.1): 25.51573634004579
params: (500, 1, 0.3): 25.537404844177633
params: (500, 2, 0.01): 24.70560556423952
params: (500, 2, 0.1): 24.070104084023118
params: (500, 2, 0.3): 23.87065716400953
params: (500, 3, 0.01): 24.057090664853412
params: (500, 3, 0.1): 23.528371627293197
params: (500, 3, 0.3): 23.50513480

## Wind-Modell mit WindSpeedPCA (aber obere n% abgeschnitten), hours_after, Jahreszeit und WindDirections

In [None]:
def remove_upperbound(merged_table_features, percentage=0.01, columns=("WindSpeedPCA",)):
    """Drop the rows holding the largest values of the given columns.

    For every column in ``columns`` the ``n`` rows with the largest values are
    selected, where ``n = round(len(table) * percentage)``; the union of those
    rows is removed.

    Args:
        merged_table_features: Feature table to trim. It is not modified.
        percentage: Fraction of rows (0..1) to cut off per column.
        columns: Columns whose upper tail is removed. Defaults to
            ``("WindSpeedPCA",)``.

    Returns:
        A new DataFrame without the selected rows. The original index labels
        are kept (no reset), so callers can still align other tables by label.
    """
    n = round(len(merged_table_features) * percentage)
    labels_to_drop = set()
    for col in columns:
        labels_to_drop.update(merged_table_features[col].nlargest(n).index)
    # Rows are dropped by index label; duplicate labels would all be removed together.
    return merged_table_features.drop(index=list(labels_to_drop))

In [None]:
# Wind experiment 4: feature set 2, hours_after <= 51.5, and the training rows with the
# largest WindSpeedPCA values removed (test rows are only filtered by hours_after).
x_wind_train_4 = x_wind_train[wind_featues_2]
x_wind_train_4 = remove_upperbound(x_wind_train_4[x_wind_train_4["hours_after"] <= 51.5])
x_wind_test_4 = x_wind_test[wind_featues_2]
x_wind_test_4 = x_wind_test_4[x_wind_test_4["hours_after"] <= 51.5]
# Keep exactly the target rows whose feature row survived. The mask is positional on the
# row-aligned feature table (assumes unique index labels), replacing `.iloc[x.index]`, which
# fed index *labels* to a positional indexer and is only correct for an index of 0..n-1.
y_wind_train_4 = y_wind_train[["Wind_MWh_credit"]]
y_wind_train_4 = y_wind_train_4[x_wind_train.index.isin(x_wind_train_4.index)]
y_wind_test_4 = y_wind_test[["Wind_MWh_credit"]]
y_wind_test_4 = y_wind_test_4[(x_wind_test["hours_after"] <= 51.5).to_numpy()]

In [None]:
# Quantile-regression wind model using the grid-search parameters (max_depth=4,
# n_estimators=1000, eta=0.1) on the trimmed training data.
model_wind_4 = XGBRegressor(device="cuda", max_depth=4, n_estimators=1000, eta=0.1, objective="reg:quantileerror",
                            quantile_alpha=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
model_wind_4.fit(x_wind_train_4.to_numpy(), y_wind_train_4.to_numpy())

# Recorded scores from manual runs with different cut-off percentages:

# --> with max_depth=4, n_estimators=1000, eta=0.1 and the top 2% cut off
# Pinballscore train = 25.84152516101148
# Pinballscore test = 23.17772253487003

# --> with max_depth=4, n_estimators=1000, eta=0.1 and the top 5% cut off
# Pinballscore train = 25.9376663102129
# Pinballscore test = 23.210459470270266

# --> with max_depth=4, n_estimators=1000, eta=0.1 and the top 3% cut off
# Pinballscore train = 25.939832964272874
# Pinballscore test = 23.25161522368707

# --> with max_depth=4, n_estimators=1000, eta=0.1 and the top 1% cut off
# Pinballscore train = 25.788378583154493
# Pinballscore test = 23.145916365725164

In [None]:
# Predict the quantiles for the trimmed training rows and the filtered test rows.
# NOTE(review): the recorded output below contains an XGBoost hint about matching the data's
# device to the booster's device — presumably caused by NumPy (CPU) input with device="cuda".
prediction_wind_train_4 = model_wind_4.predict(x_wind_train_4.to_numpy())
prediction_wind_test_4 = model_wind_4.predict(x_wind_test_4.to_numpy())

Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.




In [None]:
# Report the pinball loss averaged over all quantiles for train and test.
print(f"Pinballscore train = {pinball_score(y_wind_train_4, prediction_wind_train_4)}")
print(f"Pinballscore test = {pinball_score(y_wind_test_4, prediction_wind_test_4)}")

Pinballscore train = 25.788378583154493
Pinballscore test = 23.145916365725164


In [None]:
# Show the feature importances of wind model 4, most important first.
get_feature_importance(model_wind_4, wind_featues_2)

Unnamed: 0,Feature,Importance
0,WindSpeedPCA,0.711018
1,hours_after,0.042219
3,season_spring,0.036434
2,season_autumn,0.033826
10,wind_dir_cat_S,0.032451
9,wind_dir_cat_NW,0.020577
7,wind_dir_cat_N,0.020188
5,season_winter,0.019638
13,wind_dir_cat_W,0.018438
12,wind_dir_cat_SW,0.01612


In [None]:
# Persist wind model 4 as a JSON file (absolute path under /content/drive).
model_wind_4.save_model("/content/drive/MyDrive/Colab Notebooks/model_wind_241024.json")

## Wind Modell mit 48h nach ref_time

In [None]:
# Restrict the wind tables to hours_after <= 48.
# NOTE: this overwrites the notebook-wide x_wind_*/y_wind_* frames, so every cell that uses
# them afterwards sees the filtered data.
# The targets are filtered first, with a positional boolean mask taken from the still
# row-aligned features. The previous `y.iloc[x.index]` used index labels as positions and
# failed (or picked wrong rows) when this cell was run a second time; with the mask a re-run
# is a no-op.
y_wind_train = y_wind_train[(x_wind_train["hours_after"] <= 48).to_numpy()]
y_wind_test = y_wind_test[(x_wind_test["hours_after"] <= 48).to_numpy()]
x_wind_train = x_wind_train[x_wind_train["hours_after"] <= 48]
x_wind_test = x_wind_test[x_wind_test["hours_after"] <= 48]

In [None]:
# Wind feature set 3: like feature set 2 but without hours_after.
wind_featues_3 = ["WindSpeedPCA", "season_autumn", "season_spring", "season_summer", "season_winter",
                  "wind_dir_cat_E", "wind_dir_cat_N", "wind_dir_cat_NE", "wind_dir_cat_NW", "wind_dir_cat_S",
                  "wind_dir_cat_SE", "wind_dir_cat_SW", "wind_dir_cat_W"]

In [None]:
# Wind experiment 5: feature set 3 on the wind tables as they are at this point
# (already restricted to hours_after <= 48 by an earlier cell).
x_wind_train_5 = x_wind_train[wind_featues_3]
x_wind_test_5 = x_wind_test[wind_featues_3]
y_wind_train_5 = y_wind_train[["Wind_MWh_credit"]]
y_wind_test_5 = y_wind_test[["Wind_MWh_credit"]]

In [None]:
# Quantile-regression wind model with the grid-search parameters on feature set 3.
model_wind_5 = XGBRegressor(device="cuda", max_depth=4, n_estimators=1000, eta=0.1, objective="reg:quantileerror",
                            quantile_alpha=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
model_wind_5.fit(x_wind_train_5.to_numpy(), y_wind_train_5.to_numpy())

In [None]:
# Predict the quantiles for the training and the test rows.
prediction_wind_train_5 = model_wind_5.predict(x_wind_train_5.to_numpy())
prediction_wind_test_5 = model_wind_5.predict(x_wind_test_5.to_numpy())

In [None]:
# Report the pinball loss averaged over all quantiles for train and test.
print(f"Pinballscore train = {pinball_score(y_wind_train_5, prediction_wind_train_5)}")
print(f"Pinballscore test = {pinball_score(y_wind_test_5, prediction_wind_test_5)}")

Pinballscore train = 25.730691074789505
Pinballscore test = 22.914120411148463


In [None]:
# Show the feature importances of wind model 5, most important first.
get_feature_importance(model_wind_5, wind_featues_3)

Unnamed: 0,Feature,Importance
0,WindSpeedPCA,0.710905
1,season_autumn,0.039653
11,wind_dir_cat_SW,0.039627
2,season_spring,0.039382
9,wind_dir_cat_S,0.029933
5,wind_dir_cat_E,0.027764
4,season_winter,0.026436
10,wind_dir_cat_SE,0.016795
12,wind_dir_cat_W,0.01618
8,wind_dir_cat_NW,0.015634


## Solar Model

### mit Features (temp_solar, CloudCover, SolarDownwardRadiation, RelativeHumidity, month, day, hour); hours_after nur als Filter (bis 51.5)

In [None]:
# Solar feature set 1. hours_after is not a model feature here; it is only used as a row filter.
solar_features = ["temp_solar", "CloudCover", "SolarDownwardRadiation", "RelativeHumidity", "month", "day", "hour"]

In [None]:
# Load the solar features and targets (train/test) from parquet files. This replaces the
# x_solar_*/y_solar_* frames loaded from the "_ohe" files at the top of the notebook.
# NOTE(review): absolute paths under /content/drive — presumably a mounted Google Drive in Colab.
x_solar_train = pd.read_parquet("/content/drive/MyDrive/Colab Notebooks/Daten/Train/x_solar_train.parquet")
x_solar_test = pd.read_parquet("/content/drive/MyDrive/Colab Notebooks/Daten/Test/x_solar_test.parquet")
y_solar_train = pd.read_parquet("/content/drive/MyDrive/Colab Notebooks/Daten/Train/y_solar_train.parquet")
y_solar_test = pd.read_parquet("/content/drive/MyDrive/Colab Notebooks/Daten/Test/y_solar_test.parquet")

# Restrict to hours_after <= 51.5. The targets are filtered first, with a positional boolean
# mask taken from the still row-aligned features, instead of `y.iloc[x.index]`, which used
# index labels as positions and is only correct for an index of exactly 0..n-1.
y_solar_train = y_solar_train[(x_solar_train["hours_after"] <= 51.5).to_numpy()]
y_solar_test = y_solar_test[(x_solar_test["hours_after"] <= 51.5).to_numpy()]
x_solar_train = x_solar_train[x_solar_train["hours_after"] <= 51.5]
x_solar_test = x_solar_test[x_solar_test["hours_after"] <= 51.5]

In [None]:
# Solar experiment 1: select feature set 1 and the target column.
x_solar_train_1 = x_solar_train[solar_features]
x_solar_test_1 = x_solar_test[solar_features]
y_solar_train_1 = y_solar_train[["Solar_MWh_credit"]]
y_solar_test_1 = y_solar_test[["Solar_MWh_credit"]]

In [None]:
# Quantile-regression solar model for the quantile levels 0.1 ... 0.9 on feature set 1.
model_solar_1 = XGBRegressor(device="cuda", max_depth=7, n_estimators=300, eta=0.1, objective="reg:quantileerror",
                             quantile_alpha=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
model_solar_1.fit(x_solar_train_1.to_numpy(), y_solar_train_1.to_numpy())

In [None]:
# Predict the quantiles for the training and the test rows.
prediction_solar_train_1 = model_solar_1.predict(x_solar_train_1.to_numpy())
prediction_solar_test_1 = model_solar_1.predict(x_solar_test_1.to_numpy())

In [None]:
# Report the pinball loss averaged over all quantiles for train and test.
print(f"Pinballscore train = {pinball_score(y_solar_train_1, prediction_solar_train_1)}")
print(f"Pinballscore test = {pinball_score(y_solar_test_1, prediction_solar_test_1)}")

Pinballscore train = 8.768072519960612
Pinballscore test = 9.43478388918047


In [None]:
# Show the feature importances of solar model 1, most important first.
get_feature_importance(model_solar_1, solar_features)

Unnamed: 0,Feature,Importance
2,SolarDownwardRadiation,0.946432
6,hour,0.031324
4,month,0.006281
1,CloudCover,0.005778
5,day,0.004438
3,RelativeHumidity,0.003595
0,temp_solar,0.002152


In [None]:
# Persist solar model 1 as a JSON file (absolute path under /content/drive).
model_solar_1.save_model("/content/drive/MyDrive/Colab Notebooks/model_solar_241024.json")

### mit SolarDownwardRadiation, Tageszeit, hour, CloudCover, temp_y_solar_interaction und CloudCover_lag_1h

In [None]:
# List the columns available in the solar feature table.
x_solar_train.columns

Index(['CloudCover', 'SolarDownwardRadiation', 'temp_hornsea', 'temp_solar',
       'year', 'month', 'day', 'hour', 'hours_after',
       'adjusted_solar_radiation', 'temp_x_solar_interaction',
       'temp_y_solar_interaction', 'CloudCover_lag_1h', 'RelativeHumidity',
       'cloud_cover_change', 'time_of_day_afternoon', 'time_of_day_morning',
       'time_of_day_night', 'season_autumn', 'season_spring', 'season_summer',
       'season_winter'],
      dtype='object')

In [None]:
# Solar feature set 2: radiation, one-hot time of day, hour, cloud cover (current and the
# CloudCover_lag_1h column) and the temp_y_solar_interaction column. No season columns.
solar_features_2 = ['SolarDownwardRadiation', 'time_of_day_afternoon', 'time_of_day_morning', 'time_of_day_night', 'hour',
                    "CloudCover", 'temp_y_solar_interaction', 'CloudCover_lag_1h']

In [None]:
# Solar experiment 2: select feature set 2 and the target column.
x_solar_train_2 = x_solar_train[solar_features_2]
x_solar_test_2 = x_solar_test[solar_features_2]
y_solar_train_2 = y_solar_train[["Solar_MWh_credit"]]
y_solar_test_2 = y_solar_test[["Solar_MWh_credit"]]

In [None]:
# Quantile-regression solar model for the quantile levels 0.1 ... 0.9 on feature set 2.
# NOTE(review): the model below uses n_estimators=100, while the recorded scores further
# down were obtained with n_estimators=1000 and 300.
model_solar_2 = XGBRegressor(device="cuda", max_depth=7, n_estimators=100, eta=0.1, objective="reg:quantileerror",
                             quantile_alpha=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])
model_solar_2.fit(x_solar_train_2.to_numpy(), y_solar_train_2.to_numpy())

# Recorded scores from manual runs with different n_estimators values:

# --> with max_depth=7, n_estimators=1000, eta=0.1
# Pinballscore train = 9.095255271305168
# Pinballscore test = 9.257142643121577

# --> with max_depth=7, n_estimators=300, eta=0.1
# Pinballscore train = 9.258278688973904
# Pinballscore test = 9.227269947899181

In [None]:
# Predict the quantiles for the training and the test rows.
prediction_solar_train_2 = model_solar_2.predict(x_solar_train_2.to_numpy())
prediction_solar_test_2 = model_solar_2.predict(x_solar_test_2.to_numpy())

In [None]:
# Set negative test predictions to zero; the boolean-mask assignment modifies the array in place.
# NOTE(review): only the test predictions are clipped. The training predictions scored in the
# next cell are left unclipped, so the train and test scores are not computed on equal terms.
prediction_solar_test_2[prediction_solar_test_2 < 0] = 0
# Label the nine prediction columns by quantile and summarise their distributions.
prediction_solar_test_2_df = pd.DataFrame(prediction_solar_test_2, columns=["q10", "q20", "q30", "q40", "q50", "q60", "q70", "q80", "q90"])
prediction_solar_test_2_df.describe()

Unnamed: 0,q10,q20,q30,q40,q50,q60,q70,q80,q90
count,38168.0,38168.0,38168.0,38168.0,38168.0,38168.0,38168.0,38168.0,38168.0
mean,51.795174,63.862602,72.773041,80.215538,87.015404,93.588959,101.029381,109.716255,122.593483
std,99.584488,117.395058,130.302444,140.838089,149.282867,157.328247,166.306046,176.956024,192.275055
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.002436,0.002436,0.002436,0.002436,0.002436,0.002436,0.002436,0.002436,0.002436
50%,0.002436,0.002436,0.002436,0.002436,0.002436,0.002436,0.002436,0.002436,0.006014
75%,66.983246,86.597515,98.161079,113.227758,126.569469,141.073746,157.352268,176.946014,206.940086
max,636.242004,678.018921,714.687866,743.365356,739.177795,742.066284,760.526733,776.994995,817.603088


In [None]:
# Report the pinball loss averaged over all quantiles for train and test.
print(f"Pinballscore train = {pinball_score(y_solar_train_2, prediction_solar_train_2)}")
print(f"Pinballscore test = {pinball_score(y_solar_test_2, prediction_solar_test_2)}")

Pinballscore train = 9.476669943722298
Pinballscore test = 9.121101755810281


In [None]:
# Show the feature importances of solar model 2, most important first.
get_feature_importance(model_solar_2, solar_features_2)

Unnamed: 0,Feature,Importance
0,SolarDownwardRadiation,0.806933
3,time_of_day_night,0.114886
4,hour,0.035218
2,time_of_day_morning,0.012994
1,time_of_day_afternoon,0.010683
6,temp_y_solar_interaction,0.008695
5,CloudCover,0.007289
7,CloudCover_lag_1h,0.003302


In [None]:
# Best parameters: (not recorded)

# Manual grid search over tree depth, number of trees and learning rate for the solar model
# (feature set 2).
# NOTE(review): every candidate is scored on the *test* set and the best one is kept, so the
# reported test score of the selected model is optimistically biased; a separate validation
# split or cross-validation would avoid this.
params = {
    "max_depth": [1, 3, 5, 7, 9],
    "n_estimators": [100, 200, 300, 1000],
    "eta": [0.01, 0.1, 0.3]
}

# Initialise the holders for the best parameters and the best (lowest) score
best_params = None
best_score = float('inf')

# Run the manual grid search
for n_estimators in params['n_estimators']:
    for max_depth in params['max_depth']:
        for eta in params['eta']:
            # Train a model with the current parameter combination
            grid_model = XGBRegressor(device="cuda", objective="reg:quantileerror",
                          quantile_alpha=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
                          n_estimators=n_estimators, max_depth=max_depth, eta=eta)
            grid_model.fit(x_solar_train_2.to_numpy(), y_solar_train_2.to_numpy())

            # Predict and compute the score
            y_pred = grid_model.predict(x_solar_test_2.to_numpy())
            score = pinball_score(y_solar_test_2, y_pred)
            print(f"params: ({n_estimators}, {max_depth}, {eta}): {score}")

            # Keep the best parameters seen so far (lower score is better)
            if score < best_score:
                best_score = score
                best_params = {
                    'n_estimators': n_estimators,
                    'max_depth': max_depth,
                    'eta': eta
                }

print(f"Beste Parameter: {best_params}")

# The recorded output below was produced with this feature list (it additionally contains RelativeHumidity):
# with: solar_features_2 = ['SolarDownwardRadiation', 'time_of_day_afternoon', 'time_of_day_morning', 'time_of_day_night', 'hour',
                          # "CloudCover", 'temp_y_solar_interaction', "RelativeHumidity", 'CloudCover_lag_1h']

params: (100, 1, 0.01): 29.40528951840857
params: (100, 1, 0.1): 14.664383255907062
params: (100, 1, 0.3): 14.475498876939477
params: (100, 3, 0.01): 27.29556612837418
params: (100, 3, 0.1): 9.889167870856069
params: (100, 3, 0.3): 9.95157165042151
params: (100, 5, 0.01): 27.06352246785818
params: (100, 5, 0.1): 9.258359637051209
params: (100, 5, 0.3): 9.250064207854724
params: (100, 7, 0.01): 26.99501314343674
params: (100, 7, 0.1): 9.102707923662994
params: (100, 7, 0.3): 9.19007244045971
params: (100, 9, 0.01): 27.050876704110053
params: (100, 9, 0.1): 9.249097600967168
params: (100, 9, 0.3): 9.320905381252048
params: (200, 1, 0.01): 19.734084847761363
params: (200, 1, 0.1): 14.66369410718869
params: (200, 1, 0.3): 14.475498876939472
params: (200, 3, 0.01): 16.156511210795582
params: (200, 3, 0.1): 9.811652598152552
params: (200, 3, 0.3): 9.895061280300514
params: (200, 5, 0.01): 15.240869939866124
params: (200, 5, 0.1): 9.22704981957001
params: (200, 5, 0.3): 9.256369657584179
para

0