In [None]:
from xgboost import XGBRegressor
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_pinball_loss, make_scorer

In [None]:
def get_feature_importance(model, features):
    """Tabulate a fitted model's feature importances, most important first.

    Parameters
    ----------
    model
        Fitted estimator exposing a ``feature_importances_`` attribute.
    features
        Feature names, in the same order as the columns the model was fit on.

    Returns
    -------
    pandas.DataFrame
        Columns ``Feature`` and ``Importance``, sorted by ``Importance`` in
        descending order. The index keeps each feature's original position.
    """
    importance_table = pd.DataFrame(
        {"Feature": features, "Importance": model.feature_importances_}
    )
    return importance_table.sort_values(by="Importance", ascending=False)

In [None]:
def pinball_score(y_true, y_pred, quantiles=None):
    """Average the pinball loss over all predicted quantile columns.

    Parameters
    ----------
    y_true
        Observed target values; passed unchanged to ``mean_pinball_loss``.
    y_pred
        Array-like of shape ``(n_samples, n_quantiles)`` with one column of
        predictions per quantile level.
    quantiles
        Optional sequence with the quantile level of each column of ``y_pred``.
        When omitted, column ``i`` is scored at level ``(i + 1) / 10``
        (0.1, 0.2, ...), which is the original hard-coded behaviour.

    Returns
    -------
    numpy.float64
        Unweighted mean of the per-quantile pinball losses.

    Raises
    ------
    ValueError
        If ``y_pred`` is not two-dimensional or the number of quantile levels
        differs from the number of prediction columns.
    """
    # Coerce first so DataFrames and nested lists are sliced by column as well.
    y_pred = np.asarray(y_pred)
    if y_pred.ndim != 2:
        raise ValueError(
            f"y_pred must be 2-D (n_samples, n_quantiles), got {y_pred.ndim} dimension(s)"
        )

    n_columns = y_pred.shape[1]
    if quantiles is None:
        quantiles = [(i + 1) / 10 for i in range(n_columns)]
    elif len(quantiles) != n_columns:
        raise ValueError(
            f"got {len(quantiles)} quantile levels for {n_columns} prediction columns"
        )

    per_quantile_loss = [
        mean_pinball_loss(y_true, y_pred[:, column], alpha=level)
        for column, level in enumerate(quantiles)
    ]
    return np.mean(per_quantile_loss)

## Wind-Modell mit WindSpeed:100, hours_after und Tages-/Jahreszeit (mit NCEP-Daten)

In [None]:
# Input columns of the first wind model.
wind_featues_1 = [
    # wind speed and forecast lead time
    "WindSpeed:100",
    "hours_after",
    # time-of-day indicator columns
    "time_of_day_afternoon",
    "time_of_day_morning",
    "time_of_day_night",
    # season indicator columns
    "season_autumn",
    "season_spring",
    "season_summer",
    "season_winter",
]

In [None]:
# Wind training split: features and targets.
x_wind_train = pd.read_parquet(
    "/content/drive/MyDrive/Colab Notebooks/Daten/Train/x_wind_train_ncep.parquet"
)
y_wind_train = pd.read_parquet(
    "/content/drive/MyDrive/Colab Notebooks/Daten/Train/y_wind_train_ncep.parquet"
)

# Wind test split: features and targets.
x_wind_test = pd.read_parquet(
    "/content/drive/MyDrive/Colab Notebooks/Daten/Test/x_wind_test_ncep.parquet"
)
y_wind_test = pd.read_parquet(
    "/content/drive/MyDrive/Colab Notebooks/Daten/Test/y_wind_test_ncep.parquet"
)

In [None]:
# Keep only test rows whose `hours_after` value is at most 48.
# The same positional boolean mask is applied to features and targets, so both
# stay row-aligned whatever their index labels are, and re-running this cell on
# already-filtered frames is a no-op. (Looking the targets up with
# `.iloc[x_wind_test.index]` treated index labels as row positions, which picks
# wrong rows or raises IndexError once the frames have been filtered.)
# Assumes x_wind_test and y_wind_test hold the same rows in the same order - TODO confirm.
wind_horizon_mask = (x_wind_test["hours_after"] <= 48).to_numpy()
x_wind_test = x_wind_test.loc[wind_horizon_mask]
y_wind_test = y_wind_test.loc[wind_horizon_mask]

In [None]:
# Restrict both wind feature frames to the columns used by model 1.
x_wind_train_1, x_wind_test_1 = (
    frame[wind_featues_1] for frame in (x_wind_train, x_wind_test)
)
# Keep the target as a single-column DataFrame (double brackets).
y_wind_train_1, y_wind_test_1 = (
    frame[["Wind_MWh_credit"]] for frame in (y_wind_train, y_wind_test)
)

In [None]:
# Quantile levels passed to the model as `quantile_alpha`.
wind_quantile_levels = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

# Gradient-boosted quantile regressor for wind; `device="cuda"` requests a CUDA device.
model_wind_1 = XGBRegressor(
    objective="reg:quantileerror",
    quantile_alpha=wind_quantile_levels,
    n_estimators=1000,
    max_depth=4,
    eta=0.1,
    device="cuda",
)
model_wind_1.fit(x_wind_train_1.to_numpy(), y_wind_train_1.to_numpy())



KeyboardInterrupt: 

## Solar-Modell

In [None]:
# Solar training split: features and targets.
x_solar_train = pd.read_parquet("/content/drive/MyDrive/Colab Notebooks/Daten/Train/x_solar_train_ncep.parquet")
y_solar_train = pd.read_parquet("/content/drive/MyDrive/Colab Notebooks/Daten/Train/y_solar_train_ncep.parquet")

# Solar test split: features and targets.
x_solar_test = pd.read_parquet("/content/drive/MyDrive/Colab Notebooks/Daten/Test/x_solar_test_ncep.parquet")
y_solar_test = pd.read_parquet("/content/drive/MyDrive/Colab Notebooks/Daten/Test/y_solar_test_ncep.parquet")

# Keep only test rows whose `hours_after` value is at most 51.5; the training
# frames are used as loaded.
# The same positional boolean mask is applied to features and targets so both
# stay row-aligned whatever their index labels are (a `.iloc[...]` lookup with
# the filtered feature index would treat index labels as row positions).
# Assumes x_solar_test and y_solar_test hold the same rows in the same order - TODO confirm.
solar_horizon_mask = (x_solar_test["hours_after"] <= 51.5).to_numpy()
x_solar_test = x_solar_test.loc[solar_horizon_mask]
y_solar_test = y_solar_test.loc[solar_horizon_mask]

In [None]:
# Display the solar training features (pandas abbreviates the rendering of large frames).
x_solar_train

Unnamed: 0,CloudCover,SolarDownwardRadiation,temp_hornsea,temp_solar,year,month,day,hour,hours_after,adjusted_solar_radiation,...,CloudCover_lag_1h,RelativeHumidity,cloud_cover_change,time_of_day_afternoon,time_of_day_morning,time_of_day_night,season_autumn,season_spring,season_summer,season_winter
0,0.0260,0.0,15.450012,13.940016,2020,9,20,0,0.0,0.0,...,,84.066666,,0.0,0.0,1.0,1.0,0.0,0.0,0.0
1,0.0390,0.0,15.397076,13.896579,2020,9,20,0,0.5,0.0,...,0.0260,84.433334,0.0130,0.0,0.0,1.0,1.0,0.0,0.0,0.0
2,0.0520,0.0,15.344140,13.853142,2020,9,20,1,1.0,0.0,...,0.0390,84.800003,0.0130,0.0,0.0,1.0,1.0,0.0,0.0,0.0
3,0.0385,0.0,15.305976,13.847113,2020,9,20,1,1.5,0.0,...,0.0520,84.933334,-0.0135,0.0,0.0,1.0,1.0,0.0,0.0,0.0
4,0.0250,0.0,15.267812,13.841084,2020,9,20,2,2.0,0.0,...,0.0385,85.066666,-0.0135,0.0,0.0,1.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3790144,0.0000,0.0,11.047282,9.438393,2024,5,19,21,21.5,0.0,...,0.0000,93.683334,0.0000,0.0,0.0,1.0,0.0,1.0,0.0,0.0
3790145,0.0000,0.0,10.946550,9.157107,2024,5,19,22,22.0,0.0,...,0.0000,94.700005,0.0000,0.0,0.0,1.0,0.0,1.0,0.0,0.0
3790146,0.0000,0.0,10.785738,8.955181,2024,5,19,22,22.5,0.0,...,0.0000,95.016670,0.0000,0.0,0.0,1.0,0.0,1.0,0.0,0.0
3790147,0.0000,0.0,10.624926,8.753255,2024,5,19,23,23.0,0.0,...,0.0000,95.333336,0.0000,0.0,0.0,1.0,0.0,1.0,0.0,0.0


In [None]:
# Input columns of the solar model, in the order they are fed to the regressor.
solar_features = [
    "SolarDownwardRadiation",
    "time_of_day_afternoon",
    "time_of_day_morning",
    "time_of_day_night",
    "hour",
    "CloudCover",
    "temp_y_solar_interaction",
    "CloudCover_lag_1h",
]

In [None]:
# Restrict both solar feature frames to the selected model inputs.
x_solar_train_1, x_solar_test_1 = (
    frame[solar_features] for frame in (x_solar_train, x_solar_test)
)
# Keep the target as a single-column DataFrame (double brackets).
y_solar_train_1, y_solar_test_1 = (
    frame[["Solar_MWh_credit"]] for frame in (y_solar_train, y_solar_test)
)

In [None]:
# Quantile levels passed to the model as `quantile_alpha`.
solar_quantile_levels = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

# Gradient-boosted quantile regressor for solar; `device="cuda"` requests a CUDA device.
model_solar_1 = XGBRegressor(
    objective="reg:quantileerror",
    quantile_alpha=solar_quantile_levels,
    n_estimators=500,
    max_depth=7,
    eta=0.1,
    device="cuda",
)
model_solar_1.fit(x_solar_train_1.to_numpy(), y_solar_train_1.to_numpy())

In [None]:
# Predict on the training and test features, in that order.
prediction_solar_train_1, prediction_solar_test_1 = (
    model_solar_1.predict(feature_frame.to_numpy())
    for feature_frame in (x_solar_train_1, x_solar_test_1)
)

In [None]:
# Report the mean pinball loss of the solar model on each split.
solar_train_pinball = pinball_score(y_solar_train_1, prediction_solar_train_1)
print(f"Pinballscore train = {solar_train_pinball}")

solar_test_pinball = pinball_score(y_solar_test_1, prediction_solar_test_1)
print(f"Pinballscore test = {solar_test_pinball}")

Pinballscore train = 16.905854267572913
Pinballscore test = 13.781035979650989


In [None]:
# Rank the solar model's input features by importance, highest first.
get_feature_importance(model_solar_1, solar_features)

Unnamed: 0,Feature,Importance
0,SolarDownwardRadiation,0.69204
3,time_of_day_night,0.150394
4,hour,0.075746
6,temp_y_solar_interaction,0.0356
1,time_of_day_afternoon,0.028581
7,CloudCover_lag_1h,0.009033
2,time_of_day_morning,0.004823
5,CloudCover,0.003782
