In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import plotly.express as px
from sklearn.preprocessing import MinMaxScaler
import statsmodels.formula.api as smf
from tqdm import tqdm

In [2]:
# Funktion zur Vorbereitung von CSV-Daten (Konvertierung der 'dtm' Spalte in datetime-Format)
def pre_csv(df):
    df.dtm = pd.to_datetime(df.dtm)  # Konvertiert die 'dtm' Spalte in das Datumsformat
    return df

# Funktion zur Vorbereitung von DWD-Daten (Konvertierung und Bereinigung)
def pre_dwd(df):
    # Umwandeln in DataFrame, Umbenennen von Spalten und Zeitzonen-Anpassungen
    df = df.to_dataframe().reset_index().rename(columns={"ref_datetime": "reference_time", "valid_datetime": "valid_time"})
    df.reference_time = df.reference_time.dt.tz_localize("UTC")  # Lokalisierung der 'reference_time' auf UTC
    df.valid_time = df.reference_time + df.valid_time * pd.Timedelta(1, "h")  # Berechnung von 'valid_time' basierend auf Stunden
    return df

# Platzhalter für zukünftige NCEP-Datenvorbereitung (aktuell wird nichts gemacht)
def pre_ncep(df):
    return df

# Öffnen und Vorbereiten mehrerer DWD-Datensätze (PES)
df_pes_0 = pre_dwd(xr.open_dataset("data/dwd_icon_eu_pes10_20200920_20231027.nc"))
df_pes_1 = pre_dwd(xr.open_dataset("data/dwd_icon_eu_pes10_20231027_20240108.nc"))
df_pes_2 = pre_dwd(xr.open_dataset("data/dwd_icon_eu_pes10_20240108_20240129.nc"))
df_pes_3 = pre_dwd(xr.open_dataset("data/dwd_icon_eu_pes10_20240129_20240519.nc"))

# Zusammenführen, Sortieren und Bereinigen der PES-Daten
df_pes = pd.concat([df_pes_0, df_pes_1, df_pes_2, df_pes_3]).sort_values(["reference_time", "valid_time"]).reset_index(drop=True)
del df_pes_0, df_pes_1, df_pes_2, df_pes_3  # Speicherbereinigung

# Gruppieren und Mittelwertbildung, Entfernen nicht benötigter Spalten
df_pes = df_pes.groupby(["reference_time", "valid_time"]).mean().reset_index().drop(columns=["point", "longitude", "latitude"])

# Interpolation auf 30-Minuten-Intervalle
df_pes = df_pes.set_index("valid_time").groupby(["reference_time"]).resample("30min").interpolate("linear").drop(columns="reference_time").reset_index()
df_pes.head()  # Ausgabe des DataFrame-Kopfs zur Überprüfung

# Gleicher Prozess für Hornsea-Daten
df_hornsea_0 = pre_dwd(xr.open_dataset("data/dwd_icon_eu_hornsea_1_20200920_20231027.nc"))
df_hornsea_1 = pre_dwd(xr.open_dataset("data/dwd_icon_eu_hornsea_1_20231027_20240108.nc"))
df_hornsea_2 = pre_dwd(xr.open_dataset("data/dwd_icon_eu_hornsea_1_20240108_20240129.nc"))
df_hornsea_3 = pre_dwd(xr.open_dataset("data/dwd_icon_eu_hornsea_1_20240129_20240519.nc"))

df_hornsea = pd.concat([df_hornsea_0, df_hornsea_1, df_hornsea_2, df_hornsea_3]).sort_values(["reference_time", "valid_time"]).reset_index(drop=True)
del df_hornsea_0, df_hornsea_1, df_hornsea_2, df_hornsea_3  # Speicherbereinigung

df_hornsea = df_hornsea.groupby(["reference_time", "valid_time"]).mean().reset_index().drop(columns=["longitude", "latitude"])

df_hornsea = df_hornsea.set_index("valid_time").groupby(["reference_time"]).resample("30min").interpolate("linear").drop(columns="reference_time").reset_index()
df_hornsea.head()  # Ausgabe des DataFrame-Kopfs zur Überprüfung

# Einlesen und Vorbereiten von CSV-Daten
df_0 = pre_csv(pd.read_csv("data/Energy_Data_20200920_20240118.csv"))
df_1 = pre_csv(pd.read_csv("data/Energy_Data_20240119_20240519.csv"))

# Zusammenführen und Sortieren der CSV-Daten
df = pd.concat([df_0, df_1]).sort_values(["dtm"]).reset_index(drop=True)
del df_0, df_1  # Speicherbereinigung

# Berechnen von Wind- und Solar-MWh-Krediten (Abzug von BOA für Wind)
df["Wind_MWh_credit"] = 0.5 * df["Wind_MW"] - df["boa_MWh"]
df["Solar_MWh_credit"] = 0.5 * df["Solar_MW"]

# Zusammenführen der PES- und Hornsea-Daten
df_full = pd.merge(df_pes, df_hornsea, on=["reference_time", "valid_time"])

# Zusammenführen mit CSV-Daten basierend auf der 'valid_time'
df_full = df_full.merge(df[["dtm", "Wind_MWh_credit", "Solar_MWh_credit"]], left_on="valid_time", right_on="dtm", how="left")

# Berechnung zusätzlicher Spalten (Vorhersagezeit, Jahr, Monat, Tag, Stunde)
df_full["forcast_hours"] = (df_full.valid_time - df_full.reference_time) / pd.Timedelta(1, "h")
df_full["year"] = df_full.valid_time.dt.year
df_full["month"] = df_full.valid_time.dt.month
df_full["day"] = df_full.valid_time.dt.day
df_full["hour"] = df_full.valid_time.dt.hour
print(df_full.dtypes)  # Ausgabe der Datentypen des DataFrames

# Berechnung der Gesamterzeugung (MWh) durch Addition von Wind- und Solar-MWh
df_full["total_generation_MWh"] = df_full["Wind_MWh_credit"] + df_full["Solar_MWh_credit"]


reference_time            datetime64[ns, UTC]
valid_time                datetime64[ns, UTC]
CloudCover                            float32
SolarDownwardRadiation                float64
Temperature_x                         float32
RelativeHumidity                      float32
Temperature_y                         float32
WindDirection                         float32
WindDirection:100                     float32
WindSpeed                             float32
WindSpeed:100                         float32
dtm                       datetime64[ns, UTC]
Wind_MWh_credit                       float64
Solar_MWh_credit                      float64
forcast_hours                         float64
year                                    int32
month                                   int32
day                                     int32
hour                                    int32
dtype: object


In [3]:
# Spalten, die von der Normalisierung ausgeschlossen sind
exclude_columns = ['reference_time', 'Wind_MWh_credit', 'Solar_MWh_credit', 'total_generation_MWh']
# exclude_columns = ['reference_time']

# Min- und Max-Werte für jede Spalte (außer den ausgeschlossenen)
min_values = {col: df_full[col].min() for col in df_full.columns if col not in exclude_columns}
max_values = {col: df_full[col].max() for col in df_full.columns if col not in exclude_columns}
# min_values = {feature: df_full[feature].min() for feature in df_full.columns if feature != 'reference_time'}
# max_values = {feature: df_full[feature].max() for feature in df_full.columns if feature != 'reference_time'}

def min_max_normalize(df, min_values, max_values, exclude_columns):
    normalized_df = df.copy()
    for col in df.columns:
        if col not in exclude_columns:
            normalized_df[col] = (df[col] - min_values[col]) / (max_values[col] - min_values[col])
    return normalized_df

# Normalisieren und ausgeschlossene Spalten wieder hinzufügen
normalized_df = min_max_normalize(df_full, min_values, max_values, exclude_columns)
for col in exclude_columns:
    normalized_df[col] = df_full[col]

# Ausgabe und Bereinigung von fehlenden Werten
display(normalized_df)
df_cleaned = normalized_df.dropna()
print(df_cleaned.columns, len(df_cleaned.columns))
print(df_cleaned.isnull().sum())


Unnamed: 0,reference_time,valid_time,CloudCover,SolarDownwardRadiation,Temperature_x,RelativeHumidity,Temperature_y,WindDirection,WindDirection:100,WindSpeed,WindSpeed:100,dtm,Wind_MWh_credit,Solar_MWh_credit,forcast_hours,year,month,day,hour,total_generation_MWh
0,2020-09-20 00:00:00+00:00,0.000000,0.450405,0.000261,0.445126,0.716798,0.719739,0.169669,0.171738,0.370792,0.325960,0.000000,498.142,0.0,0.000000,0.0,0.727273,0.633333,0.000000,498.142
1,2020-09-20 00:00:00+00:00,0.000016,0.472211,0.000261,0.445408,0.712939,0.719550,0.168597,0.170740,0.365613,0.321656,0.000016,478.788,0.0,0.004167,0.0,0.727273,0.633333,0.000000,478.788
2,2020-09-20 00:00:00+00:00,0.000031,0.494018,0.000261,0.445690,0.709080,0.719362,0.167525,0.169742,0.360433,0.317351,0.000031,470.522,0.0,0.008333,0.0,0.727273,0.633333,0.043478,470.522
3,2020-09-20 00:00:00+00:00,0.000047,0.520214,0.000261,0.447091,0.700406,0.721303,0.166666,0.169024,0.355318,0.313408,0.000047,482.183,0.0,0.012500,0.0,0.727273,0.633333,0.043478,482.183
4,2020-09-20 00:00:00+00:00,0.000062,0.546410,0.000261,0.448493,0.691733,0.723245,0.165806,0.168306,0.350202,0.309464,0.000062,459.216,0.0,0.016667,0.0,0.727273,0.633333,0.086957,459.216
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1273198,2024-05-19 00:00:00+00:00,0.999938,0.980325,0.003716,0.444522,0.916749,0.589642,0.217023,0.243771,0.166732,0.206182,,,,0.983333,1.0,0.363636,0.733333,0.956522,
1273199,2024-05-19 00:00:00+00:00,0.999953,0.981358,0.003716,0.442858,0.924216,0.586557,0.246558,0.272641,0.166233,0.204404,,,,0.987500,1.0,0.363636,0.733333,0.956522,
1273200,2024-05-19 00:00:00+00:00,0.999969,0.982391,0.003716,0.441194,0.931683,0.583471,0.276092,0.301511,0.165734,0.202627,,,,0.991667,1.0,0.363636,0.733333,1.000000,
1273201,2024-05-19 00:00:00+00:00,0.999984,0.983425,0.003716,0.439530,0.939149,0.580386,0.305626,0.330380,0.165235,0.200850,,,,0.995833,1.0,0.363636,0.733333,1.000000,


Index(['reference_time', 'valid_time', 'CloudCover', 'SolarDownwardRadiation',
       'Temperature_x', 'RelativeHumidity', 'Temperature_y', 'WindDirection',
       'WindDirection:100', 'WindSpeed', 'WindSpeed:100', 'dtm',
       'Wind_MWh_credit', 'Solar_MWh_credit', 'forcast_hours', 'year', 'month',
       'day', 'hour', 'total_generation_MWh'],
      dtype='object') 20
reference_time            0
valid_time                0
CloudCover                0
SolarDownwardRadiation    0
Temperature_x             0
RelativeHumidity          0
Temperature_y             0
WindDirection             0
WindDirection:100         0
WindSpeed                 0
WindSpeed:100             0
dtm                       0
Wind_MWh_credit           0
Solar_MWh_credit          0
forcast_hours             0
year                      0
month                     0
day                       0
hour                      0
total_generation_MWh      0
dtype: int64


In [4]:
#  Aufteilen der Daten in Trainings- und Testdaten basierend auf dem 'reference_time'-Wert

# df_train = normalized_df.loc[normalized_df.reference_time < "2023-05-20"]
# df_test = normalized_df.loc[normalized_df.reference_time >= "2023-05-20"]
# del normalized_df
df_train = df_cleaned.loc[normalized_df.reference_time < "2023-05-20"]
df_test = df_cleaned.loc[normalized_df.reference_time >= "2023-05-20"]
del df_cleaned

In [5]:
display(df_train.describe())
display(df_test.describe())

Unnamed: 0,valid_time,CloudCover,SolarDownwardRadiation,Temperature_x,RelativeHumidity,Temperature_y,WindDirection,WindDirection:100,WindSpeed,WindSpeed:100,dtm,Wind_MWh_credit,Solar_MWh_credit,forcast_hours,year,month,day,hour,total_generation_MWh
count,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0
mean,0.360685,0.719329,0.133929,0.364071,0.665362,0.496289,0.557523,0.561972,0.278729,0.260762,0.361769,292.75896,112.359873,0.500002,0.389151,0.488475,0.491075,0.499646,405.118834
std,0.208394,0.285748,0.199506,0.135751,0.158866,0.179609,0.251181,0.251356,0.146753,0.135592,0.20902,219.976565,185.741764,0.289876,0.216268,0.32676,0.291277,0.301094,267.699132
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.180685,0.541406,0.00028,0.267769,0.558207,0.349391,0.399466,0.408221,0.160808,0.155854,0.181228,71.203,0.0,0.25,0.25,0.181818,0.233333,0.217391,164.757516
50%,0.358948,0.816045,0.019949,0.356537,0.686196,0.459469,0.589811,0.597093,0.265899,0.244044,0.360027,269.678,0.007375,0.5,0.5,0.454545,0.5,0.521739,431.903667
75%,0.536978,0.967986,0.204846,0.455678,0.789257,0.64888,0.74226,0.746186,0.383171,0.353555,0.538592,536.23,161.514772,0.75,0.5,0.818182,0.733333,0.782609,580.079
max,0.727832,1.0,0.999627,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.730019,602.1747,896.211945,1.0,0.75,1.0,1.0,1.0,1479.6468


Unnamed: 0,valid_time,CloudCover,SolarDownwardRadiation,Temperature_x,RelativeHumidity,Temperature_y,WindDirection,WindDirection:100,WindSpeed,WindSpeed:100,dtm,Wind_MWh_credit,Solar_MWh_credit,forcast_hours,year,month,day,hour,total_generation_MWh
count,347178.0,347178.0,347178.0,347178.0,347178.0,347178.0,347178.0,347178.0,347178.0,347178.0,347178.0,347178.0,347178.0,347178.0,347178.0,347178.0,347178.0,347178.0,347178.0
mean,0.86176,0.748842,0.141446,0.396592,0.682393,0.531326,0.544382,0.549653,0.28274,0.269735,0.86435,256.856643,127.634911,0.498924,0.846438,0.50217,0.491603,0.499906,384.491554
std,0.077848,0.272129,0.208458,0.131665,0.150907,0.175555,0.24513,0.244658,0.146215,0.13565,0.078082,195.67504,206.000066,0.289832,0.121693,0.315734,0.294446,0.301012,263.468505
min,0.724292,0.0,3.3e-05,0.040508,0.090482,0.124534,0.00088,0.001914,0.00111,0.000787,0.726469,0.0,0.0,0.0,0.75,0.0,0.0,0.0,0.0
25%,0.794197,0.588701,0.000284,0.30413,0.585126,0.382851,0.370626,0.378285,0.167961,0.166254,0.796584,79.79,0.0,0.245833,0.75,0.181818,0.233333,0.26087,174.04
50%,0.862022,0.849659,0.023632,0.385928,0.70163,0.50305,0.593067,0.602437,0.271839,0.255709,0.864612,221.012,0.213016,0.5,0.75,0.545455,0.5,0.521739,367.87
75%,0.929117,0.981833,0.217931,0.49319,0.799161,0.709531,0.720513,0.724116,0.384554,0.362056,0.931909,397.46,190.88078,0.75,1.0,0.818182,0.733333,0.782609,577.2
max,0.997004,1.0,1.0,0.848379,0.999967,0.943672,0.997691,0.998607,0.840868,0.830354,1.0,593.652,983.54294,1.0,1.0,1.0,1.0,1.0,1413.397935


In [6]:
# Entfernen spezifischer Zeilen aus den Testdaten
df_test = df_test.drop(index=range(925314, 925316))
df_test = df_test.drop(index=range(925932, 925933))
df_test = df_test.drop(index=range(944237, 944238))

In [7]:
display(df_train.describe())
display(df_test.describe())

Unnamed: 0,valid_time,CloudCover,SolarDownwardRadiation,Temperature_x,RelativeHumidity,Temperature_y,WindDirection,WindDirection:100,WindSpeed,WindSpeed:100,dtm,Wind_MWh_credit,Solar_MWh_credit,forcast_hours,year,month,day,hour,total_generation_MWh
count,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0,920983.0
mean,0.360685,0.719329,0.133929,0.364071,0.665362,0.496289,0.557523,0.561972,0.278729,0.260762,0.361769,292.75896,112.359873,0.500002,0.389151,0.488475,0.491075,0.499646,405.118834
std,0.208394,0.285748,0.199506,0.135751,0.158866,0.179609,0.251181,0.251356,0.146753,0.135592,0.20902,219.976565,185.741764,0.289876,0.216268,0.32676,0.291277,0.301094,267.699132
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.180685,0.541406,0.00028,0.267769,0.558207,0.349391,0.399466,0.408221,0.160808,0.155854,0.181228,71.203,0.0,0.25,0.25,0.181818,0.233333,0.217391,164.757516
50%,0.358948,0.816045,0.019949,0.356537,0.686196,0.459469,0.589811,0.597093,0.265899,0.244044,0.360027,269.678,0.007375,0.5,0.5,0.454545,0.5,0.521739,431.903667
75%,0.536978,0.967986,0.204846,0.455678,0.789257,0.64888,0.74226,0.746186,0.383171,0.353555,0.538592,536.23,161.514772,0.75,0.5,0.818182,0.733333,0.782609,580.079
max,0.727832,1.0,0.999627,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.730019,602.1747,896.211945,1.0,0.75,1.0,1.0,1.0,1479.6468


Unnamed: 0,valid_time,CloudCover,SolarDownwardRadiation,Temperature_x,RelativeHumidity,Temperature_y,WindDirection,WindDirection:100,WindSpeed,WindSpeed:100,dtm,Wind_MWh_credit,Solar_MWh_credit,forcast_hours,year,month,day,hour,total_generation_MWh
count,347174.0,347174.0,347174.0,347174.0,347174.0,347174.0,347174.0,347174.0,347174.0,347174.0,347174.0,347174.0,347174.0,347174.0,347174.0,347174.0,347174.0,347174.0,347174.0
mean,0.861761,0.748841,0.141445,0.39659,0.682392,0.531326,0.54438,0.549652,0.282743,0.269738,0.864351,256.859586,127.633929,0.498924,0.846439,0.502171,0.491601,0.499905,384.493514
std,0.077847,0.27213,0.208459,0.131665,0.150907,0.175556,0.245131,0.244659,0.146213,0.135648,0.078081,195.674246,206.000339,0.289832,0.121693,0.315735,0.294446,0.301013,263.468829
min,0.724292,0.0,3.3e-05,0.040508,0.090482,0.124534,0.00088,0.001914,0.00111,0.000787,0.726469,0.0,0.0,0.0,0.75,0.0,0.0,0.0,0.0
25%,0.794197,0.588696,0.000284,0.304129,0.585125,0.38285,0.370624,0.378283,0.167969,0.166257,0.796584,79.79025,0.0,0.245833,0.75,0.181818,0.233333,0.26087,174.04
50%,0.862037,0.84966,0.023632,0.385926,0.701626,0.503047,0.593066,0.602434,0.27184,0.255712,0.864628,221.012,0.213016,0.5,0.75,0.545455,0.5,0.521739,367.87
75%,0.929117,0.981834,0.217929,0.493184,0.79916,0.709532,0.720512,0.724116,0.384555,0.362056,0.931909,397.46,190.88078,0.75,1.0,0.818182,0.733333,0.782609,577.2
max,0.997004,1.0,1.0,0.848379,0.999967,0.943672,0.997691,0.998607,0.840868,0.830354,1.0,593.652,983.54294,1.0,1.0,1.0,1.0,1.0,1413.397935


In [8]:
nummer = 20000  # Anzahl der zu verwendenden Zeilen für das Training und den Test

# Vorbereiten der Trainings- und Testtabellen
modelling_table = df_train.head(nummer)  
test_table = df_test.head(nummer)

# Funktion zur Modellierung für die Trainingsdaten
def train_models(modelling_table):
    forecast_models = dict()  # Dictionary für die trainierten Modelle
    
    # Liste der Modellformeln für die Regression
    model_formulas = [
        'Wind_MWh_credit ~ bs(WindSpeed, df=8)',  # Modell 1
        'Wind_MWh_credit ~ bs(WindSpeed, df=8) + bs(CloudCover, df=8)',  # Modell 2
        'Wind_MWh_credit ~ bs(WindSpeed, df=8) + bs(CloudCover, df=8) + bs(WindDirection, df=8)'  # Modell 3
    ]
    
    # Verwende tqdm für Fortschrittsanzeigen
    for idx, formula in enumerate(model_formulas):
        for quantile in tqdm(range(10, 100, 10), desc=f"Training Modell {idx + 1}", unit="Quantil"):
            # Fitting das Modell für jeden Quantil auf den Trainingsdaten
            mod = smf.quantreg(formula, data=modelling_table)  # Quantile Regression
            forecast_models[f"Model_{idx + 1}_q{quantile}"] = mod.fit(q=quantile / 100, max_iter=2500)  # Modellanpassung
            
            # Vorhersagen für Trainingsdaten speichern
            modelling_table[f"Model_{idx + 1}_q{quantile}"] = forecast_models[f"Model_{idx + 1}_q{quantile}"].predict(modelling_table)
            modelling_table.loc[modelling_table[f"Model_{idx + 1}_q{quantile}"] < 0, f"Model_{idx + 1}_q{quantile}"] = 0  # Negative Werte auf 0 setzen
        
    return forecast_models  # Rückgabe der trainierten Modelle

# Funktion zur Berechnung des Pinball Scores
def pinball(y, q, alpha):
    # Berechnung des Pinball Scores
    return (y - q) * alpha * (y >= q) + (q - y) * (1 - alpha) * (y < q)

def pinball_score(df, model_idx):
    score = []  # Liste für den Gesamt-Pinball Score
    per_quantile_scores = []  # Liste für die Scores pro Quantil
    for qu in range(10, 100, 10):
        # Berechnung des Pinball Scores für jedes Quantil
        pinball_score_value = pinball(
            y=df["Wind_MWh_credit"].fillna(0),  # Zielvariable
            q=df[f"Model_{model_idx}_q{qu}"].fillna(0),  # Vorhersage
            alpha=qu / 100  # Quantil-Wert
        ).mean()
        
        score.append(pinball_score_value)  # Hinzufügen des Scores zur Liste
        per_quantile_scores.append((qu, pinball_score_value))  # Speichern des Scores pro Quantil
    
    # Gesamt-Pinball Score berechnen
    overall_score = np.nanmean(score)
    
    return overall_score, per_quantile_scores  # Rückgabe des Gesamtwerts und der Scores pro Quantil

# Funktion zum Testen des Modells und Ausgeben der Scores
def test_model(test_data, forecast_models):
    all_scores = {}  # Dictionary für alle Scores
    
    for model_idx in range(1, 4):  # Für die ersten 3 Modelle
        for quantile in tqdm(range(10, 100, 10), desc=f"Vorhersagen für Modell {model_idx} erstellen", unit="Quantil"):
            model_key = f"Model_{model_idx}_q{quantile}"  # Schlüssel für das Modell
            test_data[model_key] = forecast_models[model_key].predict(test_data)  # Vorhersagen für Testdaten
            test_data.loc[test_data[model_key] < 0, model_key] = 0  # Negative Werte auf 0 setzen
        
        # Gesamt-Pinball Score und Scores pro Quantil berechnen
        overall_score, per_quantile_scores = pinball_score(test_data, model_idx)
        all_scores[model_idx] = (overall_score, per_quantile_scores)  
        
        # Ausgabe der Scores für das Modell
        print(f"\nModell {model_idx} - Gesamt-Pinball Score: {overall_score}")
        for quantile, score in per_quantile_scores:
            print(f"Pinball Score für q{quantile}: {score}")

# Trainiere die Modelle auf den Trainingsdaten
forecast_models = train_models(modelling_table)

# Teste die Modelle mit den Testdaten
test_model(test_table, forecast_models)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  modelling_table[f"Model_{idx + 1}_q{quantile}"] = forecast_models[f"Model_{idx + 1}_q{quantile}"].predict(modelling_table)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  modelling_table[f"Model_{idx + 1}_q{quantile}"] = forecast_models[f"Model_{idx + 1}_q{quantile}"].predict(modelling_table)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexi


Modell 1 - Gesamt-Pinball Score: 34.93971782956159
Pinball Score für q10: 14.236594678993383
Pinball Score für q20: 22.977397485035592
Pinball Score für q30: 28.945472957719712
Pinball Score für q40: 34.26360358086618
Pinball Score für q50: 39.59320962855502
Pinball Score für q60: 44.98320429367406
Pinball Score für q70: 49.300021595360825
Pinball Score für q80: 47.55491127693617
Pinball Score für q90: 32.60304496891333


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data[model_key] = forecast_models[model_key].predict(test_data)  # Vorhersagen für Testdaten
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data[model_key] = forecast_models[model_key].predict(test_data)  # Vorhersagen für Testdaten
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data


Modell 2 - Gesamt-Pinball Score: 35.517976470179995
Pinball Score für q10: 14.371265625846794
Pinball Score für q20: 23.30409561988241
Pinball Score für q30: 30.023587304255006
Pinball Score für q40: 35.420490338981516
Pinball Score für q50: 40.37324310548769
Pinball Score für q60: 45.8033637881824
Pinball Score für q70: 49.667111155536084
Pinball Score für q80: 48.06059602053275
Pinball Score für q90: 32.63803527291524


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data[model_key] = forecast_models[model_key].predict(test_data)  # Vorhersagen für Testdaten
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data[model_key] = forecast_models[model_key].predict(test_data)  # Vorhersagen für Testdaten
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_data


Modell 3 - Gesamt-Pinball Score: 49.50128553454392
Pinball Score für q10: 14.09636229273511
Pinball Score für q20: 37.75322040443887
Pinball Score für q30: 51.483149768552565
Pinball Score für q40: 58.08131532070376
Pinball Score für q50: 61.76641145606889
Pinball Score für q60: 64.66845260412411
Pinball Score für q70: 65.71452009348583
Pinball Score für q80: 56.78856654663971
Pinball Score für q90: 35.15957132414646



