# Hello and welcome

In [1]:
# import lib and data
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error

# train
y_all = pd.read_csv("data/target_train.csv")
y_all["dtm"] = pd.to_datetime(y_all["dtm"])

# comp
energy_data_2 = pd.read_csv("data_comp/Energy_Data_20240119_20240519.csv")
energy_data_2["dtm"] = pd.to_datetime(energy_data_2["dtm"])

energy_data_2["Wind_MWh_credit"] = 0.5 * energy_data_2["Wind_MW"] - energy_data_2["boa_MWh"]
energy_data_2["Solar_MWh_credit"] = 0.5 * energy_data_2["Solar_MW"]
energy_data_2["total_generation_MWh"] = energy_data_2["Wind_MWh_credit"] + energy_data_2["Solar_MWh_credit"]


# Train and Comp for Random Forest

In [4]:
# Train data
# RandomForest split dates
# 65% and 35 % split
#split_date = pd.Timestamp("2022-10-01 00:00:00+00:00")

# 80 %  and 20 % split 
split_date = pd.Timestamp("2023-03-15 00:00:00+00:00")

# naive predictions are for yesterday values, shift 1 day 
y_all["nav_pred"] = y_all["total_generation_MWh"].shift(48)

# filter data with spliting date
y_filt = y_all[(y_all["dtm"] >= split_date)]

# cal MAE
mae_train = mean_absolute_error(y_filt["total_generation_MWh"], y_filt["nav_pred"])
print(f"Train Naive MAE: {mae_train} MWh")

# Comp data 
# competition dates
start_date_comp = pd.Timestamp("2024-02-20 00:00:00+00:00")
end_date_comp = pd.Timestamp("2024-05-19 23:30:00+00:00")

# nav pred, shift 1 day 
energy_data_2["nav_pred"] = energy_data_2["total_generation_MWh"].shift(48)
energy_filt = energy_data_2[(energy_data_2["dtm"] >= start_date_comp) & (energy_data_2["dtm"] <= end_date_comp)]

# cal MAE
mae_comp = mean_absolute_error(energy_filt["total_generation_MWh"], energy_filt["nav_pred"])
print(f"Comp Naive MAE: {mae_comp} MWh")


Train Naive MAE: 214.22408079035466 MWh
Comp Naive MAE: 164.0164399063994 MWh


# Quantiles 

In [None]:
# we have our lovely competition functions 
def pinball(y, q, alpha):
    return (y - q) * alpha * (y >= q) + (q - y) * (1 - alpha) * (y < q)

def pinball_score(df):
    scores = [
        pinball(
            y=df["total_generation_MWh"],
            q=df[f"q{qu}"],
            alpha=qu/100
        ).mean()
        for qu in range(10, 100, 10)
    ]
    return np.mean(scores)

def create_naive_df(y_true, y_naive):
    naive_preds = {f"q{int(q*100)}": y_naive for q in quantiles}
    naive_preds["total_generation_MWh"] = y_true
    return pd.DataFrame(naive_preds)

# Xavire ~ "Assume same value for all quantiles"
quantiles = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

# train
df_naive_train_total = create_naive_df(y_filt["total_generation_MWh"].values, y_filt["nav_pred"].values)
df_naive_train_solar = create_naive_df(y_filt["Solar_MWh_credit"].values, y_filt["nav_pred"].values)
df_naive_train_wind  = create_naive_df(y_filt["Wind_MWh_credit"].values, y_filt["nav_pred"].values)

score_naive_train_total = pinball_score(df_naive_train_total)
score_naive_train_solar = pinball_score(df_naive_train_solar)
score_naive_train_wind  = pinball_score(df_naive_train_wind)

print(f"Naive Pinball Score (Training - Total): {score_naive_train_total:.4f}")
print(f"Naive Pinball Score (Training - Solar): {score_naive_train_solar:.4f}")
print(f"Naive Pinball Score (Training - Wind):  {score_naive_train_wind:.4f}")

# comp
df_naive_comp_total = create_naive_df(energy_filt["total_generation_MWh"].values, energy_filt["nav_pred"].values)
df_naive_comp_solar = create_naive_df(energy_filt["Solar_MWh_credit"].values, energy_filt["nav_pred"].values)
df_naive_comp_wind  = create_naive_df(energy_filt["Wind_MWh_credit"].values, energy_filt["nav_pred"].values)

score_naive_comp_total = pinball_score(df_naive_comp_total)
score_naive_comp_solar = pinball_score(df_naive_comp_solar)
score_naive_comp_wind  = pinball_score(df_naive_comp_wind)

print(f"Naive Pinball Score (Competition - Total): {score_naive_comp_total:.4f}")
print(f"Naive Pinball Score (Competition - Solar): {score_naive_comp_solar:.4f}")
print(f"Naive Pinball Score (Competition - Wind):  {score_naive_comp_wind:.4f}")



Naive Pinball Score (Training - Total): 107.1120
Naive Pinball Score (Training - Solar): 130.6055
Naive Pinball Score (Training - Wind):  146.6069
Naive Pinball Score (Competition - Total): 82.0082
Naive Pinball Score (Competition - Solar): 123.3838
Naive Pinball Score (Competition - Wind):  118.6926
