In [None]:
# This script is optimised specifically for the "island" use case

In [None]:
import xarray as xr
import netCDF4
import pandas as pd
import xarray as xr
import numpy as np
import datetime

In [None]:
# Input files: the client's forecast (NetCDF) and OCF's forecast errors (CSV).
# Alternative OCF input, currently unused:
#   /mnt/storage_b/data/ocf/solar_pv_nowcasting/clients/island/ocf_model.csv
_island_dir = "/mnt/storage_b/data/ocf/solar_pv_nowcasting/clients/island"
data_cli = _island_dir + "/island_client_fc_UTC.nc"
data_ocf = _island_dir + "/ocf_model_excT_gfsT_8t/test_errors.csv"

In [None]:
# Open the client's NetCDF file and keep only the "power" variable (the client forecast).
ds_cli = xr.open_dataset(data_cli)["power"]

# Flatten to a table, drop the pv_id / latitude / longitude columns and switch to
# the column names used by the rest of the notebook.
df_cli = (
    ds_cli.to_dataframe()
    .reset_index()
    .drop(columns=["pv_id", "latitude", "longitude"])
    .rename(columns={"power": "client", "ts": "ts_end"})
)

df_ocf = pd.read_csv(data_ocf)

print(df_ocf)
print(df_cli)

In [None]:
# The client's forecast is made the previous day, so we want to take account of this.
def gen_time_step_cli(df, init_hour=10):
    """Attach period start, forecast init time and horizon to the client's forecast.

    Each row's ``ts_end`` is treated as the end of a one-hour period. The
    forecast for that period is taken to have been made at ``init_hour``:00 on
    the day before the period starts.

    Args:
        df: DataFrame with a datetime ``ts_end`` column.
        init_hour: Hour of day at which the previous-day forecast is assumed to
            be initialised. Defaults to 10, the value this notebook uses.

    Returns:
        A new DataFrame (the input is left unmodified) with three extra columns:
        ``ts_start`` (``ts_end`` minus one hour), ``ts`` (the init time) and
        ``horizon`` (whole minutes from ``ts`` to ``ts_start``).
    """
    # Work on a copy so the caller's frame is not changed as a side effect.
    out = df.copy()

    out["ts_start"] = out["ts_end"] - pd.to_timedelta(1, unit="h")

    # Midnight of the day before the period starts, then shifted to the init hour.
    init_day = (out["ts_start"] - pd.DateOffset(days=1)).dt.floor("D")
    out["ts"] = init_day + pd.Timedelta(hours=init_hour)

    # Lead time between forecast initialisation and period start, in minutes.
    out["horizon"] = ((out["ts_start"] - out["ts"]) / pd.Timedelta(minutes=1)).astype(int)

    return out

In [None]:
# Add ts_start, the assumed forecast init time (ts) and the horizon to every client row.
df_cli = gen_time_step_cli(df_cli)

In [None]:
# Filter OCF's forecast to only include specific horizons and times.
def filt_on_time(df, init_time="10:00:00", min_horizon=840, max_horizon=2279):
    """Select the OCF forecasts made at one time of day, within a horizon window.

    With the defaults this keeps forecasts initialised at 10:00 whose horizon
    is 840-2279 minutes, i.e. targets from 00:00 to 23:59 of the following day.

    Args:
        df: DataFrame with ``ts_start``, ``ts``, ``y``, ``pred`` and ``horizon``
            columns. ``ts`` may hold strings or datetimes.
        init_time: Time of day (``"HH:MM:SS"``) the forecast must be initialised at.
        min_horizon: Smallest horizon to keep (inclusive).
        max_horizon: Largest horizon to keep (inclusive).

    Returns:
        A new DataFrame with columns ``ts_start``, ``ts`` (as datetime),
        ``truth`` (from ``y``), ``ocf`` (from ``pred``) and ``horizon``.
        The input frame is not modified.
    """
    # Parse into a local Series rather than overwriting the caller's column.
    ts = pd.to_datetime(df["ts"])
    wanted_time = pd.to_datetime(init_time).time()

    keep = (ts.dt.time == wanted_time) & df["horizon"].between(min_horizon, max_horizon)

    selected = df.loc[keep, ["ts_start", "ts", "y", "pred", "horizon"]]
    # assign() on an existing column replaces it in place, so column order is kept.
    selected = selected.assign(ts=ts[keep])

    return selected.rename(columns={"y": "truth", "pred": "ocf"})

In [None]:
# Keep only the OCF forecasts initialised at 10:00, the same init time assumed for the client.
df_ocf_10 = filt_on_time(df_ocf)
df_ocf_10

In [None]:
# Make sure ts_start is a datetime in both frames so the join keys compare equal.
df_cli = df_cli.assign(ts_start=pd.to_datetime(df_cli["ts_start"]))
df_ocf_10 = df_ocf_10.assign(ts_start=pd.to_datetime(df_ocf_10["ts_start"]))

# Inner join: only (ts_start, ts, horizon) combinations present in both forecasts remain.
merged_df = df_cli.merge(df_ocf_10, on=["ts_start", "ts", "horizon"])
merged_df.head(20)

In [None]:
def mae_cal(df):
    """Print the mean absolute error of the client and OCF forecasts.

    Args:
        df: DataFrame with "client", "ocf" and "truth" columns.

    Nothing is returned; both MAE values are written to stdout.
    """
    # Both errors are computed before anything is printed.
    errors = {
        source: np.mean(np.abs(df[source] - df["truth"]))
        for source in ("client", "ocf")
    }

    print("MAE between client and truth:", errors["client"])
    print("MAE between ocf and truth:", errors["ocf"])

In [None]:
# Overall MAE of each forecast across every row of the merged frame.
mae_cal(merged_df)

In [None]:
# Days on which the two forecasts are compared individually (day/month/year).
dates = [
    # October 2022
    "01/10/2022", "08/10/2022", "10/10/2022", "12/10/2022",
    "14/10/2022", "20/10/2022", "21/10/2022",
    # November 2022
    "05/11/2022", "10/11/2022", "11/11/2022", "12/11/2022",
    "14/11/2022", "15/11/2022", "17/11/2022", "21/11/2022",
    "23/11/2022", "25/11/2022", "26/11/2022", "28/11/2022",
]

# The same days parsed into a pandas DatetimeIndex.
date_index = pd.to_datetime(dates, format="%d/%m/%Y")

In [None]:
# Inspect the merged client / OCF frame.
merged_df

In [None]:
def mae_on_date(df, date):
    """Return the client and OCF mean absolute errors for one calendar day.

    Rows are selected by the calendar date of their "ts" column.

    Args:
        df: DataFrame with a datetime "ts" column plus "client", "ocf" and "truth".
        date: Anything ``pd.to_datetime`` accepts; only its date part is used.

    Returns:
        Tuple ``(mae_client, mae_ocf)``.
    """
    target_day = pd.to_datetime(date).date()
    on_day = df.loc[df["ts"].dt.date == target_day]

    return tuple(
        np.mean(np.abs(on_day[source] - on_day["truth"]))
        for source in ("client", "ocf")
    )

In [None]:
# One record per selected day, holding that day's client and OCF MAE.
per_day_errors = (mae_on_date(merged_df, day) for day in date_index)
mae_list = [
    {"date": day, "mae_client": client_err, "mae_ocf": ocf_err}
    for day, (client_err, ocf_err) in zip(date_index, per_day_errors)
]

# Collect the records into a DataFrame.
mae_on_dates = pd.DataFrame(mae_list)

In [None]:
# Per-day MAE table for the selected dates.
mae_on_dates

In [None]:
# Average the per-day MAE values over the selected days, one column at a time.
avg_mae_client, avg_mae_ocf = (
    mae_on_dates[column].mean() for column in ("mae_client", "mae_ocf")
)

print("avg mae client", avg_mae_client)
print("avg mae ocf", avg_mae_ocf)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt


def plot_on_date(df, specific_ts):
    """Plot client, truth and OCF power against horizon for one forecast init time.

    Args:
        df: DataFrame with "ts", "horizon", "client", "truth" and "ocf" columns.
        specific_ts: Init time to show; rows whose "ts" equals it are plotted.
    """
    rows = df[df["ts"] == specific_ts]

    plt.figure(figsize=(10, 6))
    # One line per series, all sharing the horizon axis.
    for column, legend_name in (("client", "Client"), ("truth", "Truth"), ("ocf", "OCF")):
        plt.plot(rows["horizon"], rows[column], label=legend_name)

    plt.xlabel("Horizon (minutes)")
    plt.ylabel("Power (MW) ")
    plt.title(f"Data for ts: {specific_ts}")
    plt.legend()
    plt.grid(True)
    plt.xticks(rotation=45)
    plt.tight_layout()

    # To export the figure, call plt.savefig(<path>) here, before plt.show().
    plt.show()

In [None]:
# Forecast init time to inspect; plot_on_date draws the rows of merged_df whose "ts" equals it.
# The days compared elsewhere in this notebook are listed in `dates`.
specific_ts = "2022-10-11 10:00:00"

plot_on_date(merged_df, specific_ts)

In [None]:
import datetime

# Convert the string dates to datetime format
def convert_to_datetime(date_str):
    """Parse a day/month/year string such as "01/10/2022" into a ``datetime``.

    Raises:
        ValueError: if ``date_str`` does not match the ``%d/%m/%Y`` layout.
    """
    day_first_layout = "%d/%m/%Y"
    parsed = datetime.datetime.strptime(date_str, day_first_layout)
    return parsed


# Plot every day in `dates`, selecting the forecast initialised at 10:00 on that day.
# The generator converts one date at a time, immediately before its plot is drawn.
init_times = (
    convert_to_datetime(day).strftime("%Y-%m-%d 10:00:00") for day in dates
)
for ts in init_times:
    plot_on_date(merged_df, ts)