In [None]:
# This script is optimised specifically for the "island" use case

In [None]:
#Import libs
import xarray as xr
import netCDF4
import pandas as pd
import xarray as xr
import numpy as np
import altair as alt

In [None]:
# Input locations for the two forecasts that are compared in this notebook.
# NOTE(review): these are absolute, machine-specific paths — consider a configurable data directory.
# Client forecast (NetCDF; opened with xarray when the data is loaded).
data_cli = "/mnt/storage_b/data/ocf/solar_pv_nowcasting/clients/island/island_client_fc_UTC.nc"
# OCF model output (CSV; read with pandas when the data is loaded).
data_ocf = "/mnt/storage_b/data/ocf/solar_pv_nowcasting/clients/island/ocf_model.csv"

In [None]:
# Client forecast: only the 'power' variable is of interest.
ds_cli = xr.open_dataset(data_cli)['power']

# Flatten to a table, discard the site metadata columns and give the
# remaining columns the names used throughout the rest of the notebook.
df_cli = (
    ds_cli.to_dataframe()
    .reset_index()
    .drop(columns=['pv_id', 'latitude', 'longitude'])
    .rename(columns={'power': 'client', 'ts': 'ts_end'})
)

# OCF forecast is already tabular.
df_ocf = pd.read_csv(data_ocf)

In [None]:
df_ocf

In [None]:
df_cli

In [None]:
# The client's forecast is made on the previous day, so we need to take that into account.

def gen_time_step_cli(df, init_hour=10):
    """Add forecast start time, issue time and horizon to the client forecast.

    The client forecast is treated as having been issued at ``init_hour``:00
    on the day before each period starts.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``ts_end`` column holding the end of each forecast
        period (datetime-like).
    init_hour : int, default 10
        Hour of the previous day at which the forecast is assumed to be issued.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is left untouched) with three extra columns:
        ``ts_start`` (``ts_end`` minus one hour), ``ts`` (the issue time) and
        ``horizon`` (whole minutes from ``ts`` to ``ts_start``).
    """
    ts_end = pd.to_datetime(df['ts_end'])

    # Each forecast period is one hour long.
    ts_start = ts_end - pd.to_timedelta(1, unit='h')

    # Issue time: midnight of the previous day, shifted to the issue hour.
    previous_day = ts_start - pd.DateOffset(days=1)
    init_time = previous_day.dt.floor('D') + pd.Timedelta(hours=init_hour)

    # Lead time between issue time and period start, in minutes.
    horizon = ((ts_start - init_time) / pd.Timedelta(minutes=1)).astype(int)

    # assign() returns a new frame, so re-running a cell cannot corrupt the input.
    return df.assign(ts_start=ts_start, ts=init_time, horizon=horizon)


In [None]:
df_cli = gen_time_step_cli(df_cli)

In [None]:
#Filter OCFs forecast to only include specific horizons and times.
def filt_on_time(df, init_time='10:00:00', min_horizon=840, max_horizon=2279):
    """Select the OCF forecasts issued at one time of day within a horizon window.

    Parameters
    ----------
    df : pandas.DataFrame
        OCF forecast table with columns ``ts_start``, ``ts``, ``y``, ``pred``
        and ``horizon``. ``ts`` may be strings or datetimes.
    init_time : str, default '10:00:00'
        Time of day (``HH:MM:SS``) that ``ts`` must match.
    min_horizon, max_horizon : int, default 840 and 2279
        Inclusive bounds on ``horizon``. For example, pass
        ``init_time='05:00:00', min_horizon=0, max_horizon=1439`` to look at
        the 05:00 run instead.

    Returns
    -------
    pandas.DataFrame
        New frame with columns ``ts_start``, ``ts`` (as datetime), ``truth``
        (renamed from ``y``), ``ocf`` (renamed from ``pred``) and ``horizon``.
        The input frame is not modified.
    """
    # Work on a new frame so the caller's data keeps its original columns/dtypes.
    frame = df.assign(ts=pd.to_datetime(df['ts']))

    desired_time = pd.to_datetime(init_time).time()
    keep = (frame['ts'].dt.time == desired_time) & frame['horizon'].between(min_horizon, max_horizon)

    return (
        frame.loc[keep, ['ts_start', 'ts', 'y', 'pred', 'horizon']]
        .rename(columns={'y': 'truth', 'pred': 'ocf'})
    )

In [None]:
df_ocf_10 = filt_on_time(df_ocf)

In [None]:
df_ocf_10

In [None]:
# Give the join key the same datetime dtype on both sides before merging.
df_cli['ts_start'], df_ocf_10['ts_start'] = (
    pd.to_datetime(df_cli['ts_start']),
    pd.to_datetime(df_ocf_10['ts_start']),
)

# Keep only rows present in both forecasts for the same period, issue time and horizon.
merged_df = df_cli.merge(df_ocf_10, how='inner', on=['ts_start', 'ts', 'horizon'])
merged_df.head(20)

In [None]:
def mae_cal(df):
    """Print the mean absolute error of both forecasts against the truth.

    Expects the columns ``client``, ``ocf`` and ``truth``. Nothing is
    returned; the two MAE values are written to stdout.
    """
    truth = df['truth']

    # Client forecast vs. truth (reported under the label "power").
    mae_client = (df['client'] - truth).abs().mean()
    # OCF forecast vs. truth.
    mae_ocf = (df['ocf'] - truth).abs().mean()

    print("MAE between power and truth:", mae_client)
    print("MAE between ocf and truth:", mae_ocf)

In [None]:
mae_cal(merged_df)

In [None]:
# Next, I look at the specific days that the client has asked about

In [None]:
# Days on which the error is inspected individually.
# The strings are written day/month/year (see the format passed to pd.to_datetime below).
dates = [
    '01/10/2022',
    '08/10/2022',
    '10/10/2022',
    '12/10/2022',
    '14/10/2022',
    '20/10/2022',
    '21/10/2022',
    '05/11/2022',
    '10/11/2022',
    '11/11/2022',
    '12/11/2022',
    '14/11/2022',
    '15/11/2022',
    '17/11/2022',
    '21/11/2022',
    '23/11/2022',
    '25/11/2022',
    '26/11/2022',
    '28/11/2022'
]
# Parse the strings with an explicit day-first format so that e.g. '01/10/2022'
# is read as 1 October rather than 10 January.
date_index = pd.to_datetime(dates, format='%d/%m/%Y')

In [None]:
date_index

In [None]:
merged_df

In [None]:
def mae_on_date(df, date):
    """Return ``(mae_client, mae_ocf)`` for the rows whose ``ts_start`` falls on ``date``.

    ``date`` may be anything ``pd.to_datetime`` accepts; only its calendar
    date is used. ``df`` needs the columns ``ts_start`` (datetime),
    ``client``, ``ocf`` and ``truth``.
    """
    target_day = pd.to_datetime(date).date()
    on_day = df[df['ts_start'].dt.date == target_day]
    truth = on_day['truth']

    # Same metric for both forecasts, in the order (client, ocf).
    return tuple(
        np.mean(np.abs(on_day[forecast] - truth))
        for forecast in ('client', 'ocf')
    )

In [None]:
# One record per requested date: the date plus the MAE of each forecast on that day.
mae_list = [
    {'date': day, 'mae_client': scores[0], 'mae_ocf': scores[1]}
    for day, scores in ((d, mae_on_date(merged_df, d)) for d in date_index)
]

# Build the table in one go from the list of records.
mae_on_dates = pd.DataFrame(mae_list)


In [None]:
mae_on_dates

In [None]:
# Average the per-day MAE values over all requested dates, for each forecast.
avg_mae_client, avg_mae_ocf = (
    mae_on_dates[column].mean() for column in ('mae_client', 'mae_ocf')
)
print("avg mae client", avg_mae_client)
print("avg mae ocf", avg_mae_ocf)

In [None]:
# Work on a copy: the following cells add columns to `df`, and a plain alias
# would silently add them to `merged_df` as well.
df = merged_df.copy()

In [None]:
# More analysis of the error by month, horizon and day.
# Signed errors (forecast - truth) expose bias; absolute errors are what the MAE averages.
df['client_error'] = df['client'] - df['truth']
df['ocf_error'] = df['ocf'] - df['truth']
df['month'] = df['ts_start'].dt.month
# NOTE(review): this is the day of the month, so the per-day averages below pool
# the same day number across different months — confirm that is intended.
df['day'] = df['ts_start'].dt.day

df['abs_client_error'] = df['client_error'].abs()
df['abs_ocf_error'] = df['ocf_error'].abs()

signed_cols = ['client_error', 'ocf_error']
abs_cols = ['abs_client_error', 'abs_ocf_error']

# Select the error columns *before* aggregating: averaging every column first
# is wasted work and, on recent pandas versions, raises as soon as the frame
# contains a column that cannot be averaged.
avg_error_per_month = df.groupby('month')[signed_cols].mean().reset_index()
avg_error_per_horizon = df.groupby('horizon')[signed_cols].mean().reset_index()
avg_error_per_day = df.groupby('day')[signed_cols].mean().reset_index()

abs_avg_error_per_month = df.groupby('month')[abs_cols].mean().reset_index()
abs_avg_error_per_horizon = df.groupby('horizon')[abs_cols].mean().reset_index()
abs_avg_error_per_day = df.groupby('day')[abs_cols].mean().reset_index()

df


In [None]:
# `mae_month_horizon` is never defined in this notebook (its `def` line is
# commented out and its body runs at cell level), so calling it raised a
# NameError on a fresh kernel. Show the per-month MAE table instead.
abs_avg_error_per_month

In [None]:
# Mean signed error per month for both forecasts (client = red, OCF = blue).
# avg_error_per_month averages (forecast - truth), i.e. the bias and not the
# MAE, so the chart is titled "Mean error" rather than "MAE".
line = (
    alt.Chart(avg_error_per_month.melt('month', var_name='error_type', value_name='error'))
    .mark_line(interpolate="step-after", point=True)
    .encode(
        y=alt.Y("error", title="Error", scale=alt.Scale(zero=False)),
        color=alt.Color("error_type:N", scale=alt.Scale(domain=['client_error', 'ocf_error'], range=['red', 'blue'])),
        x=alt.X("month:O", title="Month")
    )
)

c = (
    alt.layer(line).properties(
        height=250,
        width=700,
        title="Mean error per month"
    )
)

c


In [None]:
# Mean signed error per forecast horizon for both forecasts (client = red, OCF = blue).
# avg_error_per_horizon averages (forecast - truth), i.e. the bias and not the
# MAE, so the chart is titled "Mean error" rather than "MAE".
line = (
    alt.Chart(avg_error_per_horizon.melt('horizon', var_name='error_type', value_name='error'))
    .mark_line(interpolate="step-after", point=True)
    .encode(
        y=alt.Y("error", title="Error", scale=alt.Scale(zero=False)),
        color=alt.Color("error_type:N", scale=alt.Scale(domain=['client_error', 'ocf_error'], range=['red', 'blue'])),
        x=alt.X("horizon:O", title="Horizon")
    )
)

c = (
    alt.layer(line).properties(
        height=250,
        width=700,
        title="Mean error per horizon"
    )
)

c

In [None]:
# Mean signed error per day of the month for both forecasts (client = red, OCF = blue).
# avg_error_per_day averages (forecast - truth), i.e. the bias and not the
# MAE, so the chart is titled "Mean error" rather than "MAE".
line = (
    alt.Chart(avg_error_per_day.melt('day', var_name='error_type', value_name='error'))
    .mark_line(interpolate="step-after", point=True)
    .encode(
        y=alt.Y("error", title="Error", scale=alt.Scale(zero=False)),
        color=alt.Color("error_type:N", scale=alt.Scale(domain=['client_error', 'ocf_error'], range=['red', 'blue'])),
        x=alt.X("day:O", title="Day")
    )
)

c = (
    alt.layer(line).properties(
        height=250,
        width=700,
        title="Mean error per day"
    )
)

c

In [None]:
# Mean absolute error per month: one stepped line per forecast
# (client = red, OCF = blue).
line = (
    alt.Chart(
        abs_avg_error_per_month.melt(id_vars='month', var_name='error_type', value_name='error')
    )
    .mark_line(point=True, interpolate="step-after")
    .encode(
        x=alt.X("month:O", title="Month"),
        y=alt.Y("error", title="Error", scale=alt.Scale(zero=False)),
        color=alt.Color(
            "error_type:N",
            scale=alt.Scale(
                domain=['abs_client_error', 'abs_ocf_error'],
                range=['red', 'blue'],
            ),
        ),
    )
)

c = alt.layer(line).properties(title="abs MAE per month", width=700, height=250)

c

In [None]:
# Mean absolute error per forecast horizon: one stepped line per forecast
# (client = red, OCF = blue).
line = (
    alt.Chart(
        abs_avg_error_per_horizon.melt(id_vars='horizon', var_name='error_type', value_name='error')
    )
    .mark_line(point=True, interpolate="step-after")
    .encode(
        x=alt.X("horizon:O", title="Horizon"),
        y=alt.Y("error", title="Error", scale=alt.Scale(zero=False)),
        color=alt.Color(
            "error_type:N",
            scale=alt.Scale(
                domain=['abs_client_error', 'abs_ocf_error'],
                range=['red', 'blue'],
            ),
        ),
    )
)

c = alt.layer(line).properties(title="abs MAE per horizon", width=700, height=250)

c

In [None]:
# Mean absolute error per day of the month for both forecasts (client = red, OCF = blue).
# Titled "abs MAE per day" to match the other absolute-error charts and to keep
# it distinguishable from the signed-error per-day chart.
line = (
    alt.Chart(abs_avg_error_per_day.melt('day', var_name='error_type', value_name='error'))
    .mark_line(interpolate="step-after", point=True)
    .encode(
        y=alt.Y("error", title="Error", scale=alt.Scale(zero=False)),
        color=alt.Color("error_type:N", scale=alt.Scale(domain=['abs_client_error', 'abs_ocf_error'], range=['red', 'blue'])),
        x=alt.X("day:O", title="Day")
    )
)

c = (
    alt.layer(line).properties(
        height=250,
        width=700,
        title="abs MAE per day"
    )
)

c