In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from datetime import datetime, timedelta
pd.options.display.max_rows = 100

In [None]:
# Names of the forecast models to score; each is used as a file stem under ../data/.
pv_models = ["model_1", "model_2"]

# Raw model name -> label shown in the result tables and plot legends.
model_mapping = {
    "model_1": "model_1_rename",
    "model_2": "model_2_rename",
}

# PVLive data, passed to the functions below as the actuals the forecasts are scored against.
pvlive_df = pd.read_csv("../pvlive_2016_2023.csv")

In [None]:
def calculate_mae_for_forecast_horizons(pv_models, df_actual, min_end_datetime_utc=None):
    """Compute the mean absolute error (MAE) per forecast horizon for each model.

    For every model the forecast CSV is read from ``../data/<model>.csv.gz``
    (falling back to ``../data/<model>.csv``), inner-joined to ``df_actual`` on
    ``end_datetime_utc``, and the absolute generation error is averaged for each
    distinct forecast horizon.

    Args:
        pv_models: Iterable of model names; each is the stem of a forecast CSV.
            The CSV must provide ``end_datetime_utc``, ``generation_mw`` and
            ``forecasting_creation_datetime_utc`` columns.
        df_actual: DataFrame of actuals with ``end_datetime_utc`` and
            ``generation_mw`` columns.
        min_end_datetime_utc: Optional inclusive lower bound on
            ``end_datetime_utc``; rows ending earlier are excluded from scoring.

    Returns:
        DataFrame with columns ``forecast_horizon_hours``, ``MAE`` and ``Model``
        (one row per model and horizon). Model names are translated through the
        module-level ``model_mapping``. An empty frame with those columns is
        returned when ``pv_models`` is empty.

    Raises:
        FileNotFoundError: If neither the ``.csv.gz`` nor the ``.csv`` file of a
            model exists.
    """
    mae_results = []  # One per-horizon MAE frame per model

    for pv_model in pv_models:
        print(pv_model)
        try:
            df_forecast = pd.read_csv("../data/" + pv_model + ".csv.gz")
        except FileNotFoundError:
            df_forecast = pd.read_csv("../data/" + pv_model + ".csv")
        merged_df = pd.merge(df_forecast, df_actual, on='end_datetime_utc', suffixes=('_forecast', '_actual'))

        # Parse the period end once; it is needed for both the filter and the horizon.
        end_times = pd.to_datetime(merged_df['end_datetime_utc'])

        # Filter by the minimum end_datetime_utc if provided
        if min_end_datetime_utc is not None:
            keep = end_times >= min_end_datetime_utc
            merged_df = merged_df[keep]
            end_times = end_times[keep]

            # Here is the option to also have a max end datetime as well
            # max_end_datetime_utc = pd.to_datetime("2020-01-01 00:00:00").tz_localize('UTC')
            # merged_df = merged_df[merged_df['end_datetime_utc'] <= max_end_datetime_utc]

        creation_times = pd.to_datetime(merged_df['forecasting_creation_datetime_utc'])

        # Build the scoring columns in a fresh frame rather than assigning onto the
        # filtered slice, which would trigger pandas' SettingWithCopyWarning.
        scored = pd.DataFrame({
            'forecast_horizon_hours': (end_times - creation_times).dt.total_seconds() / 3600,
            'absolute_error': (merged_df['generation_mw_forecast'] - merged_df['generation_mw_actual']).abs(),
        })
        mae_by_horizon = scored.groupby('forecast_horizon_hours')['absolute_error'].mean().reset_index(name='MAE')
        mae_by_horizon['Model'] = pv_model
        mae_results.append(mae_by_horizon)

    # pd.concat raises on an empty list, so hand back an empty, correctly-shaped frame.
    if not mae_results:
        return pd.DataFrame(columns=['forecast_horizon_hours', 'MAE', 'Model'])

    # Concatenate all MAE results into a single DataFrame
    merged_forecasts = pd.concat(mae_results)
    merged_forecasts['Model'] = merged_forecasts['Model'].replace(model_mapping)

    return merged_forecasts

In [None]:
# Score every model in pv_models against the PVLive actuals with no date filter,
# then show the resulting per-horizon MAE table.
mae_results = calculate_mae_for_forecast_horizons(pv_models, pvlive_df)
display(mae_results)

In [None]:
def calculate_average_mae(df, max_horizons_hours=(8, 40)):
    """Print each model's average MAE over horizons from 0 up to each cutoff.

    Args:
        df: DataFrame with ``Model``, ``forecast_horizon_hours`` and ``MAE``
            columns, as returned by ``calculate_mae_for_forecast_horizons``.
        max_horizons_hours: Iterable of inclusive upper horizon limits in hours.
            One average is printed per limit; the default reproduces the
            0-8 hour and 0-40 hour summaries.

    Returns:
        None. Results are printed; a model with no rows inside a window is
        reported as ``nan``.
    """
    # Models are reported in order of first appearance in the frame.
    for model in df['Model'].unique():
        model_df = df[df['Model'] == model]

        print(f"Model: {model}")
        for max_hours in max_horizons_hours:
            within_window = model_df['forecast_horizon_hours'] <= max_hours
            avg_mae = model_df.loc[within_window, 'MAE'].mean()
            # ':g' keeps integer limits free of a trailing '.0' (8 -> "8").
            print(f"Average MAE for 0-{max_hours:g} hours: {avg_mae:.2f}")

In [None]:
# Print each model's average MAE for the unfiltered results.
calculate_average_mae(mae_results)

In [None]:
# Score only periods ending on or after 2019-01-01 UTC, so the error is
# calculated for just the pvnet summation model times.
min_date = pd.to_datetime("2019-01-01 00:00:00", utc=True)
mae_results_pvnet = calculate_mae_for_forecast_horizons(
    pv_models,
    pvlive_df,
    min_end_datetime_utc=min_date,
)
calculate_average_mae(mae_results_pvnet)

In [None]:
def plot_multiple_mae_forecast_horizons(df):
    """Plot MAE against forecast horizon, one line per model.

    Args:
        df: DataFrame with ``Model``, ``forecast_horizon_hours`` and ``MAE``
            columns, as returned by ``calculate_mae_for_forecast_horizons``.

    Returns:
        None. The figure is rendered with ``fig.show()``.
    """
    fig = go.Figure()
    colors = ['#7BCDF3', '#63BCAF', '#FF9736', '#FFD053' , '#14120E', '#4C9A8E']

    # Sort model names alphabetically so each model keeps the same colour and
    # legend position regardless of row order in df.
    models = sorted(df['Model'].unique())

    for i, model in enumerate(models):
        # Filter the DataFrame for the current model
        model_df = df[df['Model'] == model]

        # The palette wraps around when there are more models than colours.
        fig.add_trace(go.Scatter(x=model_df['forecast_horizon_hours'], y=model_df['MAE'],
                                 mode='lines+markers', name=model, line=dict(color=colors[i % len(colors)])))

    fig.update_layout(title='MAE for Multiple OCF Models',
                      xaxis_title='Forecast Horizon',
                      yaxis_title='Error (MAE) MW',
                      template='plotly_white')
    fig.show()

In [None]:
# Plot MAE against forecast horizon for every model, using the unfiltered results.
plot_multiple_mae_forecast_horizons(mae_results)

In [None]:
# Same plot for the results computed with the minimum end-datetime filter (mae_results_pvnet).
plot_multiple_mae_forecast_horizons(mae_results_pvnet)

In [None]:
# Function to plot multiple forecasts on the same graph including pvlive_df for 2 days ahead
def plot_multiple_forecasts_with_pvlive(forecasting_datetime, df_list, df_names, pvlive_df):
    """Plot the forecasts created at one datetime alongside PVLive data for the next 2 days.

    Args:
        forecasting_datetime: Forecast creation time as a string in the form
            ``"YYYY-MM-DD HH:MM:SS+HH:MM"``. It is matched by string equality
            against ``forecasting_creation_datetime_utc`` in each forecast frame.
        df_list: Forecast DataFrames, each with ``forecasting_creation_datetime_utc``,
            ``end_datetime_utc`` and ``generation_mw`` columns.
        df_names: Model names, in the same order as ``df_list``. Names are
            translated through the module-level ``model_mapping`` for the legend.
        pvlive_df: PVLive DataFrame with ``start_datetime_utc``,
            ``end_datetime_utc`` and ``generation_mw`` columns.

    Returns:
        None. The figure is rendered with ``fig.show()``.

    Raises:
        ValueError: If ``forecasting_datetime`` does not match the expected format.
    """
    fig = go.Figure()

    window_start = datetime.strptime(forecasting_datetime, "%Y-%m-%d %H:%M:%S%z")
    # The plot is labelled as covering 2 days ahead, so the window spans 2 days.
    window_end = window_start + timedelta(days=2)

    # Compare parsed datetimes rather than strings: isoformat() uses a 'T'
    # separator, so a lexicographic comparison against space-separated
    # timestamps would let the whole final calendar day through.
    pvlive_start = pd.to_datetime(pvlive_df['start_datetime_utc'], utc=True)
    pvlive_end = pd.to_datetime(pvlive_df['end_datetime_utc'], utc=True)
    pvlive_filtered = pvlive_df[(pvlive_start >= window_start) & (pvlive_end <= window_end)]

    fig.add_trace(go.Scatter(x=pvlive_filtered['end_datetime_utc'], y=pvlive_filtered['generation_mw'], mode='lines', name='pvlive_data'))

    for df, name in zip(df_list, df_names):
        filtered_df = df[df['forecasting_creation_datetime_utc'] == forecasting_datetime]
        generation_data = filtered_df[['end_datetime_utc', 'generation_mw']]
        # Use model_mapping to rename the models; unmapped names are shown as-is
        mapped_name = model_mapping.get(name, name)
        fig.add_trace(go.Scatter(x=generation_data['end_datetime_utc'], y=generation_data['generation_mw'], mode='lines', name=mapped_name))

    fig.update_layout(title=f'Generation Data for {forecasting_datetime} including pvlive data for 2 days ahead', xaxis_title='End Datetime UTC', yaxis_title='Generation MW')
    fig.show()

In [None]:
# Forecast creation time to inspect; must match the timestamp strings in the forecast CSVs.
forecasting_datetime = "2021-07-05 02:30:00+00:00"
models_df = []

# Load each model's forecasts, preferring the gzipped CSV and falling back to the plain one.
for pv_model in pv_models:
    try:
        df_forecast = pd.read_csv("../data/" + pv_model + ".csv.gz")
    except FileNotFoundError:
        df_forecast = pd.read_csv("../data/" + pv_model + ".csv")

    models_df.append(df_forecast)

# Pass pv_models as the names: models_df was built in pv_models order, so the
# frames and names are guaranteed to line up (the mapping dict's key order is not).
plot_multiple_forecasts_with_pvlive(forecasting_datetime, models_df, pv_models, pvlive_df)