In [None]:
import xarray as xr
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

In [None]:
# Locations of the input CSVs. Note that the PVLive file sits one directory
# above the other three, which live under ../data.
XGB_PREDICTIONS_CSV = "../data/full_predictions_cross_validation_v4_without_prob_with_30min_unormalised_formated.csv.gz"
PVNET_PREDICTIONS_CSV = "../data/pvnet_predicitons_2021-2023_formated_v2.csv.gz"
PVLIVE_CSV = "../pvlive_2016_2022.csv"
MODEL_V6_FORECAST_CSV = "../data/forecast_v=6__model_name=national_xg__model_version=1.0.23__start_date=2016-12-01__end_date=2022-08-08.csv"

# Load each file into its own DataFrame (same order as before).
df_30_xgb_unorm_form = pd.read_csv(XGB_PREDICTIONS_CSV)
df_pvnet_form = pd.read_csv(PVNET_PREDICTIONS_CSV)
pvlive_df = pd.read_csv(PVLIVE_CSV)
model_v6_df = pd.read_csv(MODEL_V6_FORECAST_CSV)

In [None]:
# Function to calculate Mean Absolute Error (MAE) for each forecast horizon
def calculate_mae_for_forecast_horizons(df_forecast, df_actual):
    """Compute the mean absolute error of a forecast, grouped by forecast horizon.

    The two frames are joined on ``end_datetime_utc``. The join key is parsed
    to UTC timestamps *before* merging, so rows still line up when the two
    sources write the same instant differently (for example a ``T`` separator
    in one file and a space in the other). Merging the raw values would
    silently drop such rows. Timestamps without an offset are treated as UTC.

    Parameters
    ----------
    df_forecast : pandas.DataFrame
        Must contain ``forecasting_creation_datetime_utc``,
        ``end_datetime_utc`` and ``generation_mw``.
    df_actual : pandas.DataFrame
        Must contain ``end_datetime_utc`` and ``generation_mw``.

    Returns
    -------
    pandas.DataFrame
        One row per distinct horizon, with columns ``forecast_horizon_hours``
        (end time minus creation time, in hours) and ``MAE``.

    Neither input frame is modified.
    """
    # Normalise the join key on copies so the callers' frames stay untouched.
    forecast = df_forecast.assign(
        end_datetime_utc=pd.to_datetime(df_forecast['end_datetime_utc'], utc=True)
    )
    actual = df_actual.assign(
        end_datetime_utc=pd.to_datetime(df_actual['end_datetime_utc'], utc=True)
    )

    # Columns present in both frames (e.g. generation_mw) get the suffixes below.
    merged_df = pd.merge(forecast, actual, on='end_datetime_utc', suffixes=('_forecast', '_actual'))

    # Horizon = how far ahead of its creation time each forecast value lies, in hours.
    creation_time = pd.to_datetime(merged_df['forecasting_creation_datetime_utc'], utc=True)
    merged_df['forecast_horizon_hours'] = (
        merged_df['end_datetime_utc'] - creation_time
    ).dt.total_seconds() / 3600

    # Absolute error between the forecasted and actual generation
    merged_df['absolute_error'] = np.abs(merged_df['generation_mw_forecast'] - merged_df['generation_mw_actual'])

    # Average the absolute error within each horizon
    mae_by_horizon = merged_df.groupby('forecast_horizon_hours')['absolute_error'].mean().reset_index(name='MAE')

    return mae_by_horizon

# Calculate MAE for df_30_xgb_unorm_form against pvlive_df
mae_df_30_xgb_unorm_form = calculate_mae_for_forecast_horizons(df_30_xgb_unorm_form, pvlive_df)

# Show the MAE per forecast horizon. Leaving the frame as the cell's last
# expression lets the notebook render it as a table instead of print()'s
# plain-text dump.
mae_df_30_xgb_unorm_form

In [None]:
# Interactive view of MAE against forecast horizon
fig = go.Figure()

# Main curve: MAE at every forecast horizon
fig.add_trace(
    go.Scatter(
        x=mae_df_30_xgb_unorm_form['forecast_horizon_hours'],
        y=mae_df_30_xgb_unorm_form['MAE'],
        mode='lines+markers',
        name='MAE',
        line={'color': 'blue'},
    )
)

# Overlay: horizons that are not a whole number of hours, drawn as large red markers
is_fractional_hour = mae_df_30_xgb_unorm_form['forecast_horizon_hours'] % 1 != 0
half_hourly_points = mae_df_30_xgb_unorm_form[is_fractional_hour]
fig.add_trace(
    go.Scatter(
        x=half_hourly_points['forecast_horizon_hours'],
        y=half_hourly_points['MAE'],
        mode='markers',
        name='Half-hourly Points',
        marker={'color': 'red', 'size': 10},
    )
)

# Titles, axis labels and theme
fig.update_layout(
    title='MAE across Different Forecast Horizons',
    xaxis_title='Forecast Horizon (hours)',
    yaxis_title='Mean Absolute Error (MAE)',
    template='plotly_white',
)

fig.show()


In [None]:
# Change in MAE from one forecast horizon to the next. diff() has no value for
# the first row, and any NaN it produces is replaced with 0.
mae_df_30_xgb_unorm_form['MAE_diff'] = mae_df_30_xgb_unorm_form['MAE'].diff().fillna(0)

# Plot the magnitude of that change on an explicit figure/axes pair
abs_mae_diff = mae_df_30_xgb_unorm_form['MAE_diff'].abs()
mae_diff_fig, mae_diff_ax = plt.subplots(figsize=(10, 6))
mae_diff_ax.plot(
    mae_df_30_xgb_unorm_form['forecast_horizon_hours'],
    abs_mae_diff,
    marker='x',
    linestyle='--',
    color='g',
)
mae_diff_ax.set_title('Absolute Difference in MAE across Forecast Horizons')
mae_diff_ax.set_xlabel('Forecast Horizon (hours)')
mae_diff_ax.set_ylabel('Difference in MAE')
mae_diff_ax.grid(True)
plt.show()


# First 20 horizons, now including the MAE_diff column
mae_df_30_xgb_unorm_form.head(20)

In [None]:
# Display pvlive_df as the cell output for a quick visual check of its contents
pvlive_df

In [None]:
# Look up the generation values of the forecast created at one particular time
def get_generation_for_forecasting_datetime(forecasting_datetime, df_name):
    """Return the rows of a forecast frame created at ``forecasting_datetime``.

    ``df_name`` is the DataFrame itself, despite the parameter name. Rows are
    kept when their ``forecasting_creation_datetime_utc`` value equals
    ``forecasting_datetime`` exactly. Only the ``end_datetime_utc`` and
    ``generation_mw`` columns are returned, with the original index labels.
    """
    created_at_requested_time = df_name['forecasting_creation_datetime_utc'] == forecasting_datetime
    wanted_columns = ['end_datetime_utc', 'generation_mw']
    return df_name.loc[created_at_requested_time, wanted_columns]

# Function to plot multiple forecasts on the same graph including pvlive_df for 2 days ahead
def plot_multiple_forecasts_with_pvlive(forecasting_datetime, df_list, df_names, pvlive_df, days_ahead=2):
    """Plot several forecasts created at one time alongside the PVLive series.

    Parameters
    ----------
    forecasting_datetime : str
        Forecast creation time. It is matched exactly against each forecast
        frame's ``forecasting_creation_datetime_utc`` column, and parsed as a
        UTC timestamp to position the PVLive window.
    df_list : list of pandas.DataFrame
        Forecast frames; each needs ``forecasting_creation_datetime_utc``,
        ``end_datetime_utc`` and ``generation_mw``.
    df_names : list of str
        Legend labels, paired with ``df_list`` by position.
    pvlive_df : pandas.DataFrame
        Needs ``start_datetime_utc``, ``end_datetime_utc`` and ``generation_mw``.
    days_ahead : int or float, default 2
        Length of the PVLive window after ``forecasting_datetime``, in days.

    Returns
    -------
    None
        The figure is shown with ``fig.show()``.

    Notes
    -----
    The window bounds are compared as parsed timestamps, not as strings. The
    earlier string comparison used ``isoformat()`` output, whose ``T``
    separator does not sort like a space-separated timestamp, so the upper
    bound was not applied where intended. The window was also one day long
    although the labels said two.
    """
    # Window boundaries as timezone-aware timestamps
    window_start = pd.to_datetime(forecasting_datetime, utc=True)
    window_end = window_start + timedelta(days=days_ahead)

    # Parse the PVLive interval bounds only to build the mask; the rows
    # themselves are taken from pvlive_df unchanged.
    pvlive_start = pd.to_datetime(pvlive_df['start_datetime_utc'], utc=True)
    pvlive_end = pd.to_datetime(pvlive_df['end_datetime_utc'], utc=True)
    pvlive_filtered = pvlive_df[(pvlive_start >= window_start) & (pvlive_end <= window_end)]

    fig = go.Figure()

    # Add pvlive data to the plot
    fig.add_trace(go.Scatter(x=pvlive_filtered['end_datetime_utc'], y=pvlive_filtered['generation_mw'], mode='lines', name='pvlive_data'))

    # One line per forecast frame, restricted to the requested creation time
    for df, name in zip(df_list, df_names):
        generation_data = get_generation_for_forecasting_datetime(forecasting_datetime, df)
        fig.add_trace(go.Scatter(x=generation_data['end_datetime_utc'], y=generation_data['generation_mw'], mode='lines', name=name))

    # Keep the title in step with the window that is actually plotted
    day_word = 'day' if days_ahead == 1 else 'days'
    fig.update_layout(title=f'Generation Data for {forecasting_datetime} including pvlive data for {days_ahead} {day_word} ahead', xaxis_title='End Datetime UTC', yaxis_title='Generation MW')
    fig.show()

# Example: compare both forecast sources for the run created at 06:30 UTC on 2022-07-05
forecasting_datetime = "2022-07-05 06:30:00+00:00"

# Legend label -> forecast frame; the two parallel lists below are derived from
# this mapping so labels and frames cannot get out of step.
forecast_frames_by_name = {
    'df_30_xgb_unorm_form': df_30_xgb_unorm_form,
    'df_pvnet_form': df_pvnet_form,
}
df_names = list(forecast_frames_by_name.keys())
df_list = list(forecast_frames_by_name.values())

plot_multiple_forecasts_with_pvlive(forecasting_datetime, df_list, df_names, pvlive_df)


In [None]:
# Recreate the earlier XGBoost "MAE vs Horizon" results plot from hard-coded values

# One list of error values per column ("750" ... "1500"), aligned with "Horizon"
data = {
    "Horizon": [0, 1, 2, 4, 8, 16, 24, 36],
    "750": [0.016, 0.021, 0.024, 0.025, 0.026, 0.025, 0.025, 0.026],
    "1000": [0.010, 0.016, 0.018, 0.020, 0.020, 0.020, 0.020, 0.021],
    "1250": [0.009, 0.014, 0.017, 0.019, 0.019, 0.018, 0.019, 0.019],
    "1500": [0.009, 0.014, 0.016, 0.018, 0.019, 0.018, 0.018, 0.019],
}

df = pd.DataFrame(data)

# One line per column, each with its own marker symbol
fig = go.Figure()

for column, symbol in (
    ("750", 'circle'),
    ("1000", 'x'),
    ("1250", 'triangle-up'),
    ("1500", 'square'),
):
    fig.add_trace(
        go.Scatter(
            x=df["Horizon"],
            y=df[column],
            mode='lines+markers',
            name=f'Error for column {column}',
            marker=dict(symbol=symbol),
        )
    )

fig.update_layout(
    title='Error under columns 750, 1000, 1250, and 1500 vs. Horizon',
    xaxis_title='Horizon',
    yaxis_title='Error',
    template='plotly_white',
)

fig.show()