# Convert forecasts
This notebook converts quantile forecasts into parametric forecasts (a two-component Gaussian mixture per timestamp, plus its expected value) that serve as the input for the optimization process.

## Data Read-In

In [1]:
import pandas as pd
import numpy as np
import glob  as glob
import os
import matplotlib.pyplot as plt
from scipy.optimize import minimize
import plotly.graph_objects as go
import scipy.optimize
import inspect
import os
from datetime import datetime

In [2]:
# CSV with the quantile forecast for hour 0 (the files for hours 1-23 are processed in the automated run at the end of the notebook).
path_forecast = '../data/quantile_forecasts/2025-03-20_07-14-26_fixed_diff/patch_tst_2025-03-20_07-14-26_prosumption_hour_0.csv'
# CSV with the ground-truth series; only used by the (optional) plotting helpers.
path_gt = '../data/ground_truth/residential4_prosumption.csv'

def read_in_data(path):
    """Load a CSV file into a DataFrame with a datetime index.

    The first column of the file becomes the index and is converted to
    datetimes.

    Parameters
    ----------
    path : str or file-like
        Anything ``pd.read_csv`` accepts as its first argument.

    Returns
    -------
    pd.DataFrame
        The file contents, indexed by the parsed first column.
    """
    data = pd.read_csv(path, parse_dates=True, index_col=0)
    # Explicit conversion on top of parse_dates, exactly as before, so the
    # index is passed through pd.to_datetime regardless of what read_csv inferred.
    return data.set_index(pd.to_datetime(data.index))

# Load the hour-0 quantile forecast. Its columns arrive as '0_0.01' ... '0_0.99';
# strip the '0_' prefix and use the numeric quantile level (0.01 ... 0.99) as column label.
forecast = read_in_data(path_forecast)
forecast.columns = forecast.columns.str.replace('0_', '').astype(float)

# Date stamp (YYYY-MM-DD) of this run; it becomes part of the output folder name.
current_date = datetime.now().strftime('%Y-%m-%d')

# Ground truth, used by the plotting helpers.
gt = read_in_data(path_gt)

## Visualization

In [5]:
def plot_specific_day(fc_df, gt_df, day, title=None):
    """Plot all forecast quantile curves and the ground truth for one day.

    Parameters
    ----------
    fc_df : pd.DataFrame
        Quantile forecast; every column is drawn as its own faint line.
    gt_df : pd.DataFrame
        Ground truth; drawn fully opaque on top of the forecast.
    day : label
        Passed to ``.loc`` on both frames (e.g. a date string such as
        ``'2017-05-18'``).
    title : str, optional
        Figure title; omitted when ``None``.
    """
    plt.figure(figsize=(16,8))
    forecast_lines = plt.plot(fc_df.loc[day], alpha=0.1)
    gt_lines = plt.plot(gt_df.loc[day], alpha=1)

    # Label only one representative line per group: passing label= to plt.plot
    # would attach the same label to every column, producing one legend entry
    # per quantile line.
    for lines, label in ((forecast_lines, 'Forecast'), (gt_lines, 'Ground Truth')):
        if lines:
            lines[0].set_label(label)

    plt.ylabel('Power in kW')
    plt.xlabel('Time')
    if title is not None:
        plt.title(title)
    # The labels were previously assigned but never shown; draw the legend
    # like plot_forecast_hour does.
    plt.legend()
    plt.show()

# Create a visualization of a single hour, given a forecast containing the quantiles 0.01 to 0.99
def plot_forecast_hour(fc_df, gt_df, day_and_hour, title=None):
    """Plot the forecast quantiles of one timestamp together with the ground truth.

    The forecast values are drawn on the x-axis against their quantile levels
    (the column labels of ``fc_df``) on the y-axis. The ground truth is drawn
    as a red vertical line from 0 to 0.9 on the quantile axis.

    Parameters
    ----------
    fc_df : pd.DataFrame
        Quantile forecast whose columns are the quantile levels.
    gt_df : pd.DataFrame
        Ground truth; the first value of the selected row is used.
    day_and_hour : label
        Passed to ``.loc`` on both frames (e.g. ``'2017-05-18 10:00:00'``).
    title : str, optional
        Figure title; omitted when ``None``.
    """
    plt.figure(figsize=(16,8))

    quantile_levels = fc_df.columns
    forecast_values = fc_df.loc[day_and_hour].values
    plt.plot(forecast_values, quantile_levels, label='Forecast', alpha=1)

    # Baseline at quantile level 0, then the ground truth as a vertical marker.
    observed = gt_df.loc[day_and_hour].values[0]
    plt.axhline(y=0, color='k', linestyle='-')
    plt.vlines(x=observed, ymin=0, ymax=0.9, color='r', linestyle='-', label='Ground Truth')

    plt.xlabel('Power in kW')
    plt.ylabel('Quantiles')
    plt.yticks(np.arange(0.01, 1.01, 0.02))
    if title is not None:
        plt.title(title)
    plt.legend()
    plt.show()


# Day, and one specific hour of that day, used by the optional diagnostic plots.
date_to_visualize = '2017-05-18'
date_time_to_visualize = f'{date_to_visualize} 10:00:00'
#plot_specific_day(forecast, gt, date_to_visualize, title=f'Quantile Forecast and Ground Truth for {date_to_visualize}')
#plot_forecast_hour(forecast, gt, date_time_to_visualize, title=f'Forecasted CDF and Ground Truth for {date_time_to_visualize}')

## Sort the Quantiles

In [6]:
# Ensure that the values of every row are in ascending order (should be the case already => see np.quantile() docs)
def sort_quantiles(fc_df):
    """Return a copy of the forecast with the values of every row sorted ascending.

    Sorting each row removes quantile crossings: after the call, the value
    stored under a higher quantile level is never smaller than the value
    stored under a lower one. Index and column labels are kept as they are;
    only the values are rearranged within each row. NaNs end up at the end of
    their row.

    Parameters
    ----------
    fc_df : pd.DataFrame
        Quantile forecast, one row per timestamp and one column per quantile level.

    Returns
    -------
    pd.DataFrame
        New frame with the same index and columns as ``fc_df``; the input is
        not modified.
    """
    # One vectorized sort along the quantile axis replaces the per-row Python
    # loop; np.sort returns a sorted copy, so fc_df stays untouched.
    sorted_values = np.sort(fc_df.values, axis=1)
    return pd.DataFrame(sorted_values, index=fc_df.index, columns=fc_df.columns)

# Remove possible quantile crossings in the hour-0 forecast; the result is the input of the smoothing step.
forecast_sorted = sort_quantiles(forecast)

# plot the sorted forecast (optional diagnostics, disabled by default)
#plot_specific_day(forecast_sorted, gt, date_to_visualize, title=f'Sorted Quantile Forecast and Ground Truth for {date_to_visualize}')
#plot_forecast_hour(forecast_sorted, gt, date_time_to_visualize, title=f'Sorted Forecasted CDF and Ground Truth for {date_time_to_visualize}')

## Smooth the quantiles

In [7]:
def smooth_quantiles(fc_df, window_size=2): # Paper: Window size of 2 was used
    """Smooth every row of the forecast with a rolling mean across the quantile levels.

    Each row is processed independently with
    ``rolling(window=window_size, min_periods=1, center=True).mean()``, so
    positions at the edges are averaged over the values that are available
    instead of becoming NaN.

    Parameters
    ----------
    fc_df : pd.DataFrame
        Quantile forecast, one row per timestamp and one column per quantile level.
    window_size : int, default 2
        Number of neighbouring quantile values per averaging window.

    Returns
    -------
    pd.DataFrame
        Smoothed values with the same index and columns as ``fc_df``.
    """
    smoothed_rows = [
        quantile_row.rolling(window=window_size, min_periods=1, center=True).mean().values
        for _, quantile_row in fc_df.iterrows()
    ]
    return pd.DataFrame(smoothed_rows, index=fc_df.index, columns=fc_df.columns)

# Smooth the sorted hour-0 quantiles with the default window size.
forecast_smoothed = smooth_quantiles(forecast_sorted)

# Optional diagnostics, disabled by default:
#plot_specific_day(forecast_smoothed, gt, date_to_visualize, title=f'Smoothed Quantile Forecast and Ground Truth for {date_to_visualize}')
#plot_forecast_hour(forecast_smoothed, gt, date_time_to_visualize, title=f'Smoothed Forecasted CDF and Ground Truth for {date_time_to_visualize}')

In [8]:
# Inspect the smoothed forecast (the rendered table is truncated for large frames).
forecast_smoothed

Unnamed: 0_level_0,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09,0.10,...,0.90,0.91,0.92,0.93,0.94,0.95,0.96,0.97,0.98,0.99
cest_timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-04-29 00:00:00,-0.088736,0.062334,0.215269,0.222432,0.250524,0.278686,0.303410,0.342740,0.369433,0.382818,...,0.854089,0.857569,0.864555,0.881226,0.909894,0.945160,0.966720,0.971184,1.033533,1.153084
2017-04-29 01:00:00,-0.246200,-0.170477,-0.016956,0.084237,0.157720,0.222584,0.240371,0.252739,0.272025,0.316380,...,0.829856,0.832263,0.853604,0.884823,0.933797,0.974604,0.986610,1.036761,1.120341,1.300019
2017-04-29 02:00:00,-0.358840,-0.306720,-0.129071,0.022899,0.055056,0.109710,0.174145,0.193394,0.218671,0.249962,...,0.753570,0.766584,0.783162,0.805731,0.822352,0.829258,0.861039,0.940130,1.064610,1.300827
2017-04-29 03:00:00,-0.581116,-0.402317,-0.171776,-0.088659,-0.036872,-0.005473,0.022087,0.044738,0.085716,0.132478,...,0.840708,0.853068,0.875583,0.896191,0.926247,0.950419,0.957082,1.039703,1.228680,1.425811
2017-04-29 04:00:00,-0.430553,-0.348175,-0.171781,-0.061813,0.029080,0.104678,0.106854,0.111558,0.125977,0.140758,...,0.928122,0.946971,0.974712,1.007628,1.046656,1.085305,1.140483,1.218392,1.292737,1.354968
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-02-05 19:00:00,0.329700,0.358376,0.446762,0.512544,0.550587,0.601986,0.641072,0.681090,0.714447,0.741927,...,1.734503,1.772102,1.807512,1.849991,1.925709,1.995765,2.090136,2.169781,2.194258,2.373787
2018-02-05 20:00:00,0.455186,0.558879,0.673656,0.702583,0.731188,0.742090,0.748695,0.762369,0.785514,0.821303,...,1.652376,1.684129,1.716194,1.756703,1.791854,1.877266,1.936726,1.951387,1.982981,2.037228
2018-02-05 21:00:00,0.649627,0.678509,0.725053,0.770708,0.799439,0.806857,0.825675,0.841660,0.849222,0.859704,...,1.374251,1.409378,1.453905,1.488132,1.497791,1.538449,1.596786,1.657152,1.730530,1.808728
2018-02-05 22:00:00,0.649483,0.688896,0.768489,0.839951,0.881977,0.897562,0.910081,0.919732,0.926704,0.932689,...,1.227130,1.251302,1.288781,1.348164,1.427969,1.486746,1.499385,1.555887,1.672344,1.797072


## Gaussian Mixture Fit


In [9]:
from scipy.interpolate import interp1d
from sklearn.mixture import GaussianMixture
import matplotlib.pyplot as plt
from scipy import stats
from scipy.stats import norm


# Create the GMM forecasts
#gmm_timerange = ['2017-05-12', '2017-05-22']
gmm_timerange = ['2017-05-01', '2017-08-01']
forecast_gmm = forecast_smoothed.loc[gmm_timerange[0] : gmm_timerange[1]].copy()

quantile_levels = forecast_gmm.columns.astype(float)  # Quantile levels (e.g., 1% to 99%)

# The probability levels pushed through the inverse CDF are the same for every
# timestamp (fixed seed), so they are drawn once here instead of re-seeding the
# global RNG and re-drawing identical numbers inside the loop.
np.random.seed(42)
synthetic_x = np.random.uniform(0.0, 1.0, 20000)

weights_gmm = []
for t, quantile_values in forecast_gmm.iterrows():

    # Step 1: Create an interpolator to map quantile levels to quantile values.
    # Levels outside the forecasted range (below the lowest / above the highest
    # quantile level) are linearly extrapolated.
    inv_cdf = interp1d(quantile_levels, quantile_values, kind='linear', fill_value="extrapolate")

    # Step 2: Generate synthetic samples via interpolation (inverse-transform sampling)
    synthetic_samples = inv_cdf(synthetic_x)  # Generate artificial quantile value samples

    # Step 3: Fit GMM to synthetic samples
    gmm = GaussianMixture(n_components=2, covariance_type='full', random_state=42, reg_covar=1e-4)
    gmm.fit(synthetic_samples.reshape(-1, 1))

    # Extract parameters
    # NOTE(review): the order of the two fitted components is whatever the fit
    # returns; 'w1/mu1/std1' is presumably not guaranteed to be the lower-mean
    # component -- confirm that downstream consumers do not rely on an ordering.
    weights = gmm.weights_
    means = gmm.means_.flatten()
    variances = gmm.covariances_.flatten()

    # Transform variance to standard deviation
    standard_devs = np.sqrt(variances)

    weights_gmm.append([weights[0], means[0], standard_devs[0], weights[1], means[1], standard_devs[1]])


# Create a dataframe with the GMM weights
columns = ['w1', 'mu1', 'std1', 'w2', 'mu2', 'std2']

df_weights_gmm = pd.DataFrame(weights_gmm, index=forecast_gmm.index, columns=columns)


# Extract the expected values from the GMM weights and shift the GMM fits to have an expected value of 0.
# Column-wise arithmetic replaces the former row-by-row loop; the numbers are the same.
expected_value = df_weights_gmm['w1'] * df_weights_gmm['mu1'] + df_weights_gmm['w2'] * df_weights_gmm['mu2']

# assign() keeps the column order w1, mu1, std1, w2, mu2, std2 and only replaces the two means.
df_weights_gmm_shifted = df_weights_gmm.assign(
    mu1=df_weights_gmm['mu1'] - expected_value,
    mu2=df_weights_gmm['mu2'] - expected_value,
)
df_exp_val_gmm = expected_value.to_frame(name='expected_value')


In [13]:
# Save the hour-0 GMM forecasts: one CSV with the expected values and one with
# the mean-shifted mixture parameters.
folder_name = f'gmm2_forecast_{current_date}_hour_0'
output_dir = '../data/parametric_forecasts/' + folder_name
os.makedirs(output_dir, exist_ok=True)

df_exp_val_gmm.to_csv(output_dir + '/expected_value_forecast.csv')
df_weights_gmm_shifted.to_csv(output_dir + '/cdf_weights.csv')

### Automated Run


In [14]:
def _fit_gmm_per_timestamp(quantile_forecast, uniform_draws):
    """Fit a two-component Gaussian mixture to every row of a quantile forecast.

    For each row, the quantile values are linearly interpolated (and linearly
    extrapolated beyond the outermost quantile levels) to obtain an inverse
    CDF. ``uniform_draws`` is pushed through it to produce synthetic samples,
    to which the mixture is fitted.

    Parameters
    ----------
    quantile_forecast : pd.DataFrame
        One row per timestamp; the column labels are the quantile levels.
    uniform_draws : np.ndarray
        Probability levels in [0, 1) used for inverse-transform sampling.

    Returns
    -------
    pd.DataFrame
        Columns ``w1, mu1, std1, w2, mu2, std2`` with the index of
        ``quantile_forecast``.
    """
    quantile_levels = quantile_forecast.columns.astype(float)  # Quantile levels (e.g., 1% to 99%)
    fitted_rows = []
    for _, quantile_values in quantile_forecast.iterrows():
        # Step 1: interpolator mapping quantile levels to quantile values
        inv_cdf = interp1d(quantile_levels, quantile_values, kind='linear', fill_value="extrapolate")

        # Step 2: synthetic samples via interpolation
        synthetic_samples = inv_cdf(uniform_draws)

        # Step 3: fit the GMM to the synthetic samples
        gmm = GaussianMixture(n_components=2, covariance_type='full', random_state=42, reg_covar=1e-4)
        gmm.fit(synthetic_samples.reshape(-1, 1))

        # NOTE(review): the order of the two fitted components is whatever the
        # fit returns -- confirm downstream code does not rely on an ordering.
        weights = gmm.weights_
        means = gmm.means_.flatten()
        standard_devs = np.sqrt(gmm.covariances_.flatten())  # variance -> standard deviation

        fitted_rows.append([weights[0], means[0], standard_devs[0], weights[1], means[1], standard_devs[1]])

    columns = ['w1', 'mu1', 'std1', 'w2', 'mu2', 'std2']
    return pd.DataFrame(fitted_rows, index=quantile_forecast.index, columns=columns)


def _split_expected_value(gmm_params):
    """Separate the mixture mean from the mixture shape.

    Parameters
    ----------
    gmm_params : pd.DataFrame
        Frame with the columns ``w1, mu1, std1, w2, mu2, std2``.

    Returns
    -------
    (pd.DataFrame, pd.DataFrame)
        The parameters with both component means shifted so that the mixture
        has an expected value of 0, and a one-column frame
        (``expected_value``) holding ``w1 * mu1 + w2 * mu2``.
    """
    expected_value = gmm_params['w1'] * gmm_params['mu1'] + gmm_params['w2'] * gmm_params['mu2']
    # assign() keeps the original column order and only replaces the two means.
    shifted = gmm_params.assign(
        mu1=gmm_params['mu1'] - expected_value,
        mu2=gmm_params['mu2'] - expected_value,
    )
    return shifted, expected_value.to_frame(name='expected_value')


# Determine the date stamp once, so that all hours of this run end up in folders
# carrying the same date even if the run crosses midnight.
current_date = datetime.now().strftime('%Y-%m-%d')

# The probability levels used for sampling are identical for every hour and
# every timestamp (fixed seed), so draw them a single time.
np.random.seed(42)
synthetic_x = np.random.uniform(0.0, 1.0, 20000)

gmm_timerange = ['2017-05-01', '2017-08-01']

# Hour 0 is handled by the cells above; process the remaining hours here.
# The ground truth is not needed for the conversion, so it is not re-read per hour.
for i in range(1, 24):
    hour = str(i)
    path_forecast = '../data/quantile_forecasts/2025-03-20_07-14-26_fixed_diff/patch_tst_2025-03-20_07-14-26_prosumption_hour_' + hour + '.csv'

    forecast = read_in_data(path_forecast)
    forecast.columns = forecast.columns.str.replace('0_', '') # rename columns 0_0.01- 0_0.99 to 0.01 - 0.99
    forecast.columns = forecast.columns.astype(float)

    forecast_sorted = sort_quantiles(forecast)
    forecast_smoothed = smooth_quantiles(forecast_sorted)
    forecast_gmm = forecast_smoothed.loc[gmm_timerange[0] : gmm_timerange[1]].copy()

    df_weights_gmm = _fit_gmm_per_timestamp(forecast_gmm, synthetic_x)
    df_weights_gmm_shifted, df_exp_val_gmm = _split_expected_value(df_weights_gmm)

    # save the GMM forecasts
    folder_name = f'gmm2_forecast_{current_date}_hour_{hour}'
    os.makedirs('../data/parametric_forecasts/' + folder_name, exist_ok=True)

    df_exp_val_gmm.to_csv(f'../data/parametric_forecasts/{folder_name}/expected_value_forecast.csv')
    df_weights_gmm_shifted.to_csv(f'../data/parametric_forecasts/{folder_name}/cdf_weights.csv')

