### Simple metrics
- Weighted average
- Median
- Variance
- Annual root mean squared error (RMSE)

Outliers are always removed before computing these metrics: with VOLL = 9e06, they would otherwise have a large impact on the results.

In [47]:
import pandas as pd
import numpy as np

def _drop_highest(frame, column, n):
    """Return ``frame`` without the rows holding its ``n`` largest ``column`` values.

    Row labels are preserved; the rows come back ordered by ascending value.
    """
    return frame.nsmallest(max(len(frame) - n, 0), column)


def _weighted_mean(values, weights):
    """Return the mean of ``values`` weighted by ``weights``."""
    return (values * weights).sum() / weights.sum()


def compute_simple_metrics(empirical_prices, synthetic_prices, weights, n=15):
    """Compare empirical and synthetic prices for one year.

    Parameters
    ----------
    empirical_prices : DataFrame with columns 'Time_Index' and 'Price'.
    synthetic_prices : DataFrame with columns 'Time_Index' and '1' (the price).
    weights : DataFrame with columns 'Time_Index' and 'Load_MW_z1'.
    n : number of highest prices removed from each series before any
        metric is computed.

    Returns
    -------
    dict keyed by the metric column names written to metrics_simple.csv:
    load-weighted average, median and variance of each trimmed series,
    their differences (empirical minus synthetic) and the RMSE over the
    time steps that survive trimming in both series.
    """
    empirical_kept = _drop_highest(empirical_prices, 'Price', n)
    synthetic_kept = _drop_highest(synthetic_prices, '1', n)

    # Rows are paired through Time_Index everywhere. The merged frames get
    # a fresh positional index, so the row labels of the trimmed frames
    # must not be used to select from them.
    paired = empirical_prices.merge(synthetic_prices, on='Time_Index')
    merged = paired.merge(weights, on='Time_Index')

    empirical_rows = merged['Time_Index'].isin(empirical_kept['Time_Index'])
    synthetic_rows = merged['Time_Index'].isin(synthetic_kept['Time_Index'])

    empirical_weighted_avg = _weighted_mean(
        merged.loc[empirical_rows, 'Price'],
        merged.loc[empirical_rows, 'Load_MW_z1'],
    )
    synthetic_weighted_avg = _weighted_mean(
        merged.loc[synthetic_rows, '1'],
        merged.loc[synthetic_rows, 'Load_MW_z1'],
    )

    empirical_median = np.median(empirical_kept['Price'])
    synthetic_median = np.median(synthetic_kept['1'])

    empirical_variance = empirical_kept['Price'].var()
    synthetic_variance = synthetic_kept['1'].var()

    # The RMSE only uses time steps that were kept in BOTH series.
    kept_in_both = (
        paired['Time_Index'].isin(empirical_kept['Time_Index'])
        & paired['Time_Index'].isin(synthetic_kept['Time_Index'])
    )
    errors = paired.loc[kept_in_both, 'Price'] - paired.loc[kept_in_both, '1']
    rmse = np.sqrt((errors ** 2).mean())

    return {
        'Empirical Average': empirical_weighted_avg,
        'Synthetic Average': synthetic_weighted_avg,
        'Average Difference': empirical_weighted_avg - synthetic_weighted_avg,
        'Empirical Median': empirical_median,
        'Synthetic Median': synthetic_median,
        'Median Difference': empirical_median - synthetic_median,
        'Empirical Variance': empirical_variance,
        'Synthetic Variance': synthetic_variance,
        'Variance Difference': empirical_variance - synthetic_variance,
        'RMSE': rmse,
    }


if __name__ == "__main__":
    # Define the range of years
    years = range(2012, 2022)

    # One dict of metrics per year; turned into a DataFrame after the loop
    yearly_rows = []

    for year in years:
        # Read the empirical prices
        empirical_prices = pd.read_csv(f"{year}prices_emp.csv")

        # Read the synthetic prices; Time_Index is the 1-based row number
        synthetic_prices = pd.read_csv(f"{year}prices.csv")
        synthetic_prices['Time_Index'] = synthetic_prices.index + 1

        # Read the weights
        weights = pd.read_csv(f"{year}Load_data.csv")

        # Exclude the 15 highest prices of each series and compute the metrics
        metrics = compute_simple_metrics(empirical_prices, synthetic_prices, weights, n=15)

        # Print the results for the year
        print(f"Year: {year}")
        print(f"Empirical weighted Average: {metrics['Empirical Average']}")
        print(f"Synthetic weighted Average: {metrics['Synthetic Average']}")
        print(f"Average Difference: {metrics['Average Difference']}\n")
        print(f"Empirical Median: {metrics['Empirical Median']}")
        print(f"Synthetic Median: {metrics['Synthetic Median']}")
        print(f"Median Difference: {metrics['Median Difference']}\n")
        print(f"Empirical Variance: {metrics['Empirical Variance']}")
        print(f"Synthetic Variance: {metrics['Synthetic Variance']}")
        print(f"Variance Difference: {metrics['Variance Difference']}\n")
        print(f"RMSE: {metrics['RMSE']}\n")

        yearly_rows.append({'Year': year, **metrics})

    results_df = pd.DataFrame(yearly_rows)

    # Read the existing metrics_simple.csv file
    existing_metrics = pd.read_csv('metrics_simple.csv')

    # Delete previous results for these metrics, if they exist. Every other
    # column (including "Year") is kept. errors='ignore' copes with a file
    # that holds only some of the metric columns.
    stale_columns = [column for column in results_df.columns if column != 'Year']
    existing_metrics = existing_metrics.drop(columns=stale_columns, errors='ignore')

    # Merge the existing metrics with the new results based on the "Year"
    # column. This is an inner join: only years already listed in the file
    # are written back.
    merged_metrics = existing_metrics.merge(results_df, on='Year')

    # Save the merged metrics to the metrics_simple.csv file
    merged_metrics.to_csv('metrics_simple.csv', index=False)

Year: 2012
Empirical weighted Average: 28.201155410541354
Synthetic weighted Average: 31.347298925823317
Average Difference: -3.1461435152819632

Empirical Median: 22.98
Synthetic Median: 29.324429
Median Difference: -6.344428999999998

Empirical Variance: 279.8241386797161
Synthetic Variance: 124.95947760710827
Variance Difference: 154.86466107260782

RMSE: 15.472198551959897

Year: 2013
Empirical weighted Average: 33.36489334219304
Synthetic weighted Average: 37.08112741639506
Average Difference: -3.7162340742020206

Empirical Median: 29.53
Synthetic Median: 33.418197134805
Median Difference: -3.8881971348049973

Empirical Variance: 119.27554649130352
Synthetic Variance: 153.38603407272808
Variance Difference: -34.110487581424564

RMSE: 11.155609186420614

Year: 2014
Empirical weighted Average: 39.3503377947257
Synthetic weighted Average: 39.94545432178903
Average Difference: -0.5951165270633325

Empirical Median: 33.02
Synthetic Median: 33.833085
Median Difference: -0.81308499999999

### Pearson and Spearman correlation coefficients
- Annual coefficients
- Daily coefficients

In [80]:
import pandas as pd
from scipy.stats import pearsonr
from scipy.stats import spearmanr

def _pair_correlations(x, y):
    """Return ``(pearson, spearman)`` for two equally long samples.

    Both values are NaN when fewer than two observations are given, because
    no correlation can be computed from a single point.
    """
    if len(x) < 2:
        return float('nan'), float('nan')
    pearson, _ = pearsonr(x, y)
    spearman, _ = spearmanr(x, y)
    return pearson, spearman


def compute_correlations(empirical_prices, synthetic_prices, n=15, hours_per_day=24):
    """Correlate empirical and synthetic prices for one year.

    The ``n`` highest synthetic prices are removed, together with the
    empirical prices on the same rows. Rows are paired by row label, so both
    frames must list the time steps in the same order.

    Parameters
    ----------
    empirical_prices : DataFrame with a 'Price' column.
    synthetic_prices : DataFrame with columns '1' (the price) and
        'Time_Index' (1-based time step).
    n : number of highest synthetic prices to exclude.
    hours_per_day : number of consecutive time steps that form one day.

    Returns
    -------
    (pearson_annual, spearman_annual, daily) where ``daily`` is a DataFrame
    with columns 'Day', 'Pearson' and 'Spearman'. 'Day' is the 1-based day
    number as a float, derived from Time_Index.
    """
    n_kept = max(len(synthetic_prices) - n, 0)
    synthetic_kept = synthetic_prices.nsmallest(n_kept, "1").sort_values(by=['Time_Index'])
    empirical_kept = empirical_prices.loc[empirical_prices.index.isin(synthetic_kept.index)]

    # The day comes from Time_Index, not from the position among the kept
    # rows. Slicing the kept rows into fixed-size chunks would shift every
    # window after each excluded hour, so later "days" would straddle two
    # calendar days.
    paired = pd.DataFrame({
        'Day': (synthetic_kept['Time_Index'].to_numpy() - 1) // hours_per_day + 1.0,
        'Empirical': empirical_kept['Price'].to_numpy(),
        'Synthetic': synthetic_kept['1'].to_numpy(),
    })

    pearson_annual, spearman_annual = _pair_correlations(paired['Empirical'], paired['Synthetic'])

    daily_rows = []
    for day, hours in paired.groupby('Day'):
        pearson, spearman = _pair_correlations(hours['Empirical'], hours['Synthetic'])
        daily_rows.append({'Day': day, 'Pearson': pearson, 'Spearman': spearman})
    daily = pd.DataFrame(daily_rows, columns=['Day', 'Pearson', 'Spearman'])

    return pearson_annual, spearman_annual, daily


if __name__ == "__main__":
    # Define the range of years
    years = range(2012, 2022)

    # Per-year results, assembled into DataFrames after the loop
    annual_rows = []
    daily_frames = []

    for year in years:
        # Read the empirical prices
        empirical_prices = pd.read_csv(f"{year}prices_emp.csv")

        # Read the synthetic prices; Time_Index is the 1-based row number
        synthetic_prices = pd.read_csv(f"{year}prices.csv")
        synthetic_prices['Time_Index'] = synthetic_prices.index + 1

        # Exclude the 15 highest synthetic prices (and the matching empirical
        # prices), then compute the annual and daily coefficients
        pearson_annual, spearman_annual, daily = compute_correlations(
            empirical_prices, synthetic_prices, n=15
        )

        annual_rows.append({'Year': year,
                            'Pearson annual': pearson_annual,
                            'Spearman annual': spearman_annual})
        daily_frames.append(
            daily.set_index('Day').rename(columns={'Pearson': f'{year}Pearson daily',
                                                   'Spearman': f'{year}Spearman daily'})
        )

        print(f"Year: {year}")
        print(f"Annual Pearson: {pearson_annual}")
        print(f"Annual Spearman: {spearman_annual}")

    annual_coefficients = pd.DataFrame(annual_rows)

    # Align the years on Day. Years with a different number of days are
    # padded with NaN instead of failing on a length mismatch.
    daily_coefficients = (
        pd.concat(daily_frames, axis=1).sort_index().rename_axis('Day').reset_index()
    )

    # Read the existing files
    existing_annual_coefficients = pd.read_csv('annual_coefficients.csv')
    existing_daily_coefficients = pd.read_csv('daily_coefficients.csv')

    # Delete the previous results, if they exist; every other column
    # (including "Year" / "Day") is kept. errors='ignore' copes with files
    # that contain only some of the result columns.
    existing_annual_coefficients = existing_annual_coefficients.drop(
        columns=['Pearson annual', 'Spearman annual'], errors='ignore'
    )
    stale_daily_columns = [column for column in daily_coefficients.columns if column != 'Day']
    existing_daily_coefficients = existing_daily_coefficients.drop(
        columns=stale_daily_columns, errors='ignore'
    )

    # Merge the existing results with the new results based on the "Year"
    # (or "Day") column. These are inner joins: only keys already listed in
    # the files are written back.
    merged_annual_coefficients = existing_annual_coefficients.merge(annual_coefficients, on='Year')
    merged_daily_coefficients = existing_daily_coefficients.merge(daily_coefficients, on='Day')

    # Save the merged coefficients to the files
    merged_annual_coefficients.to_csv('annual_coefficients.csv', index=False)
    merged_daily_coefficients.to_csv('daily_coefficients.csv', index=False)

Year: 2012
Annual Pearson: 0.3424633742580565
Annual Spearman: 0.7844559392715036
Year: 2013
Annual Pearson: 0.5378072368361123
Annual Spearman: 0.8119328185734058
Year: 2014
Annual Pearson: 0.42973033922453585
Annual Spearman: 0.8212555556211188
Year: 2015
Annual Pearson: 0.3110957481792914
Annual Spearman: 0.7949795689144594
Year: 2016
Annual Pearson: 0.6069158306861713
Annual Spearman: 0.7871957326779776
Year: 2017
Annual Pearson: 0.5795466268809675
Annual Spearman: 0.836005334053351
Year: 2018
Annual Pearson: 0.3010603400970494
Annual Spearman: 0.8505877506483107
Year: 2019
Annual Pearson: 0.28528987191966876
Annual Spearman: 0.8416145985911915
Year: 2020
Annual Pearson: 0.3811249889721622
Annual Spearman: 0.7962274988498416
Year: 2021
Annual Pearson: 0.3020536600120838
Annual Spearman: 0.8511453780835465
