In [None]:
from insert_missing_row import prepare_spreadsheet
from df_per_country import dataframe_per_country
from df_per_country import country_list
import matplotlib.pyplot as plt
import numpy as np
from generation_interpolation import generation_interpolation
import math
from collections import defaultdict
from helper_functions import months, index_to_month, month_to_index


print("Starting script")

# Load the input tables. Per the original notes, prepare_spreadsheet() also
# re-adds the entry that got lost at the switch from summer to winter time.
#   df_g -> generation data
#   df_l -> load data
#   df_c -> third table (presumably the cross-border/transmission data that is
#           merged per country below; confirm in insert_missing_row)
df_g, df_l, df_c = prepare_spreadsheet()

# One dataframe per country holding generation, load and transmission data
# side by side, keyed by country code.
country_dataframes = dataframe_per_country(df_g, df_l, df_c, country_list)

# Per-country result containers, all keyed by country code.
rel_error, abs_error = {}, {}
sum_generation, sum_load = {}, {}
sum_import, sum_export = {}, {}
rel_extra_generation, rel_extra_load = {}, {}

# Countries excluded from the analysis.
# Turkey, Ukraine, Russia, Belarus, Malta: no load data available.
countries_without_load = ("TR", "UA", "RU", "BY", "MT")
# Macedonia, Kosovo, Bosnia Herzegovina, Luxemburg, Albania, Croatia:
# no generation data available.
countries_without_generation = ("MK", "KV", "BA", "LU", "AL", "HR")

# `del` (rather than pop with a default) keeps the failure loud: a missing
# country code raises KeyError.
for excluded_code in countries_without_load + countries_without_generation:
    del country_dataframes[excluded_code]


# For every country, compute the energy balance
#     mismatch = generation + import - load - export
# per time step, store summary statistics in the result dictionaries, and derive
# the scaling factors of the "increase" method.
#
# Increase method: the reported data is assumed to be correct, so unreported
# data can only be added on top. There are two scaling factors per time step
# and country, alpha_g (generation) and alpha_l (load), and both are >= 1.0:
#   * mismatch < 0 (too little supply) -> scale generation up via alpha_g
#   * mismatch > 0 (too much supply)   -> scale load up via alpha_l

# Scaling factors above this cap are treated as artefacts of data gaps
# (division by a zero total yields infinity) and are reset to 1.0.
ALPHA_CAP = 50000

for country, df in country_dataframes.items():
    all_cols = df.columns.values
    # Classify the columns by the keyword contained in their name.
    g_cols = [col for col in all_cols if "generation" in col]
    l_cols = [col for col in all_cols if "load" in col]
    crossborder_cols = [col for col in all_cols if "crossborder" in col]
    # A cross-border column whose first two characters are this country's code
    # is counted as an export, every other cross-border column as an import.
    i_cols = [col for col in crossborder_cols if country not in col[0:2]]
    e_cols = [col for col in crossborder_cols if country in col[0:2]]

    gen_col = country + "_total_generation"
    load_col = country + "_load"

    # Row-wise totals; computed once and reused for both "error" and "mismatch".
    total_generation = df[g_cols].sum(axis=1)
    total_import = df[i_cols].sum(axis=1)
    total_export = df[e_cols].sum(axis=1)
    balance = total_generation + total_import - df[l_cols].sum(axis=1) - total_export

    # Absolute mismatch per time step, plus the totals as separate columns.
    df["error"] = balance.abs()
    df[gen_col] = total_generation
    df[country + "_total_import"] = total_import
    df[country + "_total_export"] = total_export

    # Summary statistics over the whole time range.
    abs_error[country] = df["error"].sum(axis=0)
    sum_generation[country] = df[gen_col].sum(axis=0)
    sum_load[country] = df[load_col].sum(axis=0)
    sum_import[country] = df[country + "_total_import"].sum(axis=0)
    sum_export[country] = df[country + "_total_export"].sum(axis=0)
    # Relative error = summed absolute mismatch / summed generation.
    rel_error[country] = abs_error[country] / sum_generation[country]

    # Signed mismatch and the default (neutral) scaling factors.
    df["mismatch"] = balance
    df["alpha_g"] = 1.0
    df["alpha_l"] = 1.0
    # Only the rows selected by the mask are overwritten; the right-hand side
    # is aligned on the index.
    df.loc[df["mismatch"] < 0, "alpha_g"] = (df[gen_col] - df["mismatch"]) / df[gen_col]
    df.loc[df["mismatch"] > 0, "alpha_l"] = (df[load_col] + df["mismatch"]) / df[load_col]

    # Energy added on top of the reported values by the scaling factors.
    # These are computed before the cap below, as in the original analysis.
    df["extra_generation"] = (df["alpha_g"] - 1) * df[gen_col]
    df["extra_load"] = (df["alpha_l"] - 1) * df[load_col]

    # Quick fix for alpha = infinity due to data gaps. The guard is applied to
    # both factors: previously only alpha_g was capped, so a zero-load row with
    # a positive mismatch made rel_extra_load infinite and distorted the ranking.
    df.loc[df["alpha_g"] > ALPHA_CAP, "alpha_g"] = 1.0
    df.loc[df["alpha_l"] > ALPHA_CAP, "alpha_l"] = 1.0

    # Average scaling factor over all time steps (sum divided by row count).
    rel_extra_generation[country] = df["alpha_g"].sum(axis=0) / len(df["alpha_g"])
    rel_extra_load[country] = df["alpha_l"].sum(axis=0) / len(df["alpha_l"])

# Rank all countries by relative mismatch, largest first.
# (The full ranking is kept; no top-5 slice is taken for this metric.)
list_rel_error = sorted(rel_error.items(), key=lambda item: item[1], reverse=True)

# Rank by average generation scaling factor after applying the strictly
# increasing method and keep the five largest.
list_rel_extra_generation = sorted(
    rel_extra_generation.items(), key=lambda item: item[1], reverse=True
)
top_extra_generation = list_rel_extra_generation[:5]

# Same ranking for the average load scaling factor.
list_rel_extra_load = sorted(
    rel_extra_load.items(), key=lambda item: item[1], reverse=True
)
top_extra_load = list_rel_extra_load[:5]



# Monthly sums per country: {country: {month_index: value}}.
generation_month = defaultdict(dict)
extra_generation_month = defaultdict(dict)
print("Plotting the countries with the biggest extra generation")

# Country codes of the top-5 ranking, extracted once for the membership test.
top_generation_codes = [code for code, _ in top_extra_generation]

# One 12-month line chart per top-5 country, in country_dataframes order.
for country, df in country_dataframes.items():
    if country not in top_generation_codes:
        continue

    total_col = country + "_total_generation"
    for i in range(12):
        # Rows of month i: from this month's start index up to the next one's.
        df_monthly = df.iloc[month_to_index[i]:month_to_index[i + 1]]
        generation_month[country][i] = df_monthly[total_col].sum(axis=0)
        extra_generation_month[country][i] = df_monthly["extra_generation"].sum(axis=0)

    # Two-line diagram: reported generation vs. extra generation per month.
    x = months
    y1 = list(generation_month[country].values())
    y2 = list(extra_generation_month[country].values())
    fig = plt.figure()
    ax = fig.add_axes([0, 0, 1, 1])
    l1 = ax.plot(x, y1, 'bs-')   # blue solid line with square markers
    l2 = ax.plot(x, y2, 'ro--')  # red dashed line with circle markers
    ax.legend(labels=("generation", "extra generation"), loc='lower right')
    ax.set_title("12 months extra generation " + country)
    ax.set_xlabel("Months")
    ax.set_ylabel("MW")
    plt.xticks(rotation=70)

In [None]:
# Monthly sums per country: {country: {month_index: value}}.
load_month = defaultdict(dict)
extra_load_month = defaultdict(dict)
print("Plotting the countries with the biggest extra load")

# Country codes of the top-5 ranking, extracted once for the membership test.
top_load_codes = [code for code, _ in top_extra_load]

# One 12-month line chart per top-5 country, in country_dataframes order.
for country, df in country_dataframes.items():
    if country not in top_load_codes:
        continue

    load_col = country + "_load"
    for i in range(12):
        # Rows of month i: from this month's start index up to the next one's.
        df_monthly = df.iloc[month_to_index[i]:month_to_index[i + 1]]
        load_month[country][i] = df_monthly[load_col].sum(axis=0)
        extra_load_month[country][i] = df_monthly["extra_load"].sum(axis=0)

    # Two-line diagram: reported load vs. extra load per month.
    x = months
    y1 = list(load_month[country].values())
    y2 = list(extra_load_month[country].values())
    fig = plt.figure()
    ax = fig.add_axes([0, 0, 1, 1])
    l1 = ax.plot(x, y1, 'bs-')   # blue solid line with square markers
    l2 = ax.plot(x, y2, 'ro--')  # red dashed line with circle markers
    ax.legend(labels=("load", "extra load"), loc='lower right')
    ax.set_title("12 months extra load of " + country)
    ax.set_xlabel("Months")
    ax.set_ylabel("MW")
    plt.xticks(rotation=70)

print("End of script")