# External Inconsistencies

In [1]:
import pandas as pd
import numpy as np
import glob
import matplotlib.pyplot as plt
from IPython.display import display

In [None]:
def plot(x1, x2, labels, color1, color2, label1, label2, title):
    """Draw a grouped bar chart comparing two data series side by side.

    For every entry in ``labels`` two bars are drawn next to each other: the
    value from ``x1`` on the left and the value from ``x2`` on the right.

    Parameters
    ----------
    x1, x2 : sequence of numbers
        Bar heights of the first and second series, one value per label.
    labels : sequence
        Tick labels placed on the x axis, one per bar pair.
    color1, color2 : matplotlib colour
        Fill colours of the first and second series.
    label1, label2 : str
        Legend entries of the first and second series.
    title : str
        Title of the figure.
    """
    bar_width = 0.35
    half_width = bar_width / 2
    positions = np.arange(len(labels))

    plt.figure(figsize=(20, 10))

    # Shift each series half a bar width away from the tick so the two bars
    # of a pair sit next to each other, centred on the tick.
    series = (
        (-half_width, x1, color1, label1),
        (half_width, x2, color2, label2),
    )
    for shift, heights, fill, legend_entry in series:
        plt.bar(positions + shift, heights, bar_width,
                color=fill, edgecolor='black', label=legend_entry)

    plt.xlabel('Countries')
    plt.ylabel('Energy [GWh]')
    plt.title(title)
    plt.xticks(positions, labels)
    # Solid zero line so negative bars are easy to tell apart.
    plt.axhline(y=0, color='black', linestyle='-')
    plt.legend()
    plt.grid()
    plt.show()


## 1 Comparison of data between ENTSO-E & Eurostat

### In the Eurostat data, the generation CSV contains both gross and net electricity production. The transmission CSV lists the imports and exports of each country separately.

### 1.1 Comparison of Generation data between ENTSO-E & Eurostat

In [None]:
def eurostat_comparission_gen_data(generation_data, countries):
    """Compare annual ENTSO-E generation with Eurostat net production (2018).

    Reads the Eurostat generation CSV, keeps the 2018 rows whose 'NRG_BAL' is
    'Net electricity production', and for every country that has such a row
    collects the ENTSO-E total, the Eurostat value and the percentage
    difference relative to the ENTSO-E value. The result is shown as a table
    and as a grouped bar chart.

    Countries without a matching Eurostat row are skipped.

    Parameters
    ----------
    generation_data : pandas.DataFrame
        ENTSO-E generation data. Every column whose label contains the country
        abbreviation is summed over all rows.
    countries : dict
        Maps the country name as written in the Eurostat 'GEO' column to the
        abbreviation used in the ``generation_data`` column labels.

    Returns
    -------
    None
        The table is displayed and the chart is drawn as side effects.
    """
    eurostat = pd.read_csv('../Data Sources/Eurostat/Gross & Net Generation .csv')

    # The row filter does not depend on the country, so apply it once here
    # instead of on every loop iteration.
    net_production = eurostat.loc[
        (eurostat['TIME'] == 2018)
        & (eurostat['NRG_BAL'] == 'Net electricity production')
    ]

    labels = []
    gen_entsoe = []
    gen_eurostat = []
    rows = []

    for country, abbr in countries.items():
        # Look the country up in the 'GEO' column only. Testing membership
        # against every cell of the frame could match a value in another
        # column and then fail with IndexError on the .iloc[0] below.
        values = net_production.loc[net_production['GEO'] == country, 'Value']
        if values.empty:
            continue

        # pd.to_numeric because the Eurostat values may be read as strings.
        eurostat_value = round(pd.to_numeric(values.iloc[0]), 2)

        # Sum of all columns whose label contains the abbreviation, divided by
        # 1000 (presumably MWh -> GWh, matching the '[GWh]' column headers).
        # NOTE(review): filter(like=...) is a substring match; confirm that it
        # cannot pick up columns of another country or count a '... - Total'
        # column in addition to the per-type columns.
        entsoe_value = round(
            generation_data.filter(like=abbr).sum(axis=1).sum() / 1000, 2)

        labels.append(abbr)
        gen_eurostat.append(eurostat_value)
        gen_entsoe.append(entsoe_value)
        rows.append([
            abbr,
            entsoe_value,
            eurostat_value,
            round((entsoe_value - eurostat_value) * 100 / entsoe_value, 2),
        ])

    table_eurostat_comparission_gen_data = pd.DataFrame(rows, columns=['Country', 'Gen data based on ENTSO-E [GWh]', 'Gen data based on Eurostat [GWh]', '% difference with respect to Value based on ENTSO-E [%]'])
    display(table_eurostat_comparission_gen_data)

    plot(gen_entsoe, gen_eurostat, labels, 'gold', 'lime', 'ENTSO-E data', 'Eurostat data', 'Comparission of annual electricity generation data in ENTSO-E and Eurostat data')


### 1.2 Comparison of Load data between ENTSO-E & Eurostat

In [None]:
def eurostat_comparission_load_data(load_data, countries):
    """Compare annual ENTSO-E load with the Eurostat load values (2018).

    Reads the Eurostat load CSV, keeps the 2018 rows, and for every country
    that has a row there collects the ENTSO-E total, the Eurostat value and
    the percentage difference relative to the ENTSO-E value. The result is
    shown as a table and as a grouped bar chart.

    Countries without a matching Eurostat row are skipped.

    Parameters
    ----------
    load_data : pandas.DataFrame
        ENTSO-E load data with one column per country abbreviation.
    countries : dict
        Maps the country name as written in the Eurostat 'GEO' column to the
        abbreviation used as column label in ``load_data``.

    Returns
    -------
    None
        The table is displayed and the chart is drawn as side effects.
    """
    eurostat = pd.read_csv('../Data Sources/Eurostat/Load.csv')
    eurostat = eurostat.loc[eurostat['TIME'] == 2018]

    labels = []
    load_entsoe = []
    load_eurostat = []
    rows = []

    for country, abbr in countries.items():
        # Look the country up in the 'GEO' column only. Testing membership
        # against every cell of the frame could match a value in another
        # column and then fail with IndexError on the .iloc[0] below.
        values = eurostat.loc[eurostat['GEO'] == country, 'Value']
        if values.empty:
            continue

        # pd.to_numeric because the Eurostat values may be read as strings.
        eurostat_value = round(pd.to_numeric(values.iloc[0]), 2)

        # Column total divided by 1000 (presumably MWh -> GWh, matching the
        # '[GWh]' column headers).
        entsoe_value = round(load_data[f'{abbr}'].sum() / 1000, 2)

        labels.append(abbr)
        load_eurostat.append(eurostat_value)
        load_entsoe.append(entsoe_value)
        rows.append([
            abbr,
            entsoe_value,
            eurostat_value,
            round((entsoe_value - eurostat_value) * 100 / entsoe_value, 2),
        ])

    table_eurostat_comparission_load_data = pd.DataFrame(rows, columns=['Country', 'Load data based on ENTSO-E [GWh]', 'Load data based on Eurostat [GWh]', '% difference with respect to Value based on ENTSO-E [%]'])
    display(table_eurostat_comparission_load_data)

    plot(load_entsoe, load_eurostat, labels, 'lightcoral', 'deepskyblue', 'ENTSO-E data', 'Eurostat data', 'Comparission of annual electricity load data in ENTSO-E and Eurostat data')


### 1.3 Comparison of Net Imports/Exports data between ENTSO-E & Eurostat

In [None]:
def eurostat_comparission_transmission_data(import_export_using_crossborder_data, countries):
    """Compare annual ENTSO-E net exports with Eurostat exports minus imports (2018).

    Reads the Eurostat transmission CSV, keeps the 2018 rows, and for every
    country that has both an 'Exports' and an 'Imports' row collects the
    ENTSO-E total, the Eurostat net value (exports minus imports) and the
    percentage difference relative to the ENTSO-E value. The result is shown
    as a table and as a grouped bar chart.

    Countries missing either Eurostat row are skipped.

    Parameters
    ----------
    import_export_using_crossborder_data : pandas.DataFrame
        ENTSO-E cross-border data with one column per country abbreviation.
    countries : dict
        Maps the country name as written in the Eurostat 'GEO' column to the
        abbreviation used as column label in the ENTSO-E frame.

    Returns
    -------
    None
        The table is displayed and the chart is drawn as side effects.
    """
    eurostat = pd.read_csv('../Data Sources/Eurostat/Transmission.csv')
    eurostat = eurostat.loc[eurostat['TIME'] == 2018]

    # The balance filters do not depend on the country, so apply them once
    # here instead of on every loop iteration.
    exports_2018 = eurostat.loc[eurostat['NRG_BAL'] == 'Exports']
    imports_2018 = eurostat.loc[eurostat['NRG_BAL'] == 'Imports']

    labels = []
    transmission_entsoe = []
    transmission_eurostat = []
    rows = []

    for country, abbr in countries.items():
        # Look the country up in the 'GEO' column only. Testing membership
        # against every cell of the frame could match a value in another
        # column and then fail with IndexError on the .iloc[0] below.
        exported = exports_2018.loc[exports_2018['GEO'] == country, 'Value']
        imported = imports_2018.loc[imports_2018['GEO'] == country, 'Value']
        if exported.empty or imported.empty:
            continue

        # pd.to_numeric because the Eurostat values may be read as strings.
        eurostat_value = round(
            pd.to_numeric(exported.iloc[0]) - pd.to_numeric(imported.iloc[0]), 2)

        # Column total divided by 1000 (presumably MWh -> GWh, matching the
        # '[GWh]' column headers).
        entsoe_value = round(
            import_export_using_crossborder_data[f'{abbr}'].sum() / 1000, 2)

        labels.append(abbr)
        transmission_eurostat.append(eurostat_value)
        transmission_entsoe.append(entsoe_value)
        rows.append([
            abbr,
            entsoe_value,
            eurostat_value,
            round((entsoe_value - eurostat_value) * 100 / entsoe_value, 2),
        ])

    table_eurostat_comparission_transmission_data = pd.DataFrame(rows, columns=['Country', 'Net imports/exports data based on ENTSO-E [GWh]', 'Net imports/exports data based on Eurostat [GWh]', '% difference with respect to Value based on ENTSO-E [%]'])
    display(table_eurostat_comparission_transmission_data)

    plot(transmission_entsoe, transmission_eurostat, labels, 'magenta', 'aqua', 'ENTSO-E data', 'Eurostat data', 'Comparission of annual electricity net export/import data in ENTSO-E and Eurostat data')
