In [None]:
import pandas as pd
import numpy as np
from IPython.display import display
import import_ipynb
import copy
import data_preperation as dp
import internal_inconsistencies as ii
import external_inconsistencies as ei


In [None]:
# Country name -> two-letter country code for every country handled in this
# notebook. Insertion order matters: `abbr_list` below inherits it.
countries = {
    'Austria': 'AT',
    'Belgium': 'BE',
    'Bulgaria': 'BG',
    'Switzerland': 'CH',
    'Czech Republic': 'CZ',
    'Germany': 'DE',
    'Denmark': 'DK',
    'Estonia': 'EE',
    'Spain': 'ES',
    'Finland': 'FI',
    'France': 'FR',
    'United Kingdom': 'GB',
    'Greece': 'GR',
    'Hungary': 'HU',
    'Ireland': 'IE',
    'Italy': 'IT',
    'Lithuania': 'LT',
    'Latvia': 'LV',
    'Montenegro': 'ME',
    'Netherlands': 'NL',
    'Norway': 'NO',
    'Poland': 'PL',
    'Portugal': 'PT',
    'Serbia': 'RS',
    'Sweden': 'SE',
    'Slovenia': 'SI',
    'Slovakia': 'SK',
}

# Only the codes, in the same order as the mapping above.
abbr_list = [code for code in countries.values()]

# 1. Data mismatches in ENTSO-E data

## 1.1 Calculating missing values in data files

In [None]:
# Run the raw-data mismatch check from `internal_inconsistencies` for every
# entry of `countries` and render whatever it returns below the cell.
# NOTE(review): per the section heading this is presumably a table of missing
# values per data file — confirm against ii.mismatch_raw_data.
df = ii.mismatch_raw_data(countries)
display(df)


## 1.2 Identifying inconsistencies in ENTSO-E Generation data

In [None]:
# Fetch the generation-data inconsistency report from
# `internal_inconsistencies` (the helper is called without arguments) and
# display it.
# NOTE(review): "inconcistencies" is the spelling used in the helper's name;
# do not correct it here unless the function in the module is renamed too.
inconsistency_in_generation = ii.inconcistencies_generation_data()
display(inconsistency_in_generation)


# 2. ENTSO-E data representation

## 2.1 Fill the missing observations and collect the hourly load data of every country into a single dataframe

In [None]:
# Build the load data for all countries via `data_preperation.load`.
# NOTE(review): the `_dic` suffix and the per-country CSV files read back in
# section 3 suggest a dict keyed by country code — confirm against dp.load.
load_dic=dp.load(countries)
# Uncomment to inspect the result:
# display(load_dic)


## 2.2 Fill the missing observations and collect the hourly generation data of every country, per source, into a single dataframe

In [None]:
# Build the generation data for all countries via `data_preperation.generation`.
# NOTE(review): the `_dic` suffix and the per-country CSV files read back in
# section 3 suggest a dict keyed by country code — confirm against
# dp.generation.
generation_dic=dp.generation(countries)
# Uncomment to inspect the result:
# display(generation_dic)

## 2.3 Fill the missing observations and collect the hourly cross-border transmission data of every country into a single dataframe

In [None]:
# Build the cross-border transmission data via `data_preperation.cross_border`
# (called without arguments, unlike the load/generation helpers above).
transmission_data = dp.cross_border()
# Uncomment to inspect the result:
# display(transmission_data)
# NOTE(review): the two lines below mention `cross_border_data` /
# `crossborder_data`, but no visible cell assigns either name — presumably a
# leftover from an earlier version; confirm before uncommenting.
# 'cross_border_data' shows the net import/net export of the whole country in hourly timesteps
# display(crossborder_data)


## 2.4 Validating the missing observations filling model

In [None]:
# Subset of the country mapping (name -> code) that is handed to the
# validation routine of the gap-filling step.
selected_countries = {
    'Austria': 'AT',
    'Switzerland': 'CH',
    'Denmark': 'DK',
    'Portugal': 'PT',
    'Sweden': 'SE',
}
dp.validation(selected_countries)

# 3. Consolidation based on internal data

## 3.1 Obtaining consolidated ENTSO-E data using Internal Sigma Approach

### !!! This step takes about 2 hours to complete on an Intel i5 processor with 8 GB of RAM
### A licensed Gurobi Optimizer installation is required

In [None]:
# Reload the gap-filled load, generation and transmission data from the CSV
# files under the "Polynomial Linear Regression" output folder.
# NOTE(review): these files are presumably written by the section 2 `dp.*`
# calls, which would let section 3 start from disk without re-running
# section 2 — confirm.
PLR_OUTPUT_DIR = '../Data Sources/output/Polynomial Linear Regression'

# Per-country DataFrames, keyed by country code.
load_dic = {}
generation_dic = {}

for abbr in abbr_list:
    # `.iloc[:, 1:]` drops the first CSV column
    # (presumably the index saved alongside the data — confirm).
    load_dic[abbr] = pd.read_csv(f'{PLR_OUTPUT_DIR}/Load/{abbr}.csv').iloc[:, 1:]
    generation_dic[abbr] = pd.read_csv(f'{PLR_OUTPUT_DIR}/Generation/{abbr}.csv').iloc[:, 1:]

# All cross-border transmissions are stored in one file, not one per country.
transmission_data = pd.read_csv(f'{PLR_OUTPUT_DIR}/Transmission/all_transmissions.csv').iloc[:, 1:]

In [None]:
# Hand deep copies to the consolidation routine, so that whatever
# `ii.internal` does to its arguments cannot touch the data loaded above.
load_dic_copy, generation_dic_copy, transmission_data_copy = (
    copy.deepcopy(original)
    for original in (load_dic, generation_dic, transmission_data)
)

# `ii.internal` returns three results, unpacked in the order
# generation, load, transmission.
(
    consolidated_gen_data,
    consolidated_load_data,
    consolidated_transmission_data,
) = ii.internal(
    load_dic_copy,
    generation_dic_copy,
    transmission_data_copy,
    countries,
)


# 4. Consolidation based on external data (Eurostat)

In [None]:
# Eurostat data are not available for Switzerland, so its code ('CH') is
# dropped from the list before the external consolidation is run.
# Note: this rebinds the notebook-wide `abbr_list`; any cell executed after
# this one sees the list without 'CH'.
abbr_list = [abbr for abbr in abbr_list if abbr != 'CH']
consolidated_ext_gen_data = ei.external(abbr_list)