# External Inconsistencies

In [5]:
import pandas as pd
import numpy as np
import glob
import matplotlib.pyplot as plt
from IPython.display import display
import pyomo.environ as pyo
from pyomo.opt import SolverFactory


In [6]:
# Country name -> two-letter country code used in the data file names.
countries = {
    'Austria': 'AT', 'Belgium': 'BE', 'Bulgaria': 'BG', 'Switzerland': 'CH',
    'Czech Republic': 'CZ', 'Germany': 'DE', 'Denmark': 'DK', 'Estonia': 'EE',
    'Spain': 'ES', 'Finland': 'FI', 'France': 'FR', 'Greece': 'GR',
    'Hungary': 'HU', 'Ireland': 'IE', 'Italy': 'IT', 'Lithuania': 'LT',
    'Latvia': 'LV', 'Montenegro': 'ME', 'Netherlands': 'NL', 'Norway': 'NO',
    'Poland': 'PL', 'Portugal': 'PT', 'Serbia': 'RS', 'Sweden': 'SE',
    'Slovenia': 'SI', 'Slovakia': 'SK', 'United Kingdom': 'GB',
}

# Switzerland is left out of the working list because no Eurostat data are
# available for it.
abbr_list = [code for code in countries.values() if code != 'CH']

In [7]:
# Eurostat generation category -> ENTSO-E production types whose yearly
# energy is summed up to form that category.
_GENERATION_TYPES = {
    'Coal': ['Fossil Hard coal', 'Fossil Brown coal/Lignite'],
    'Biomass': ['Biomass'],
    'Fossil Gas': ['Fossil Gas', 'Fossil Coal-derived gas'],
    'Fossil Oil': ['Fossil Oil'],
    'Fossil Oil shale': ['Fossil Oil shale'],
    'Fossil Peat': ['Fossil Peat'],
    'Waste': ['Waste'],
    'Geothermal': ['Geothermal'],
    'Hydro': ['Hydro Pumped Storage', 'Hydro Run-of-river and poundage', 'Hydro Water Reservoir'],
    'Nuclear': ['Nuclear'],
    'Other renewable': ['Other renewable'],
    'Solar': ['Solar'],
    'Other': ['Other'],
    'Wind': ['Wind Offshore', 'Wind Onshore'],
}

# Number of hourly time steps the optimisation covers (0 .. 8759).
_HOURS_PER_YEAR = 8760


def _load_capacity(abbr_list):
    """Return a numeric table of capacities: one row per country, one column per production type."""
    capacity_dir = '../Data Sources/ENTSO-E/2018/Generation/Generation Capacity'

    # The German file is used as the template for the production type columns.
    # The last row of every capacity file is discarded (presumably a summary
    # row -- confirm against the data files).
    template = pd.read_csv(f'{capacity_dir}/DE.csv', low_memory=False).iloc[:-1, :]
    capacity = pd.DataFrame(index=abbr_list, columns=template['Production Type'].values)

    for abbr in abbr_list:
        df = pd.read_csv(f'{capacity_dir}/{abbr}.csv', low_memory=False).iloc[:-1, :]
        # First column: production type, second column: capacity value.
        for production_type, value in zip(df.iloc[:, 0], df.iloc[:, 1]):
            # A single .loc[row, column] assignment writes into 'capacity'
            # itself. The chained form capacity.loc[abbr][...] = ... assigns to
            # an intermediate Series and is lost under pandas Copy-on-Write.
            # A production type missing from the template gets its own column.
            capacity.loc[abbr, production_type] = value

    # Missing entries and the 'n/e' marker are treated as zero capacity.
    capacity = capacity.replace(['n/e', np.nan], 0)
    for column in capacity.columns.values:
        capacity[column] = pd.to_numeric(capacity[column])
    return capacity


def _solve_yearly_totals(yearly_sum, eurostat_total, eurostat_index, entsoe_techs):
    """Step 1: return the optimised yearly total per ENTSO-E technology that matches Eurostat."""
    model = pyo.ConcreteModel()

    model.eurostat_index = pyo.Set(initialize=eurostat_index)
    model.entsoe_index = pyo.Set(initialize=entsoe_techs)

    model.alpha = pyo.Var(entsoe_techs, bounds=(0.0, None))
    model.beta = pyo.Var(entsoe_techs, bounds=(0.0, None))
    model.delta = pyo.Var(entsoe_techs, bounds=(0.0, None))

    def adjusted_total(model, tech):
        # Yearly total after scaling (alpha) and shifting (beta, delta).
        return (1 + model.alpha[tech]) * yearly_sum[tech] + model.beta[tech] - model.delta[tech]

    # The ENTSO-E technologies grouped under one Eurostat category must add up
    # to the Eurostat value of that category.
    def match_eurostat_rule(model, eurostat_generation):
        return sum(adjusted_total(model, tech)
                   for tech in _GENERATION_TYPES[eurostat_generation]) == eurostat_total[eurostat_generation]
    model.gen_total = pyo.Constraint(model.eurostat_index, rule=match_eurostat_rule)

    # No technology may end up with a negative yearly total.
    def non_negative_rule(model, tech):
        return adjusted_total(model, tech) >= 0
    model.non_zero_rule = pyo.Constraint(model.entsoe_index, rule=non_negative_rule)

    # NOTE(review): the objective is kept numerically identical to the previous
    # implementation. It weights alpha**2 by the original yearly sum and adds a
    # constant 1 per technology, which is not the plain "sum of squared alpha,
    # beta and delta" the earlier comment described -- confirm the intent
    # before changing it, because it alters the consolidated results.
    def objective_rule(model):
        return sum((1 + model.alpha[tech]**2*yearly_sum[tech])
                   + (model.beta[tech] + model.delta[tech])**2
                   for tech in entsoe_techs)
    model.obj = pyo.Objective(rule=objective_rule, sense=pyo.minimize)

    opt = SolverFactory("gurobi", solver_io="python")
    opt.solve(model)

    totals = {}
    for tech in entsoe_techs:
        alpha = model.alpha[tech].value
        # alpha can come back without a value (presumably when its coefficient
        # is zero and it never reaches the solver); treat that as "no scaling".
        if alpha is None:
            alpha = 0
        total = (1 + alpha) * yearly_sum[tech] + model.beta[tech].value - model.delta[tech].value
        # Clip small negative values to zero.
        totals[tech] = 0.0 if total < 0 else total
    return totals


def _solve_hourly_profile(df, yearly_target, capacity_limit, night_hours, time_index):
    """Step 2: return the optimised hourly generation for every column of ``df``."""
    techs = df.columns.values
    # Per-technology arrays of the hourly input values. 'df' has a default
    # integer index, so position 'time' equals label 'time'.
    hourly = {tech: df[tech].to_numpy() for tech in techs}
    # The solar constraint only exists when the country still has a 'Solar'
    # column; otherwise borrowed night hours would index a missing variable.
    solar_index = ['Solar'] if 'Solar' in df.columns else []

    model = pyo.ConcreteModel()

    model.entsoe_index = pyo.Set(initialize=techs)
    model.time_index = pyo.Set(initialize=time_index)
    model.solar_index = pyo.Set(initialize=solar_index)
    model.solar_time_index = pyo.Set(initialize=night_hours)

    model.beta = pyo.Var(model.entsoe_index, model.time_index, bounds=(0.0, None))
    model.delta = pyo.Var(model.entsoe_index, model.time_index, bounds=(0.0, None))

    def adjusted_value(model, tech, time):
        # Hourly value after the additive corrections beta and delta.
        return hourly[tech][time] + model.beta[tech, time] - model.delta[tech, time]

    # The hourly values of a technology must add up to its step-1 yearly total.
    def gen_total_rule(model, tech):
        return sum(adjusted_value(model, tech, time) for time in time_index) == yearly_target[tech]
    model.gen_total_rule = pyo.Constraint(model.entsoe_index, rule=gen_total_rule)

    # Hourly values must not be negative.
    def non_negative_rule(model, tech, time):
        return adjusted_value(model, tech, time) >= 0
    model.non_zero_rule = pyo.Constraint(model.entsoe_index, model.time_index, rule=non_negative_rule)

    # Hourly values must not exceed the installed capacity of the technology.
    def capacity_rule(model, tech, time):
        return adjusted_value(model, tech, time) <= capacity_limit[tech]
    model.capacity_rule = pyo.Constraint(model.entsoe_index, model.time_index, rule=capacity_rule)

    # Solar generation is forced to zero in the given night hours.
    def solar_rule(model, solar, time):
        return adjusted_value(model, solar, time) == 0.0
    model.solar_rule = pyo.Constraint(model.solar_index, model.solar_time_index, rule=solar_rule)

    # Minimise the squared sum of the hourly corrections.
    def objective_rule(model):
        return sum((model.beta[tech, time] + model.delta[tech, time]) ** 2
                   for tech in techs for time in time_index)
    model.obj = pyo.Objective(rule=objective_rule, sense=pyo.minimize)

    opt = SolverFactory("gurobi", solver_io="python")
    opt.solve(model)

    rows = []
    for time in time_index:
        row = []
        for tech in techs:
            value = hourly[tech][time] + model.beta[tech, time].value - model.delta[tech, time].value
            # Clip small negative values to zero.
            if value < 0:
                value = 0
            row.append(value)
        rows.append(row)
    dataframe = pd.DataFrame.from_records(rows)
    dataframe.columns = df.columns
    return dataframe


def external(abbr_list):
    """Consolidate the internally consolidated generation data with Eurostat.

    For every country two optimisations are solved with Gurobi through Pyomo:
    first the yearly total of each ENTSO-E technology is adjusted so that the
    technologies grouped under a Eurostat category add up to the Eurostat
    value, then the hourly values are adjusted so that they add up to those
    yearly totals while staying between zero and the installed capacity.

    Reads the ENTSO-E capacity CSV files, the 'EUROSTAT' sheet of
    'External sources.xlsx' and the per-country internal generation CSV files
    below '../Data Sources'. Writes one CSV per country to
    '../Data Sources/output/External/Generation/'.

    Parameters
    ----------
    abbr_list : list of str
        Country codes to process, in processing order. 'NL' borrows the solar
        night hours of 'BE', so 'BE' has to appear before 'NL'.

    Returns
    -------
    dict
        Country code -> DataFrame with the optimised hourly generation, one
        column per technology kept for that country.

    Raises
    ------
    KeyError
        If 'NL' is processed before (or without) 'BE', or if a country or
        technology is missing from the input tables.
    """
    capacity = _load_capacity(abbr_list)

    # Hourly time steps 0 .. 8759.
    time_index = np.arange(_HOURS_PER_YEAR, dtype=int)

    em = pd.read_excel('../Data Sources/External sources.xlsx', sheet_name='EUROSTAT', index_col=0)
    # The Eurostat generation categories are all columns except the last three.
    eurostat_index = em.columns.values[:-3]

    # Eurostat yearly generation per country and category, multiplied by 1000
    # (presumably a unit conversion such as GWh -> MWh; confirm with the sheet).
    # Built up front so a country missing from the sheet fails before any solve.
    eurostat_total = {
        abbr: {item: em.loc[abbr, item] * 1000 for item in eurostat_index}
        for abbr in abbr_list
    }

    # Every ENTSO-E production type that belongs to some Eurostat category.
    entsoe_techs = [tech for techs in _GENERATION_TYPES.values() for tech in techs]

    # Country code -> hours in which the input solar generation is below 0.1.
    solar_time_index = {}
    consolidated_ext_gen_data = {}

    for abbr in abbr_list:
        df = pd.read_csv(f"../Data Sources/output/Internal/Generation/{abbr}.csv").iloc[:, 1:]

        # Yearly sum of every technology of the country; technologies the
        # country does not report count as zero.
        yearly_sum = {column: df[column].sum() for column in df.columns.values}
        for tech in entsoe_techs:
            if tech not in df.columns:
                yearly_sum[tech] = 0.0

        capacity_total = {tech: capacity.loc[abbr, tech] for tech in entsoe_techs}

        consolidated = _solve_yearly_totals(
            yearly_sum, eurostat_total[abbr], eurostat_index, entsoe_techs)

        # Running at full capacity for the whole year must yield more energy
        # than the optimised yearly total. Technologies violating this (the
        # capacity data are then considered unreliable) are excluded from the
        # hourly optimisation.
        violations = [column for column in df.columns.values
                      if _HOURS_PER_YEAR * capacity_total[column] <= consolidated[column]]
        df = df.drop(columns=violations)

        # Hours with (almost) no solar generation are treated as night hours.
        if 'Solar' in df.columns:
            solar_time_index[abbr] = np.flatnonzero(df['Solar'].to_numpy() < 0.1).tolist()
        else:
            solar_time_index[abbr] = []

        # The ENTSO-E solar values of the Netherlands are zero in all but one
        # time step, so the night hours of Belgium (closest country) are used.
        if abbr == 'NL':
            if 'BE' not in solar_time_index:
                raise KeyError("'BE' must be processed before 'NL': the Netherlands "
                               "use the solar night hours of Belgium")
            night_hours = solar_time_index['BE']
        else:
            night_hours = solar_time_index[abbr]

        dataframe = _solve_hourly_profile(df, consolidated, capacity_total, night_hours, time_index)
        print(abbr, ': OPTIMIZATION COMPLETED')

        # Persist the optimised hourly values and keep them for the caller.
        dataframe.to_csv(f"../Data Sources/output/External/Generation/{abbr}.csv")
        consolidated_ext_gen_data[abbr] = dataframe

    return consolidated_ext_gen_data
