In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from entsoe import EntsoePandasClient

# The API token is read from the environment so that the credential is never
# stored in (or shared with) the notebook. The token that used to be hardcoded
# here should be treated as leaked and revoked.
_entsoe_api_key = os.environ.get("ENTSOE_API_KEY")
if not _entsoe_api_key:
    raise RuntimeError("Set the ENTSOE_API_KEY environment variable before running this notebook.")

client = EntsoePandasClient(api_key=_entsoe_api_key)

In [28]:
# Time window passed to the outage query (start/end keyword arguments).
start = pd.Timestamp('20220901', tz='Europe/Amsterdam')
end = pd.Timestamp('20230228', tz='Europe/Amsterdam')

# Zone codes that are queried one by one; each becomes one sheet in the workbooks.
countries = [
    'AT',
    'BE',
    'CZ',
    'DE_LU',
    'FR',
    'HR',
    'HU',
    'NL',
    'PL',
    'RO',
    'SI',
    'SK',
]

# (label found in the `plant_type` column of the outage data, standardized column name).
# The two public lists below are index-aligned because both are derived from this table.
_GEN_TYPE_PAIRS = [
    ('Biomass', 'biomass'),
    ('Fossil Brown coal/Lignite', 'brown_coal'),
    ('Fossil Coal-derived gas', 'coal_gas'),
    ('Fossil Gas', 'natural_gas'),
    ('Fossil Hard coal', 'hard_coal'),
    ('Fossil Oil', 'oil'),
    ('Fossil Oil shale', 'oil_shale'),
    ('Fossil Peat', 'peat'),
    ('Geothermal', 'geothermal'),
    ('Hydro Pumped Storage', 'hydro_pumped_storage'),
    ('Hydro Run-of-river and poundage', 'hydro_run_of_river'),
    ('Hydro Water Reservoir', 'hydro_reservoir'),
    ('Marine', 'marine'),
    ('Nuclear', 'nuclear'),
    ('Other', 'other'),
    ('Other renewable', 'other_renewable'),
    ('Solar', 'solar'),
    ('Waste', 'waste'),
    ('Wind Offshore', 'wind_offshore'),
    ('Wind Onshore', 'wind_onshore'),
]

gen_types = [source_label for source_label, _ in _GEN_TYPE_PAIRS]
standardized_gen_types = [column_name for _, column_name in _GEN_TYPE_PAIRS]

# Columns kept in the exported sheets, in output order. 'hydro' is not a
# standardized type itself: it is the sum of the three hydro_* columns.
reduced_gen_types = [
    'biomass',
    'brown_coal',
    'coal_gas',
    'natural_gas',
    'hard_coal',
    'oil',
    'hydro',
    'nuclear',
    'waste',
    'other',
    'solar',
    'wind_onshore',
    'wind_offshore',
    'other_renewable',
]

In [16]:
# Reference frame: the outage cell iterates over df_gen.index to decide for
# which timestamps outages are aggregated.
df_gen = pd.read_excel(
    './generation.xlsx',
    sheet_name='AT',
    index_col=0,
)

In [25]:
# Build one sheet per country with the unavailable capacity per generation type
# (nominal_power - avail_qty, summed over all non-cancelled outages that are
# active at each timestamp of df_gen.index) and write it to generation_outages.xlsx.

# Lookup from the `plant_type` label of an outage record to its standardized column.
plant_type_to_column = dict(zip(gen_types, standardized_gen_types))
# The exported 'hydro' column is the sum of these three standardized columns.
hydro_components = ['hydro_pumped_storage', 'hydro_run_of_river', 'hydro_reservoir']
# Same timestamps as df_gen, with the index name dropped so every sheet gets an
# unlabeled index column regardless of how generation.xlsx names its first column.
outage_index = df_gen.index.rename(None)

with pd.ExcelWriter("generation_outages.xlsx") as writer:
    for country_code in countries:
        print(country_code)

        try:
            df_out_gen = client.query_unavailability_of_generation_units(country_code, start=start, end=end)
            # Drop timezone information so the comparisons with df_gen's index are naive-vs-naive.
            df_out_gen = df_out_gen.tz_localize(None)
            df_out_gen['end'] = df_out_gen['end'].dt.tz_localize(None)
            df_out_gen['start'] = df_out_gen['start'].dt.tz_localize(None)
            # Cancelled documents never contribute, so filter them once up front.
            df_out_gen = df_out_gen[df_out_gen.docstatus != 'Cancelled']

            records = []
            for gen_date in outage_index:
                active_outages = df_out_gen[(df_out_gen.start <= gen_date) & (df_out_gen.end >= gen_date)]

                totals = dict.fromkeys(standardized_gen_types, 0.0)
                for _, outage in active_outages.iterrows():
                    column = plant_type_to_column[outage['plant_type']]
                    totals[column] += float(outage['nominal_power']) - float(outage['avail_qty'])

                # Computed once per timestamp and outside the outage loop, so that
                # timestamps without any active outage get 0 instead of a missing value.
                totals['hydro'] = sum(totals[name] for name in hydro_components)
                records.append(totals)

            # One-shot construction; `columns` selects and orders the exported subset.
            df_outages_zone = pd.DataFrame(records, index=outage_index, columns=reduced_gen_types)
        except Exception as exc:
            # Best effort: a country whose query or processing fails still gets an
            # all-zero sheet, but the cause is reported instead of being hidden.
            # `Exception` (not a bare except) lets KeyboardInterrupt stop the run.
            print("An exception occurred")
            print(f"  {country_code}: {exc!r}")
            df_outages_zone = pd.DataFrame(0.0, index=outage_index, columns=reduced_gen_types)

        # Written outside the try block: a failed write must surface rather than
        # trigger a second write attempt on the same sheet name.
        df_outages_zone.to_excel(writer, sheet_name=country_code)
            

FR


In [26]:
# First and last calendar day (both inclusive) of each train/test period.
start_autumn_train = np.datetime64("2022-09-01")
end_autumn_train = np.datetime64("2022-10-31")

start_autumn_test = np.datetime64("2022-11-01")
end_autumn_test = np.datetime64("2022-11-30")

start_winter_train = np.datetime64("2022-12-01")
end_winter_train = np.datetime64("2023-01-31")

start_winter_test = np.datetime64("2023-02-01")
end_winter_test = np.datetime64("2023-02-28")

_ONE_DAY = np.timedelta64(1, "D")


def _within_days(index, first_day, last_day):
    """Return a boolean array that is True where `index` lies in [first_day, last_day].

    The end bounds above are dates, i.e. midnight at the start of the last day.
    Comparing with `<= last_day` would therefore drop every timestamp later on
    that day when the index is sub-daily, leaving those rows in neither the
    training nor the validation set. Using `< last_day + 1 day` keeps the whole
    last day and gives the same result as `<=` for a daily index.
    """
    return (index >= first_day) & (index < last_day + _ONE_DAY)


def training_mask(df):
    """Return a boolean mask over `df.index` selecting the autumn and winter training periods."""
    return (
        _within_days(df.index, start_autumn_train, end_autumn_train)
        | _within_days(df.index, start_winter_train, end_winter_train)
    )


def validation_mask(df):
    """Return a boolean mask over `df.index` selecting the autumn and winter test periods."""
    return (
        _within_days(df.index, start_autumn_test, end_autumn_test)
        | _within_days(df.index, start_winter_test, end_winter_test)
    )


In [29]:
# Split the per-country outage sheets into a training and a validation workbook.
#
# Fixes relative to the previous version of this cell:
#   * the output paths were swapped (the training subset was written under
#     .../validation/ and the validation subset under .../training/);
#   * the global `df_gen`, which the outage cell reads its timestamps from, is
#     no longer overwritten here;
#   * the source workbook is read once instead of twice per country.
#
# NOTE(review): the input is ./for_io/generation_outages.xlsx, while the outage
# cell writes generation_outages.xlsx to the working directory - confirm the
# file is moved there before this cell runs.

# sheet_name=<list> returns a dict {sheet name: DataFrame}.
outages_by_country = pd.read_excel("./for_io/generation_outages.xlsx", sheet_name=countries, index_col=0)

# (progress label, row mask, destination workbook)
split_targets = [
    ('Training set, ', training_mask, "./optimisation/data/training/generation_outages.xlsx"),
    ('Validation set, ', validation_mask, "./optimisation/data/validation/generation_outages.xlsx"),
]

for split_label, split_mask, target_path in split_targets:
    with pd.ExcelWriter(target_path) as writer:
        for country_code in countries:
            print(split_label, country_code)
            df_outages_country = outages_by_country[country_code]
            df_outages_split = df_outages_country.loc[split_mask(df_outages_country)]
            df_outages_split.to_excel(writer, sheet_name=country_code)

Training set,  AT
Training set,  BE
Training set,  CZ
Training set,  DE_LU
Training set,  FR
Training set,  HR
Training set,  HU
Training set,  NL
Training set,  PL
Training set,  RO
Training set,  SI
Training set,  SK
Validation set,  AT
Validation set,  BE
Validation set,  CZ
Validation set,  DE_LU
Validation set,  FR
Validation set,  HR
Validation set,  HU
Validation set,  NL
Validation set,  PL
Validation set,  RO
Validation set,  SI
Validation set,  SK
