In [590]:
import numpy as np
import pandas as pd
import powerplantmatching as pm
import datetime as dt

In [93]:
# NOTE: disabled prototype, kept for reference only (nothing in this cell runs).
# It sketches an outage matrix for three example generators: `outage_rates` is
# used as a number of outages per year, `outage_durations` as a length in hours,
# and outage starts come from cumulated exponential inter-arrival times.
# def generate_outage_matrix(seed=42):
#     np.random.seed(seed)

#     hours_per_year = 365 * 24
#     generators = ["gas", "coal", "wind"]
#     outage_rates = {"gas": 100, "coal": 200, "wind": 400}
#     outage_durations = {"gas": 2, "coal": 3, "wind": 4}

#     matrix = np.zeros((len(generators), hours_per_year), dtype=bool)

#     for i, gen in enumerate(generators):
#         rate = outage_rates[gen]
#         duration = outage_durations[gen]
        
#         # Generate inter-arrival times (in hours) using exponential distribution
#         mean_interval = hours_per_year / rate
#         times = np.cumsum(np.random.exponential(mean_interval, rate * 2))
        
#         # Only keep outage start times within the year
#         outage_starts = times[times < hours_per_year].astype(int)
        
#         for start in outage_starts:
#             end = min(start + duration, hours_per_year)
#             matrix[i, start:end] = True

#     return matrix, generators


## Load forced outage statistics

In [654]:
#file2 = snakemake.params.data_folder + "Common data/Common Data.xlsx"
# NOTE(review): absolute, user-specific path; the commented line above shows
# the snakemake-parameterised alternative.
file = "/trinity/home/fuhrand/ERAA/open_eraa/data/Common data/Common Data.xlsx"

excel_file = pd.ExcelFile(file)

# Efficiency table: read from row offset 10 of the "Common Data" sheet, then
# trimmed to a fixed window after dropping fully empty rows and columns.
properties_raw_eff = (
    pd.read_excel(
        excel_file,
        sheet_name="Common Data",
        index_col=[2, 3],
        skiprows=10,
        header=0,
    )
    .dropna(how="all")
    .iloc[2:, 1:]
    .dropna(how="all", axis=1)
    .iloc[:27, 1:17]
)

# Outage statistics table: read from row offset 44 with a two-level header,
# trimmed to the same window size as the efficiency table.
properties_raw_outstats = (
    pd.read_excel(
        excel_file,
        sheet_name="Common Data",
        index_col=[2, 3],
        skiprows=44,
        header=[0, 3],
    )
    .iloc[:, 1:]
    .dropna(how="all", axis=1)
    .dropna(how="all")
    .iloc[:27, 1:17]
)

In [655]:
# Check uniqueness of index: both raw tables are later joined on their
# index, so a repeated index entry is rejected up front.
for _raw_table in (properties_raw_eff, properties_raw_outstats):
    _labels = _raw_table.index
    if len(_labels.unique()) != len(_labels):
        raise Exception("Index of generator type is not unique")

In [656]:
# Remove irrelevant columns, rename columns and flag relevant entries.
#
# The raw frames are normalised so that this cell can be re-run safely:
# the extra header level is only dropped while it is still present, and the
# renames rebind the names instead of mutating in place (a second run is then
# a no-op rather than an error).
if isinstance(properties_raw_outstats.columns, pd.MultiIndex):
    properties_raw_outstats.columns = properties_raw_outstats.columns.droplevel()
properties_raw_outstats.index = properties_raw_outstats.index.set_names(['Fuel', 'Type'])
properties_raw_outstats = properties_raw_outstats.rename(columns={'%': 'rel_occ'})
properties_raw_eff = properties_raw_eff.rename(columns={'Efficiency range in NCV terms': 'eff'})

# One row per (Fuel, Type) that appears in both tables, carrying the outage
# share (`rel_occ`), the outage length (`Days`) and the efficiency (`eff`).
properties_stage_outstats = (
    properties_raw_outstats.loc[:, ['rel_occ', 'Days']]
    .reset_index()
    .merge(properties_raw_eff[["eff"]], on=['Fuel', 'Type'], how='inner')
)

# Gas entries are split into CCGT / OCGT by their type label; every other
# fuel gets the placeholder class 'rel'.
_stat_is_gas = properties_stage_outstats['Fuel'] == 'Gas'
_stat_is_ccgt = properties_stage_outstats['Type'].str.contains('CCGT', regex=True, na=False)
properties_stage_outstats['NewType'] = np.where(
    _stat_is_gas & _stat_is_ccgt, 'CCGT',
    np.where(_stat_is_gas, 'OCGT', 'rel'))

## Load generators

In [657]:
# Load the plant table from powerplantmatching. Later cells read its
# `Fueltype`, `Technology`, `Efficiency` and `Capacity` columns.
power_plants = pm.powerplants()

In [658]:
# Collapse the plant technology into the classes used by the outage
# statistics: gas plants become 'CCGT' or 'OCGT', everything else 'rel'.
_plant_is_gas = power_plants['Fueltype'] == 'Natural Gas'
# na=False makes the treatment of plants without a Technology entry explicit:
# they count as "not CCGT", so gas plants with a missing technology end up as
# 'OCGT' (same outcome as before, without relying on NaN coercion in `&` or
# on a sentinel fill value).
_plant_is_ccgt = power_plants['Technology'].str.contains('CCGT', case=False, na=False)

power_plants['NewTechnology'] = np.where(
    _plant_is_gas & _plant_is_ccgt, 'CCGT',
    np.where(_plant_is_gas, 'OCGT', 'rel'))

## Add outage parameters to generators

### Add default values

In [659]:
# Fleet-wide fallbacks: the mean outage share and mean outage length over all
# rows of the staged statistics. They are assigned as plain scalars so pandas
# broadcasts them to every plant, instead of depending on how a one-element
# Series is unpacked into a single-column list key.
power_plants["unavail_rate_def"] = properties_stage_outstats["rel_occ"].mean()
power_plants["unavail_days_def"] = properties_stage_outstats["Days"].mean()

### Add more precise values for fuel and type

In [660]:
# Maps fuel names of the outage statistics (keys) to the `Fueltype` labels of
# the plant table (values).
# NOTE(review): 'Hydrogen' is mapped to 'Hydro' - confirm this is intended and
# not a mix-up between hydrogen and hydro power.
dict_fuel = {
    'Hard coal': 'Hard Coal',
    'Hydrogen': 'Hydro',
    'Lignite': 'Lignite',
    'Gas': 'Natural Gas',
    'Nuclear': 'Nuclear',
    # All oil variants share one plant fuel type.
    **dict.fromkeys(['Heavy oil', 'Light oil', 'Oil shale'], 'Oil'),
}

In [661]:
# Translate the statistics' fuel names into the plant table's fuel labels;
# fuels that are not a key of `dict_fuel` become NaN.
properties_stage_outstats["pp_fuel"] = properties_stage_outstats['Fuel'].map(dict_fuel)

In [662]:
# Per-plant lookup key (fuel, technology class) and the statistics grouped by
# the matching (fuel, type class) pair.
_plant_group_keys = power_plants.set_index(['Fueltype', 'NewTechnology']).index
_stats_by_group = properties_stage_outstats.groupby(["pp_fuel", "NewType"])

# Plants whose key has no counterpart in the statistics receive NaN here.
_mean_days_by_group = dict(_stats_by_group.Days.mean())
power_plants["unavail_days_fueltype"] = _plant_group_keys.map(_mean_days_by_group)

_mean_rate_by_group = dict(_stats_by_group.rel_occ.mean())
power_plants["unavail_rate_fueltype"] = _plant_group_keys.map(_mean_rate_by_group)

### Add more precise values based on generator efficiency

In [663]:
# For selected fuels only the rate varies. The unavailable number of days is 1 for all.
def _rate_by_efficiency(plants, mask, threshold, rate_below, rate_above):
    """Return an efficiency-dependent outage rate for the plants selected by `mask`.

    Parameters
    ----------
    plants : DataFrame with an `Efficiency` column.
    mask : boolean Series selecting the rows of `plants` to classify.
    threshold : upper edge of the lower efficiency bin.
    rate_below : rate for efficiencies in (0, threshold].
    rate_above : rate for efficiencies in (threshold, 1].

    Returns
    -------
    Float Series indexed like the selected rows; efficiencies that are missing
    or fall outside (0, 1] yield NaN.
    """
    return pd.cut(
        plants.loc[mask, 'Efficiency'],
        bins=[0, threshold, 1],
        labels=[rate_below, rate_above],
    ).astype(float)


_fuel_is_gas = power_plants['Fueltype'].isin(['Natural Gas'])

# (row selection, efficiency threshold, rate below, rate above)
# NOTE(review): for coal the lower-efficiency bin gets the HIGHER rate, for gas
# the lower-efficiency bin gets the LOWER rate - confirm the gas labels against
# the source statistics.
_efficiency_rules = [
    (power_plants['Fueltype'].isin(['Hard Coal', 'Lignite']), 0.43999, 0.1, 0.075),
    (_fuel_is_gas & power_plants['NewTechnology'].isin(['CCGT']), 0.52999, 0.05, 0.08),
    (_fuel_is_gas & power_plants['NewTechnology'].isin(['OCGT']), 0.38999, 0.05, 0.08),
]

# Start from an all-NaN column so the cell gives the same result on every run
# and the column exists even if a rule selects no plants.
power_plants['unavail_rate_eff'] = np.nan
for _mask, _threshold, _rate_below, _rate_above in _efficiency_rules:
    _rates = _rate_by_efficiency(power_plants, _mask, _threshold, _rate_below, _rate_above)
    power_plants.loc[_rates.index, 'unavail_rate_eff'] = _rates

### Consolidate final parameters

In [664]:
# Outage rate, most specific source first: efficiency-based value, then the
# fuel/technology mean, then the fleet-wide default.
_rate_fuel_or_default = np.where(
    power_plants.unavail_rate_fueltype.notna(),
    power_plants.unavail_rate_fueltype,
    power_plants.unavail_rate_def,
)
power_plants['unavail_rate'] = np.where(
    power_plants.unavail_rate_eff.notna(),
    power_plants.unavail_rate_eff,
    _rate_fuel_or_default,
)

# Outage length: fuel/technology mean where available, otherwise the default.
power_plants['unavail_days'] = np.where(
    power_plants.unavail_days_fueltype.notna(),
    power_plants.unavail_days_fueltype,
    power_plants.unavail_days_def,
)

In [665]:
# Sanity check: the distinct consolidated outage rates per fuel / technology class.
power_plants.groupby(['Fueltype', 'NewTechnology'])['unavail_rate'].agg(['unique'])

Unnamed: 0_level_0,Unnamed: 1_level_0,unique
Fueltype,NewTechnology,Unnamed: 2_level_1
Bioenergy,rel,[0.0751923076923077]
Geothermal,rel,[0.0751923076923077]
Hard Coal,rel,"[0.0875, 0.075, 0.1]"
Hydro,rel,[0.05]
Lignite,rel,"[0.0875, 0.1]"
Natural Gas,CCGT,"[0.06, 0.05, 0.08]"
Natural Gas,OCGT,"[0.07250000000000001, 0.05, 0.08]"
Nuclear,rel,[0.05]
Oil,rel,[0.091]
Other,rel,[0.0751923076923077]


In [666]:
# Sanity check: the distinct consolidated outage lengths (days) per fuel / technology class.
power_plants.groupby(['Fueltype', 'NewTechnology'])['unavail_days'].agg(['unique'])

Unnamed: 0_level_0,Unnamed: 1_level_0,unique
Fueltype,NewTechnology,Unnamed: 2_level_1
Bioenergy,rel,[1.2307692307692308]
Geothermal,rel,[1.2307692307692308]
Hard Coal,rel,[1.0]
Hydro,rel,[1.0]
Lignite,rel,[1.0]
Natural Gas,CCGT,[1.0]
Natural Gas,OCGT,[1.0]
Nuclear,rel,[7.0]
Oil,rel,[1.0]
Other,rel,[1.2307692307692308]


## Simulate forced outages

In [691]:
year = 2030
# Number of hours in `year`, derived from the calendar (the module is imported
# as `dt`). For 2030 this is 8760, i.e. the same as the former hard-coded
# 365*24.
# NOTE(review): for leap years this yields 8784 - confirm that downstream
# consumers do not assume 8760 hours.
hours_per_year = (dt.datetime(year + 1, 1, 1) - dt.datetime(year, 1, 1)) // dt.timedelta(hours=1)

In [696]:
# Inputs: hours_per_year, generator dataframe

# Fixed seed so the simulated outage pattern is reproducible across runs.
OUTAGE_SEED = 42
outage_rng = np.random.default_rng(OUTAGE_SEED)

generators = power_plants.index
pp = power_plants


def _draw_outage_starts(rng, mean_interval, horizon, batch_size):
    """Draw outage start hours from exponential inter-arrival times.

    Inter-arrival times are drawn in batches of `batch_size` and cumulated
    until the running time reaches `horizon`, so the whole horizon is always
    covered no matter how few outages are expected.

    Returns an integer array of start hours, all smaller than `horizon`.
    """
    kept = []
    elapsed = 0.0
    while elapsed < horizon:
        arrivals = elapsed + np.cumsum(rng.exponential(mean_interval, batch_size))
        kept.append(arrivals[arrivals < horizon])
        elapsed = arrivals[-1]
    return np.concatenate(kept).astype(int)


# Availability matrix: one row per plant, one column per hour; 1 = available,
# 0 = on forced outage.
np_forcedoutages = np.ones((len(pp), hours_per_year), dtype=int)

# Read the parameters positionally so row i of the matrix always belongs to
# row i of the plant table, independent of the index labels.
_outage_hours = pp.unavail_days.to_numpy(dtype=float) * 24  # convert to hours
_outage_share = pp.unavail_rate.to_numpy(dtype=float)

for i in range(len(pp)):
    duration = _outage_hours[i]
    share = _outage_share[i]
    # Plants without usable parameters (missing, zero or negative) stay
    # fully available instead of aborting the whole simulation.
    if not (np.isfinite(duration) and np.isfinite(share) and duration > 0 and share > 0):
        continue

    # `share` is the fraction of the year spent on outage, so the expected
    # number of outages is the total outage time divided by one outage length.
    num_outages_year = share * hours_per_year / duration
    mean_interval = hours_per_year / num_outages_year
    batch_size = max(1, int(np.ceil(2 * num_outages_year)))

    for start in _draw_outage_starts(outage_rng, mean_interval, hours_per_year, batch_size):
        # Outages that would run past the end of the year are truncated.
        end = int(min(start + duration, hours_per_year))
        np_forcedoutages[i, start:end] = 0

In [697]:
# Wrap the availability matrix in a DataFrame: rows carry the plant index,
# columns are the hour positions 0 .. hours_per_year - 1.
pdf_forcedoutages = pd.DataFrame(np_forcedoutages, index=pp.index)

In [737]:
# Average availability per fuel type: the hourly flags are divided by the
# number of hours, averaged over the plants of each fuel and summed over all
# hours, which gives the mean share of the year a plant of that fuel is available.
(pdf_forcedoutages/hours_per_year).groupby(power_plants.Fueltype).mean().T.sum()

Fueltype
Bioenergy      0.930221
Geothermal     0.926379
Hard Coal      0.915453
Hydro          0.951205
Lignite        0.912915
Natural Gas    0.939526
Nuclear        0.952854
Oil            0.913969
Other          0.927374
Solar          0.929038
Waste          0.930266
Wind           0.928774
dtype: float64

In [None]:
# Hourly available capacity per plant: hours as rows, plants as columns, each
# availability flag scaled by the plant's `Capacity`.
pdf_forcedoutages.T.multiply(power_plants.Capacity) #.groupby(power_plants.Fueltype).mean().T.plot()

id,0,1,2,3,4,5,6,7,8,9,...,22679,22680,22681,22682,22683,22684,22685,22686,22687,22688
0,485.0,187.0,156.0,109.4,446.9,189.2,808.0,120.0,592.0,192.0,...,6.3,5.1,29.6,5.6,5.4,5.2,16.3,2.3,1.0,43.2
1,485.0,187.0,156.0,109.4,446.9,189.2,808.0,120.0,592.0,192.0,...,6.3,5.1,29.6,5.6,5.4,5.2,16.3,2.3,1.0,43.2
2,485.0,187.0,156.0,109.4,446.9,189.2,808.0,120.0,592.0,192.0,...,6.3,5.1,29.6,5.6,5.4,5.2,16.3,2.3,1.0,43.2
3,485.0,187.0,156.0,109.4,446.9,189.2,808.0,120.0,592.0,192.0,...,6.3,5.1,29.6,5.6,5.4,5.2,16.3,2.3,1.0,43.2
4,485.0,187.0,156.0,109.4,446.9,189.2,808.0,120.0,592.0,192.0,...,6.3,5.1,29.6,5.6,5.4,5.2,16.3,2.3,1.0,43.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,485.0,187.0,156.0,109.4,446.9,189.2,808.0,120.0,592.0,192.0,...,6.3,5.1,29.6,5.6,5.4,5.2,16.3,2.3,1.0,0.0
8756,485.0,187.0,156.0,109.4,446.9,189.2,808.0,120.0,592.0,192.0,...,6.3,5.1,29.6,5.6,5.4,5.2,16.3,2.3,1.0,0.0
8757,485.0,187.0,156.0,109.4,446.9,189.2,808.0,120.0,592.0,192.0,...,6.3,5.1,29.6,5.6,5.4,5.2,16.3,2.3,1.0,0.0
8758,485.0,187.0,156.0,109.4,446.9,189.2,808.0,120.0,592.0,192.0,...,6.3,5.1,29.6,5.6,5.4,5.2,16.3,2.3,1.0,0.0


In [652]:
# Persist the availability matrix. The HDF key is passed by keyword because
# positional arguments after the path are deprecated in recent pandas.
# NOTE(review): absolute, user-specific output path.
pdf_forcedoutages.to_hdf("/trinity/home/fuhrand/ERAA/open_eraa/data/temp/forced_outages.hdf", key="forced_outages")