In [238]:
import numpy as np
import pandas as pd
import powerplantmatching as pm
import pypsa
import datetime as dt

In [93]:
def generate_outage_matrix(seed=42, hours_per_year=365 * 24, outage_rates=None, outage_durations=None):
    """Simulate random forced outages and return an hourly boolean outage matrix.

    For every generator, outage start times are the cumulative sums of
    exponentially distributed inter-arrival times with mean
    ``hours_per_year / rate``. Each start marks ``duration`` consecutive hours
    as unavailable (clipped at the end of the year); overlapping outages merge.

    Parameters
    ----------
    seed : int, default 42
        Seed of the private random generator used for the draws. The global
        NumPy random state is left untouched.
    hours_per_year : int, default 8760
        Number of hourly time steps (columns of the matrix).
    outage_rates : dict, optional
        Expected number of outage starts per year for each generator.
        Defaults to ``{"gas": 100, "coal": 200, "wind": 400}``.
    outage_durations : dict, optional
        Outage length in hours for each generator in ``outage_rates``.
        Defaults to ``{"gas": 2, "coal": 3, "wind": 4}``.

    Returns
    -------
    matrix : numpy.ndarray
        Boolean array of shape ``(len(generators), hours_per_year)``; ``True``
        marks an hour in which the generator is on outage.
    generators : list
        Generator labels in row order (the key order of ``outage_rates``).
    """
    if outage_rates is None:
        outage_rates = {"gas": 100, "coal": 200, "wind": 400}
    if outage_durations is None:
        outage_durations = {"gas": 2, "coal": 3, "wind": 4}

    # A private legacy generator yields the same stream that
    # np.random.seed(seed) + np.random.exponential(...) would, without
    # reseeding the global RNG that other cells may rely on.
    rng = np.random.RandomState(seed)

    generators = list(outage_rates)
    matrix = np.zeros((len(generators), hours_per_year), dtype=bool)

    for row, gen in enumerate(generators):
        rate = outage_rates[gen]
        duration = outage_durations[gen]
        if rate <= 0:
            # No outages expected for this generator; also avoids dividing by zero.
            continue

        # Generate inter-arrival times (in hours) using exponential distribution
        mean_interval = hours_per_year / rate
        n_draws = max(1, int(np.ceil(rate * 2)))
        times = np.cumsum(rng.exponential(mean_interval, n_draws))

        # Twice the expected count is almost always enough, but for small
        # rates the draws can end before the year does; top up until covered.
        while times[-1] < hours_per_year:
            extra = times[-1] + np.cumsum(rng.exponential(mean_interval, n_draws))
            times = np.concatenate([times, extra])

        # Only keep outage start times within the year
        outage_starts = times[times < hours_per_year].astype(int)

        for start in outage_starts:
            end = min(start + duration, hours_per_year)
            matrix[row, start:end] = True

    return matrix, generators


## Load forced outage statistics

In [372]:
#file2 = snakemake.params.data_folder + "Common data/Common Data.xlsx"
# NOTE(review): absolute, user-specific path; the notebook only runs where this file exists.
file = "/trinity/home/fuhrand/ERAA/open_eraa/data/Common data/Common Data.xlsx"

excel_file = pd.ExcelFile(file)

# Efficiency table: read from the "Common Data" sheet starting after 10 skipped
# rows, then trim empty rows/columns and keep a fixed 27 x 16 window.
properties_raw_eff = (
    pd.read_excel(excel_file, "Common Data", index_col=[2, 3], skiprows=10, header=0)
    .dropna(how="all")
    .iloc[2:, 1:]
    .dropna(how="all", axis=1)
    .iloc[:27, 1:17]
)

# Outage statistics table: same sheet, starting after 44 skipped rows with a
# two-row column header (rows 0 and 3 of the remaining sheet).
properties_raw_outstats = (
    pd.read_excel(excel_file, "Common Data", index_col=[2, 3], skiprows=44, header=[0, 3])
    .iloc[:, 1:]
    .dropna(how="all", axis=1)
    .dropna(how="all")
    .iloc[:27, 1:17]
)

In [373]:
# Check uniqueness of index
# Both tables are joined on their two-level index further down, so duplicate
# index entries are rejected here.
if properties_raw_eff.index.unique().shape[0] != properties_raw_eff.index.shape[0]:
    raise Exception("Index of generator type is not unique")
if properties_raw_outstats.index.unique().shape[0] != properties_raw_outstats.index.shape[0]:
    raise Exception("Index of generator type is not unique")

In [None]:
# Remove irrelevant columns, rename columns and flag relevant entries
# Flatten the two-level column header only while it is still two-level, so the
# cell can be re-run without droplevel() failing on a single-level index.
if properties_raw_outstats.columns.nlevels > 1:
    properties_raw_outstats.columns = properties_raw_outstats.columns.droplevel()
properties_raw_outstats.index = properties_raw_outstats.index.set_names(['Fuel', 'Type'])
properties_raw_outstats = properties_raw_outstats.rename(columns={'%': 'rel_occ'})
properties_raw_eff = properties_raw_eff.rename(columns={'Efficiency range in NCV terms': 'eff'})

# Combine outage statistics (rel_occ, Days) with the efficiency range per (Fuel, Type).
properties_stage_outstats = (
    properties_raw_outstats.loc[:, ['rel_occ', 'Days']]
    .reset_index()
    .merge(properties_raw_eff[["eff"]], on=['Fuel', 'Type'], how='inner')
)

# Gas rows are split into CCGT / OCGT by their type label; every other fuel
# gets the placeholder 'rel'. na=False keeps the masks strictly boolean even
# if a type label is missing.
stats_is_gas = properties_stage_outstats['Fuel'] == 'Gas'
stats_is_ccgt = properties_stage_outstats['Type'].str.contains('CCGT', regex=False, na=False)
properties_stage_outstats['NewType'] = np.select(
    [stats_is_gas & stats_is_ccgt, stats_is_gas],
    ['CCGT', 'OCGT'],
    default='rel',
)

In [433]:
# Lower bound: first two characters of the efficiency text as a percentage;
# a missing entry becomes 0.
properties_stage_outstats["eff_low_raw"] = (
    properties_stage_outstats["eff"].fillna(0).astype(str).str.slice(0, 2).astype(float).div(100)
)
# Upper bound: the two characters before the final character of the text;
# a missing entry is treated as '99%'.
properties_stage_outstats["eff_up_raw"] = (
    properties_stage_outstats["eff"].fillna('99%').astype(str).str.slice(-3, -1).astype(float).div(100)
)
# Per-fuel extremes of the parsed bounds, broadcast back to every row.
properties_stage_outstats["eff_low_min"] = (
    properties_stage_outstats.groupby("Fuel")["eff_low_raw"].transform('min')
)
properties_stage_outstats["eff_up_max"] = (
    properties_stage_outstats.groupby("Fuel")["eff_up_raw"].transform('max')
)

In [434]:
#properties_stage_outstats["eff_low"] = np.where(properties_stage_outstats['eff_low_raw'] == properties_stage_outstats['eff_low_min'], 0, properties_stage_outstats['eff_low_raw'])
#properties_stage_outstats["eff_up"] = np.where(properties_stage_outstats['eff_up_raw'] == properties_stage_outstats['eff_up_max'], 1, properties_stage_outstats['eff_up_raw'])

## Load generators

In [539]:
# Load the power plant table from powerplantmatching; the cells below read its
# Fueltype, Technology and Efficiency columns.
power_plants = pm.powerplants()

In [540]:
#power_plants[["Fueltype", "Technology", "Set"]].drop_duplicates().to_excel("/trinity/home/fuhrand/ERAA/open_eraa/data/temp/powerplants.xlsx", index=False)

In [541]:
# Classify plants for the outage lookup: natural gas plants whose technology
# label contains "CCGT" (case-insensitive) are 'CCGT', all other natural gas
# plants (including those without a technology label) are 'OCGT', and every
# other fuel gets the placeholder 'rel'.
plant_is_gas = power_plants['Fueltype'] == 'Natural Gas'
# na=False makes missing technology labels an explicit "no match" instead of
# propagating NaN into the boolean logic.
plant_is_ccgt = power_plants['Technology'].str.contains('CCGT', case=False, na=False)
power_plants['NewTechnology'] = np.select(
    [plant_is_gas & plant_is_ccgt, plant_is_gas],
    ['CCGT', 'OCGT'],
    default='rel',
)

## Add outage parameters to generators

### Add default values

In [542]:
# Fallback values for every plant: the mean over all rows of the outage
# statistics table. Assigning the scalar directly avoids routing a one-element
# Series through a list-of-columns key.
power_plants["unavail_rate_def"] = properties_stage_outstats["rel_occ"].mean()
power_plants["unavail_days_def"] = properties_stage_outstats["Days"].mean()

### Add more precise values for fuel and type

In [543]:
# Maps fuel names used in the `Fuel` column of the outage statistics (keys) to
# the `Fueltype` labels of the power plant table (values).
# NOTE(review): 'Hydrogen' is mapped to 'Hydro', so hydro plants receive the
# hydrogen outage statistics — confirm this is intended.
# The three oil fuels share one target, so their statistics are pooled when
# averaging by `pp_fuel`.
dict_fuel = {'Hard coal': 'Hard Coal',
'Hydrogen': 'Hydro',
'Lignite': 'Lignite',
'Gas': 'Natural Gas',
'Nuclear': 'Nuclear',
'Heavy oil': 'Oil',
'Light oil': 'Oil',
'Oil shale': 'Oil'}

In [544]:
# Translate each statistics row's fuel name into the power plant fuel label;
# fuels without an entry in dict_fuel end up as NaN.
properties_stage_outstats["pp_fuel"] = properties_stage_outstats['Fuel'].map(dict_fuel)

In [570]:
# Inspect the mean `rel_occ` per mapped fuel and plant type (the values used for the lookup below).
properties_stage_outstats.groupby(["pp_fuel", "NewType"]).rel_occ.mean()

pp_fuel      NewType
Hard Coal    rel        0.0875
Hydro        rel        0.0500
Lignite      rel        0.0875
Natural Gas  CCGT       0.0600
             OCGT       0.0725
Nuclear      rel        0.0500
Oil          rel        0.0910
Name: rel_occ, dtype: float64

In [545]:
# Look up the mean outage duration and rate per (fuel, technology) pair.
# Plants whose pair has no statistics get NaN.
fuel_tech_keys = power_plants.set_index(['Fueltype', 'NewTechnology']).index
stats_by_fuel_tech = properties_stage_outstats.groupby(["pp_fuel", "NewType"])

mean_days_by_key = dict(stats_by_fuel_tech.Days.mean())
power_plants["unavail_days_fueltype"] = fuel_tech_keys.map(mean_days_by_key)

mean_rate_by_key = dict(stats_by_fuel_tech.rel_occ.mean())
power_plants["unavail_rate_fueltype"] = fuel_tech_keys.map(mean_rate_by_key)

### Add more precise values based on generator efficiency

In [547]:
def _unavail_rate_from_efficiency(efficiency, threshold, rate_below, rate_above):
    """Map plant efficiencies to one of two unavailability rates.

    Efficiencies in ``(0, threshold]`` get ``rate_below``, efficiencies in
    ``(threshold, 1]`` get ``rate_above``; anything else (including NaN)
    yields NaN. Returns a float Series with the index of ``efficiency``.
    """
    binned = pd.cut(efficiency, bins=[0, threshold, 1], labels=[rate_below, rate_above])
    return binned.astype(float)


plants_coal = power_plants['Fueltype'].isin(['Hard Coal', 'Lignite'])
plants_gas = power_plants['Fueltype'].isin(['Natural Gas'])

# (plant selection, efficiency threshold, rate at/below threshold, rate above threshold)
# NOTE(review): for coal the higher-efficiency bin gets the lower rate, while
# for both gas technologies it gets the higher rate — confirm the gas labels
# are not swapped.
efficiency_rules = [
    (plants_coal, 0.43999, 0.1, 0.075),
    (plants_gas & power_plants['NewTechnology'].isin(['CCGT']), 0.52999, 0.05, 0.08),
    (plants_gas & power_plants['NewTechnology'].isin(['OCGT']), 0.38999, 0.05, 0.08),
]

# Start from an all-NaN column so it always exists (even if no plant matches)
# and a re-run never keeps stale values.
power_plants['unavail_rate_eff'] = np.nan
for selection, threshold, rate_below, rate_above in efficiency_rules:
    if not selection.any():
        continue
    power_plants.loc[selection, 'unavail_rate_eff'] = _unavail_rate_from_efficiency(
        power_plants.loc[selection, 'Efficiency'], threshold, rate_below, rate_above)

### Consolidate final parameters

In [557]:
# Final rate: use the most specific value available per plant — the
# efficiency-based rate, else the fuel/technology mean, else the global default.
power_plants['unavail_rate'] = np.where(
    power_plants['unavail_rate_eff'].notna(),
    power_plants['unavail_rate_eff'],
    np.where(
        power_plants['unavail_rate_fueltype'].notna(),
        power_plants['unavail_rate_fueltype'],
        power_plants['unavail_rate_def'],
    ),
)

# Final duration: fuel/technology mean where present, otherwise the global default.
power_plants['unavail_days'] = np.where(
    power_plants['unavail_days_fueltype'].notna(),
    power_plants['unavail_days_fueltype'],
    power_plants['unavail_days_def'],
)

In [568]:
# Sanity check: list the distinct final unavailability rates per fuel and technology class.
power_plants.groupby(['Fueltype', 'NewTechnology'])['unavail_rate'].agg(['unique'])

Unnamed: 0_level_0,Unnamed: 1_level_0,unique
Fueltype,NewTechnology,Unnamed: 2_level_1
Bioenergy,rel,[0.0751923076923077]
Geothermal,rel,[0.0751923076923077]
Hard Coal,rel,"[0.0875, 0.075, 0.1]"
Hydro,rel,[0.05]
Lignite,rel,"[0.0875, 0.1]"
Natural Gas,CCGT,"[0.06, 0.05, 0.08]"
Natural Gas,OCGT,"[0.07250000000000001, 0.05, 0.08]"
Nuclear,rel,[0.05]
Oil,rel,[0.091]
Other,rel,[0.0751923076923077]


In [569]:
# Sanity check: list the distinct final outage durations per fuel and technology class.
power_plants.groupby(['Fueltype', 'NewTechnology'])['unavail_days'].agg(['unique'])

Unnamed: 0_level_0,Unnamed: 1_level_0,unique
Fueltype,NewTechnology,Unnamed: 2_level_1
Bioenergy,rel,[1.2307692307692308]
Geothermal,rel,[1.2307692307692308]
Hard Coal,rel,[1.0]
Hydro,rel,[1.0]
Lignite,rel,[1.0]
Natural Gas,CCGT,[1.0]
Natural Gas,OCGT,[1.0]
Nuclear,rel,[7.0]
Oil,rel,[1.0]
Other,rel,[1.2307692307692308]


## Simulate forced outages

In [None]:
# Outer-join the two plant tables on name, country, fuel and type. Non-key
# columns present in both inputs are suffixed `_pm` / `_entsoe`.
# NOTE(review): `df_pm_renamed` and `df_entsoe` are not defined in any cell
# visible here — confirm where they come from, otherwise this cell raises
# NameError on a fresh kernel.
df_merged = pd.merge(
    df_pm_renamed,
    df_entsoe,
    left_on=['Name', 'Country', 'Fuel', 'Type'],
    right_on=['Name', 'Country', 'Fuel', 'Type'],
    how='outer',  # or 'inner' for strict matches
    suffixes=('_pm', '_entsoe')
)

In [None]:
year = 2030
# Hours from 1 January of `year` to 1 January of the following year, so the
# last day of the year (and a leap day, if any) is included. The module is
# imported as `dt`, so it is referenced by that alias.
hours_per_year = int((dt.datetime(year + 1, 1, 1) - dt.datetime(year, 1, 1)).total_seconds() // 3600)

# Boolean outage array with one row per power plant and one column per hour,
# initialised to False everywhere.
np_forcedoutages = np.zeros((len(power_plants), hours_per_year), dtype=bool)

In [None]:
# NOTE(review): `generators`, `outage_rates`, `outage_durations` and `matrix`
# are not assigned at notebook scope in any cell visible here (they appear only
# as locals inside generate_outage_matrix) — confirm where they come from,
# otherwise this cell raises NameError on a fresh kernel. The loop repeats the
# body of generate_outage_matrix and does not write to `np_forcedoutages`.
np.random.seed(42)

for i, gen in enumerate(generators):
    rate = outage_rates[gen]
    duration = outage_durations[gen]
    
    # Generate inter-arrival times (in hours) using exponential distribution
    mean_interval = hours_per_year / rate
    times = np.cumsum(np.random.exponential(mean_interval, rate * 2))
    
    # Only keep outage start times within the year
    outage_starts = times[times < hours_per_year].astype(int)
    
    # Mark `duration` hours from each start as outage, clipped at the end of the year
    for start in outage_starts:
        end = min(start + duration, hours_per_year)
        matrix[i, start:end] = True


In [17]:
# Display the mean inter-arrival time (hours) left over from the last iteration of the loop above.
mean_interval

21.9

In [30]:
# Display the power plant table for inspection.
power_plants

Matched Data,Name,Fueltype,Technology,Set,Country,Capacity,Efficiency,DateIn,DateRetrofit,DateOut,lat,lon,Duration,Volume_Mm3,DamHeight_m,StorageCapacity_MWh,EIC,projectID
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0,Borssele,Hard Coal,Steam Turbine,PP,Netherlands,485.0,,1973.0,,2034.0,51.4332,3.7160,,0.0,0.0,0.0,{'49W000000000054X'},"{'BEYONDCOAL': {'BEYOND-NL-2'}, 'ENTSOE': {'49..."
1,Oraison,Hydro,Reservoir,Store,France,187.0,,1962.0,,,43.9212,5.9253,,0.0,0.0,0.0,{'17W100P100P0288Y'},"{'ENTSOE': {'17W100P100P0288Y'}, 'GEM': {'G601..."
2,Ottmarsheim,Hydro,Run-Of-River,PP,France,156.0,,1952.0,,,47.7741,7.5225,,0.0,0.0,0.0,{'17W100P100P0306L'},"{'ENTSOE': {'17W100P100P0306L'}, 'GEM': {'G601..."
3,Pied De Borne,Hydro,Reservoir,Store,France,109.4,,1965.0,,,44.4788,3.9858,,0.0,0.0,0.0,{'17W100P100P0289W'},"{'ENTSOE': {'17W100P100P0289W'}, 'GEM': {'G601..."
4,Pouget,Hydro,Reservoir,Store,France,446.9,,1951.0,,,44.0597,2.7701,1.573034,0.0,0.0,0.0,{'17W100P100P0290A'},"{'ENTSOE': {'17W100P100P0290A'}, 'GEM': {'G601..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22684,Zwolle Solar,Solar,PV,PP,Netherlands,5.2,,2019.0,,,52.5066,6.1377,,0.0,0.0,0.0,{},{'GEM': {'G100001008668'}}
22685,Zwolle Solar,Solar,PV,PP,Netherlands,16.3,,2021.0,,,52.4610,6.1230,,0.0,0.0,0.0,{},{'GEM': {'G100001008669'}}
22686,Zychlin Kutnowski Solar,Solar,PV,PP,Poland,2.3,,,,,52.2440,19.6261,,0.0,0.0,0.0,{},{'GEM': {'G100001007329'}}
22687,Zydowo Koszalinski Solar,Solar,PV,PP,Poland,1.0,,,,,54.0427,16.7180,,0.0,0.0,0.0,{},{'GEM': {'G100001007483'}}


In [8]:
# Instantiate a PyPSA network with default arguments.
n = pypsa.Network()

In [11]:

    # Run the example simulation with seed 123 and print a short summary.
    outage_matrix, gen_labels = generate_outage_matrix(seed=123)
    summary_lines = (
        ("Generator order:", gen_labels),
        ("Matrix shape:", outage_matrix.shape),
        ("Sample of first 100 hours:\n", outage_matrix[:, :100]),
    )
    for caption, value in summary_lines:
        print(caption, value)


Generator order: ['gas', 'coal', 'wind']
Matrix shape: (3, 8760)
Sample of first 100 hours:
 [[False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False]
 [False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False  True  True
   True  True  True  True False False False False False False Fals