In [94]:
import numpy as np
import pandas as pd
import powerplantmatching as pm
import pypsa
import datetime as dt

In [93]:
def generate_outage_matrix(
    seed=42,
    hours_per_year=365 * 24,
    outage_rates=None,
    outage_durations=None,
):
    """Simulate an hourly boolean forced-outage matrix, one row per generator.

    Outage start times are produced by accumulating exponentially distributed
    inter-arrival times whose mean is ``hours_per_year / rate``. Each outage
    marks ``duration`` consecutive hours as ``True``, clipped at the end of the
    year. Overlapping outages simply merge.

    Parameters
    ----------
    seed : int or None
        Seed for a *local* ``np.random.RandomState``. This yields the same
        numbers as ``np.random.seed(seed)`` followed by the global
        ``np.random.exponential`` calls, but does not disturb the global RNG.
    hours_per_year : int
        Number of hourly columns in the matrix.
    outage_rates : dict[str, int], optional
        Expected number of outages per year for each generator. Defaults to
        ``{"gas": 100, "coal": 200, "wind": 400}``. Generators with a
        non-positive rate get no outages.
    outage_durations : dict[str, int], optional
        Outage length in whole hours for each generator. Defaults to
        ``{"gas": 2, "coal": 3, "wind": 4}``.

    Returns
    -------
    matrix : np.ndarray of bool, shape (n_generators, hours_per_year)
        ``True`` where the generator is on outage.
    generators : list[str]
        Row labels, in the iteration order of ``outage_rates``.

    Raises
    ------
    KeyError
        If a generator in ``outage_rates`` has no entry in ``outage_durations``.
    """
    if outage_rates is None:
        outage_rates = {"gas": 100, "coal": 200, "wind": 400}
    if outage_durations is None:
        outage_durations = {"gas": 2, "coal": 3, "wind": 4}

    generators = list(outage_rates)
    # Local legacy generator: identical stream to np.random.seed(seed), but the
    # caller's global random state is left untouched.
    rng = np.random.RandomState(seed)

    matrix = np.zeros((len(generators), hours_per_year), dtype=bool)

    for row, gen in enumerate(generators):
        rate = outage_rates[gen]
        duration = outage_durations[gen]
        if rate <= 0:
            # No outages expected; also avoids dividing by zero below.
            continue

        # Generate inter-arrival times (in hours) using exponential distribution
        mean_interval = hours_per_year / rate
        n_draws = max(1, int(np.ceil(rate * 2)))
        times = np.cumsum(rng.exponential(mean_interval, n_draws))
        # 2 * rate draws almost always cover the year for large rates, but for
        # small rates they frequently do not; keep drawing until they do.
        while times[-1] < hours_per_year:
            more = times[-1] + np.cumsum(rng.exponential(mean_interval, n_draws))
            times = np.concatenate((times, more))

        # Only keep outage start times within the year
        outage_starts = times[times < hours_per_year].astype(int)

        for start in outage_starts:
            end = min(start + duration, hours_per_year)
            matrix[row, start:end] = True

    return matrix, generators


## Load forced outage statistics

In [None]:
# Read the forced-outage statistics from the "Common Data" sheet of the workbook.
# NOTE(review): the path below is absolute and user-specific; the commented-out
# line suggests it is meant to come from snakemake.params.data_folder.
#file2 = snakemake.params.data_folder + "Common data/Common Data.xlsx"
file = "/trinity/home/fuhrand/ERAA/open_eraa/data/Common data/Common Data.xlsx"

excel_file = pd.ExcelFile(file)

# Use the handle opened above (the original referenced an undefined `excel_file2`).
# Columns 2 and 3 form the row index; header rows 0 and 3 (after skipping 44 rows)
# form a two-level column header. Empty columns/rows are dropped before the final
# positional crop to the block of interest.
properties_raw = (
    pd.read_excel(excel_file, "Common Data", index_col=[2, 3], skiprows=44, header=[0, 3])
    .iloc[:, 1:]
    .dropna(how="all", axis=1)
    .dropna(how="all")
    .iloc[:27, 1:17]
)

In [61]:
# Sanity check: every entry of the row index must occur exactly once.
index_entries = properties_raw.index
if index_entries.unique().shape[0] != index_entries.shape[0]:
    raise Exception("Index of generator type is not unique")

In [None]:
# Drop the first level of the two-level column header so that columns are
# addressed by a single label.
# NOTE(review): not idempotent - re-running this cell on the already-flattened
# frame will fail because there is no level left to drop.
properties_raw.columns = properties_raw.columns.droplevel()

In [189]:
# Label the two row-index levels and give the '%' column a code-friendly name.
properties_raw.index = properties_raw.index.rename(['Fuel', 'Type'])
properties_raw.rename({'%': 'rel_occ'}, axis="columns", inplace=True)

In [190]:
# Keep only the two outage columns and turn the (Fuel, Type) index into columns.
properties_stage = properties_raw[['rel_occ', 'Days']].reset_index()

## Load generators

In [58]:
# Load the power-plant table through powerplantmatching; it is used as
# `power_plants` throughout the cells below.
power_plants = pm.powerplants()

In [73]:
# Preview the first rows to see which columns are available.
power_plants.head(5)

Matched Data,Name,Fueltype,Technology,Set,Country,Capacity,Efficiency,DateIn,DateRetrofit,DateOut,lat,lon,Duration,Volume_Mm3,DamHeight_m,StorageCapacity_MWh,EIC,projectID
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0,Borssele,Hard Coal,Steam Turbine,PP,Netherlands,485.0,,1973.0,,2034.0,51.4332,3.716,,0.0,0.0,0.0,{'49W000000000054X'},"{'BEYONDCOAL': {'BEYOND-NL-2'}, 'ENTSOE': {'49..."
1,Oraison,Hydro,Reservoir,Store,France,187.0,,1962.0,,,43.9212,5.9253,,0.0,0.0,0.0,{'17W100P100P0288Y'},"{'ENTSOE': {'17W100P100P0288Y'}, 'GEM': {'G601..."
2,Ottmarsheim,Hydro,Run-Of-River,PP,France,156.0,,1952.0,,,47.7741,7.5225,,0.0,0.0,0.0,{'17W100P100P0306L'},"{'ENTSOE': {'17W100P100P0306L'}, 'GEM': {'G601..."
3,Pied De Borne,Hydro,Reservoir,Store,France,109.4,,1965.0,,,44.4788,3.9858,,0.0,0.0,0.0,{'17W100P100P0289W'},"{'ENTSOE': {'17W100P100P0289W'}, 'GEM': {'G601..."
4,Pouget,Hydro,Reservoir,Store,France,446.9,,1951.0,,,44.0597,2.7701,1.573034,0.0,0.0,0.0,{'17W100P100P0290A'},"{'ENTSOE': {'17W100P100P0290A'}, 'GEM': {'G601..."


In [81]:
# Export the distinct (Fueltype, Technology, Set) combinations to Excel.
# NOTE(review): another cell in this notebook writes the full `power_plants`
# table to this very same path, so whichever cell runs last overwrites the
# other's file - confirm whether two different file names were intended.
fuel_technology_set_combinations = power_plants[["Fueltype", "Technology", "Set"]].drop_duplicates()
fuel_technology_set_combinations.to_excel(
    "/trinity/home/fuhrand/ERAA/open_eraa/data/temp/powerplants.xlsx",
    index=False,
)

In [91]:
# Show the distinct fuel/technology/set/efficiency combinations among plants
# that actually report an efficiency.
efficiency_columns = ["Fueltype", "Technology", "Set", "Efficiency"]
power_plants.loc[power_plants.Efficiency.notna(), efficiency_columns].drop_duplicates()

Matched Data,Fueltype,Technology,Set,Efficiency
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
101,Nuclear,Steam Turbine,PP,0.330000
109,Hard Coal,CCGT,PP,0.550000
111,Hard Coal,CCGT,PP,0.187765
126,Hard Coal,CCGT,CHP,0.590000
196,Lignite,CCGT,CHP,0.403700
...,...,...,...,...
6740,Natural Gas,Steam Turbine,CHP,0.342100
6742,Natural Gas,Steam Turbine,PP,0.326700
6748,Natural Gas,Steam Turbine,CHP,0.386100
6751,Oil,Steam Turbine,PP,0.309800


In [72]:
# Export the full power-plant table to Excel.
# NOTE(review): an earlier cell writes the distinct Fueltype/Technology/Set
# combinations to this same path; one of the two exports overwrites the other.
power_plants.to_excel("/trinity/home/fuhrand/ERAA/open_eraa/data/temp/powerplants.xlsx", index=False)

## Add outage parameters to generators

### Add default values

In [None]:
# Default outage parameters: the mean over all rows of the statistics table,
# broadcast as a constant to every power plant.
# The scalar is assigned directly; the previous form assigned a one-element,
# label-indexed Series to `df[[col]]`, which only works through pandas'
# deprecated positional fallback for integer keys.
power_plants["unavail_rate_def"] = properties_stage["rel_occ"].mean()
power_plants["unavail_days_def"] = properties_stage["Days"].mean()

### Update default values where more precise values are available at the fuel-type level

In [192]:
# Translation table from the 'Fuel' labels of the outage statistics (keys) to the
# 'Fueltype' labels used in `power_plants` (values). Several oil variants share
# one target label.
dict_fuel = {'Hard coal': 'Hard Coal',
# NOTE(review): this assigns the statistics for 'Hydrogen' to plants whose
# Fueltype is 'Hydro' (the preview rows with that label are reservoir and
# run-of-river plants) - confirm this is intended and not a typo.
'Hydrogen': 'Hydro',
'Lignite': 'Lignite',
'Gas': 'Natural Gas',
'Nuclear': 'Nuclear',
'Heavy oil': 'Oil',
'Light oil': 'Oil',
'Oil shale': 'Oil'}

In [193]:
# Add the power-plant fuel label for each statistics row; fuels without an
# entry in `dict_fuel` become NaN.
properties_stage["pp_fuel"] = properties_stage['Fuel'].map(dict_fuel)

In [None]:
# Fuel-level outage parameters: average the statistics per mapped fuel label and
# look them up by each plant's Fueltype (plants without a match get NaN).
stats_by_fuel = properties_stage.groupby("pp_fuel")
mean_days_by_fuel = stats_by_fuel["Days"].mean().to_dict()
mean_rate_by_fuel = stats_by_fuel["rel_occ"].mean().to_dict()
power_plants["unavail_days_gen"] = power_plants["Fueltype"].map(mean_days_by_fuel)
power_plants["unavail_rate_gen"] = power_plants["Fueltype"].map(mean_rate_by_fuel)

### Update values where more precise values are available per fuel and technology type

In [211]:
# Inspect the table after adding the outage columns.
# NOTE(review): the saved output below shows columns named
# `unavailability_rate` / `unavailability_days`, while the cells above create
# `unavail_rate_def` / `unavail_days_def` - the output appears to stem from an
# earlier version of the code; re-run to refresh it.
power_plants

Matched Data,Name,Fueltype,Technology,Set,Country,Capacity,Efficiency,DateIn,DateRetrofit,DateOut,...,Duration,Volume_Mm3,DamHeight_m,StorageCapacity_MWh,EIC,projectID,unavailability_rate,unavailability_days,unavail_days_gen,unavail_rate_gen
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,Borssele,Hard Coal,Steam Turbine,PP,Netherlands,485.0,,1973.0,,2034.0,...,,0.0,0.0,0.0,{'49W000000000054X'},"{'BEYONDCOAL': {'BEYOND-NL-2'}, 'ENTSOE': {'49...",0.075192,1.230769,1.0,0.0875
1,Oraison,Hydro,Reservoir,Store,France,187.0,,1962.0,,,...,,0.0,0.0,0.0,{'17W100P100P0288Y'},"{'ENTSOE': {'17W100P100P0288Y'}, 'GEM': {'G601...",0.075192,1.230769,1.0,0.0500
2,Ottmarsheim,Hydro,Run-Of-River,PP,France,156.0,,1952.0,,,...,,0.0,0.0,0.0,{'17W100P100P0306L'},"{'ENTSOE': {'17W100P100P0306L'}, 'GEM': {'G601...",0.075192,1.230769,1.0,0.0500
3,Pied De Borne,Hydro,Reservoir,Store,France,109.4,,1965.0,,,...,,0.0,0.0,0.0,{'17W100P100P0289W'},"{'ENTSOE': {'17W100P100P0289W'}, 'GEM': {'G601...",0.075192,1.230769,1.0,0.0500
4,Pouget,Hydro,Reservoir,Store,France,446.9,,1951.0,,,...,1.573034,0.0,0.0,0.0,{'17W100P100P0290A'},"{'ENTSOE': {'17W100P100P0290A'}, 'GEM': {'G601...",0.075192,1.230769,1.0,0.0500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22684,Zwolle Solar,Solar,PV,PP,Netherlands,5.2,,2019.0,,,...,,0.0,0.0,0.0,{},{'GEM': {'G100001008668'}},0.075192,1.230769,,
22685,Zwolle Solar,Solar,PV,PP,Netherlands,16.3,,2021.0,,,...,,0.0,0.0,0.0,{},{'GEM': {'G100001008669'}},0.075192,1.230769,,
22686,Zychlin Kutnowski Solar,Solar,PV,PP,Poland,2.3,,,,,...,,0.0,0.0,0.0,{},{'GEM': {'G100001007329'}},0.075192,1.230769,,
22687,Zydowo Koszalinski Solar,Solar,PV,PP,Poland,1.0,,,,,...,,0.0,0.0,0.0,{},{'GEM': {'G100001007483'}},0.075192,1.230769,,


## Simulate forced outages

In [None]:
# Rename power-plant columns to the names used as merge keys in the next cell.
# NOTE(review): `df_pm` is not defined anywhere in the visible part of this
# notebook (the power-plant table is called `power_plants`) - confirm the
# intended input before running.
mapping = {
    'Fueltype': 'Fuel',
    'Technology': 'Type',
    'Capacity': 'Installed_Capacity',  # Example
    # Add more as needed
}
df_pm_renamed = df_pm.rename(columns=mapping)

In [None]:
# Outer-join the renamed power-plant table with `df_entsoe` on the shared
# identifying columns; non-key columns present in both get a source suffix.
merge_keys = ['Name', 'Country', 'Fuel', 'Type']
df_merged = df_pm_renamed.merge(
    df_entsoe,
    how='outer',  # or 'inner' for strict matches
    left_on=merge_keys,
    right_on=merge_keys,
    suffixes=('_pm', '_entsoe'),
)

In [None]:
year = 2030
# Hours in the calendar year: measure from 1 Jan of `year` to 1 Jan of the next
# year so the last day is included and leap years are handled (8760 for 2030).
# The module is imported as `dt`, so `datetime.datetime` would be a NameError.
hours_per_year = (dt.datetime(year + 1, 1, 1) - dt.datetime(year, 1, 1)).days * 24

# One row per power plant, one column per hour; True will mark a forced outage.
np_forcedoutages = np.zeros((len(power_plants), hours_per_year), dtype=bool)

In [None]:
# Draw outage start times per generator and mark the affected hours.
# NOTE(review): `generators`, `outage_rates`, `outage_durations` and `matrix`
# are only defined as locals inside `generate_outage_matrix` in the visible
# notebook, so this cell depends on leftover kernel state. It also writes into
# `matrix`, not into `np_forcedoutages` allocated in the previous cell.
# The loop body duplicates the one in `generate_outage_matrix`.
np.random.seed(42)

for i, gen in enumerate(generators):
    rate = outage_rates[gen]
    duration = outage_durations[gen]
    
    # Generate inter-arrival times (in hours) using exponential distribution
    mean_interval = hours_per_year / rate
    times = np.cumsum(np.random.exponential(mean_interval, rate * 2))
    
    # Only keep outage start times within the year
    outage_starts = times[times < hours_per_year].astype(int)
    
    # Mark `duration` hours from each start, clipped at the end of the year
    for start in outage_starts:
        end = min(start + duration, hours_per_year)
        matrix[i, start:end] = True


In [17]:
# Inspect the mean inter-arrival time (in hours) left over from the last
# iteration of the loop above.
mean_interval

21.9

In [30]:
# Display the power-plant table.
# NOTE(review): the saved output below lists only the original columns (no
# outage columns), so it was presumably produced before those were added.
power_plants

Matched Data,Name,Fueltype,Technology,Set,Country,Capacity,Efficiency,DateIn,DateRetrofit,DateOut,lat,lon,Duration,Volume_Mm3,DamHeight_m,StorageCapacity_MWh,EIC,projectID
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0,Borssele,Hard Coal,Steam Turbine,PP,Netherlands,485.0,,1973.0,,2034.0,51.4332,3.7160,,0.0,0.0,0.0,{'49W000000000054X'},"{'BEYONDCOAL': {'BEYOND-NL-2'}, 'ENTSOE': {'49..."
1,Oraison,Hydro,Reservoir,Store,France,187.0,,1962.0,,,43.9212,5.9253,,0.0,0.0,0.0,{'17W100P100P0288Y'},"{'ENTSOE': {'17W100P100P0288Y'}, 'GEM': {'G601..."
2,Ottmarsheim,Hydro,Run-Of-River,PP,France,156.0,,1952.0,,,47.7741,7.5225,,0.0,0.0,0.0,{'17W100P100P0306L'},"{'ENTSOE': {'17W100P100P0306L'}, 'GEM': {'G601..."
3,Pied De Borne,Hydro,Reservoir,Store,France,109.4,,1965.0,,,44.4788,3.9858,,0.0,0.0,0.0,{'17W100P100P0289W'},"{'ENTSOE': {'17W100P100P0289W'}, 'GEM': {'G601..."
4,Pouget,Hydro,Reservoir,Store,France,446.9,,1951.0,,,44.0597,2.7701,1.573034,0.0,0.0,0.0,{'17W100P100P0290A'},"{'ENTSOE': {'17W100P100P0290A'}, 'GEM': {'G601..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22684,Zwolle Solar,Solar,PV,PP,Netherlands,5.2,,2019.0,,,52.5066,6.1377,,0.0,0.0,0.0,{},{'GEM': {'G100001008668'}}
22685,Zwolle Solar,Solar,PV,PP,Netherlands,16.3,,2021.0,,,52.4610,6.1230,,0.0,0.0,0.0,{},{'GEM': {'G100001008669'}}
22686,Zychlin Kutnowski Solar,Solar,PV,PP,Poland,2.3,,,,,52.2440,19.6261,,0.0,0.0,0.0,{},{'GEM': {'G100001007329'}}
22687,Zydowo Koszalinski Solar,Solar,PV,PP,Poland,1.0,,,,,54.0427,16.7180,,0.0,0.0,0.0,{},{'GEM': {'G100001007483'}}


In [8]:
# Create an empty PyPSA network object; nothing is added to it in the visible cells.
n = pypsa.Network()

In [11]:

    # Demo run of generate_outage_matrix with a fixed seed: print the row labels,
    # the matrix shape and the first 100 hourly flags of every generator.
    # NOTE(review): the whole cell is indented by four spaces, which is not valid
    # as plain top-level Python.
    outage_matrix, gen_labels = generate_outage_matrix(seed=123)
    print("Generator order:", gen_labels)
    print("Matrix shape:", outage_matrix.shape)
    print("Sample of first 100 hours:\n", outage_matrix[:, :100])


Generator order: ['gas', 'coal', 'wind']
Matrix shape: (3, 8760)
Sample of first 100 hours:
 [[False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False]
 [False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False  True  True
   True  True  True  True False False False False False False Fals