In [1]:
import pandas as pd, numpy as np, os, sqlite3 as db3
from matplotlib import pyplot as plt
from py.tobit import *

Let's specify the folder that holds the cleaned data:

In [2]:
# Working directory of the notebook; the data folders below are resolved relative to it.
direc = os.getcwd()
# Folder from which the pickled datasets are read in the cells below.
data_dir = os.path.join(direc,'CleanedData')

## Electricity Demand

Get demand:

In [3]:
df_demand = pd.read_pickle(os.path.join(data_dir,'Load_DK1DK2_2019'))

`Inspect df_demand`

In [4]:
df_demand.head()

Unnamed: 0,g_E,HourUTC,HourCET/CEST,Month,Week,Weekday,HourOfTheDay,h,ForecastedLoad_MWh,ActualLoad_MWh
0,DK1,2018-12-31 23:00:00+00:00,2019-01-01 00:00:00+01:00,1,1,1,0,1,1857.0,1898.0
1,DK1,2019-01-01 00:00:00+00:00,2019-01-01 01:00:00+01:00,1,1,1,1,2,1819.0,1838.0
2,DK1,2019-01-01 01:00:00+00:00,2019-01-01 02:00:00+01:00,1,1,1,2,3,1755.0,1779.0
3,DK1,2019-01-01 02:00:00+00:00,2019-01-01 03:00:00+01:00,1,1,1,3,4,1709.0,1726.0
4,DK1,2019-01-01 03:00:00+00:00,2019-01-01 04:00:00+01:00,1,1,1,4,5,1685.0,1710.0


Get areas:

In [5]:
g_area = df_demand['g_E'].unique().tolist()
print(g_area) # to check that our df_demand was built correctly

['DK1', 'DK2']


Estimate hourly variation in demand:

In [6]:
# Regress actual load on forecasted load (plus a constant) separately for every bidding area
# and keep the fitted values as that area's hourly load.
xvar, yvar = ['intercept','ForecastedLoad_MWh'], 'ActualLoad_MWh'
d_yhat = {}
for g in g_area:
    # Rows of this area, with the constant regressor attached (assign returns a copy).
    area_df = df_demand.loc[df_demand['g_E']==g].assign(intercept=1.0)
    # Only rows where both the forecast and the actual load are observed enter the estimation.
    complete = area_df[xvar[1:]+[yvar]].notna().all(axis=1)
    X = area_df.loc[complete,xvar].values
    y = area_df.loc[complete,[yvar]].values
    # OLS estimator: (X'X)^-1 X'y
    β_hat = np.linalg.inv(X.T @ X) @ (X.T @ y)
    # Fitted values for all rows of the area (rows without a forecast give NaN).
    fitted = (area_df[xvar].values @ β_hat).flatten()
    hour_index = pd.MultiIndex.from_product([[f'c_{g}'],area_df['h'].tolist()],names=['c_E','h'])
    d_yhat[g] = pd.Series(fitted,index=hour_index,name='LoadVariation_E')

# Stack the areas into one series indexed by (c_E, h).
y_hat = pd.concat(list(d_yhat.values()),axis=0)

Now calculate total yearly demand and share of hourly demand:

In [7]:
# Total fitted load per consumer group: sum of y_hat over all hours within each 'c_E'.
Load_E = y_hat.groupby('c_E').sum().rename('Load_E')
# Each hour's share of that total.
# NOTE(review): relies on pandas aligning the single-level 'c_E' index of Load_E with the
# matching level of y_hat's (c_E, h) MultiIndex — confirm on the pandas version in use.
LoadVariation_E = y_hat.div(Load_E).rename('LoadVariation_E')

Download marginal prices:

In [8]:
df_prices = pd.read_pickle(os.path.join(data_dir,'SpotPrices_DELUDK1DK2NLNO2SE3SE4_2019'))

Drop DK prices:

In [9]:
df_prices = df_prices[df_prices['g_E'].str.find('DK')==-1]

We need to map the countries to the correct areas; we therefore download mappings of interconnectors:

In [10]:
df_ic = pd.read_pickle(os.path.join(data_dir,'TransmissionCapacities_DK_2019'))[['g_E','g_EE']].drop_duplicates()

`Inspect df_ic`

In [11]:
df_ic.head()

Unnamed: 0,g_E,g_EE
0,DK1,DELU
8759,DK1,DK2
17518,DK1,NL
20134,DK1,NO2
28893,DK1,SE3


Drop DK areas:

In [12]:
# Flag interconnectors whose two ends both have 'DK' in their area name.
idx_DK = (df_ic['g_E'].str.find('DK')!=-1) & (df_ic['g_EE'].str.find('DK')!=-1)
# Keep the other rows and swap the two column names: the former 'g_EE' values end up in 'g_E'
# and the former 'g_E' values in 'g_EE'.
df_ic = df_ic[~idx_DK].rename(columns={'g_E':'g_EE','g_EE':'g_E'})

Add the interconnectors to the prices:

In [13]:
# Outer merge on whatever columns the two frames share (no explicit `on=` is given).
# NOTE(review): presumably 'g_E' is the only shared column, so every price row is repeated once
# per connected area in 'g_EE' — confirm, and consider passing on='g_E' to make the key explicit.
df_prices = df_prices.merge(df_ic,how='outer')

Unstack:

In [14]:
# df_prices = df_prices.set_index(['h','g_E'])['SpotPrice_€/MWh'].unstack('g_E').rename_axis(None,axis=1)

Make marginal willingness to pay:

In [15]:
# Remove DK to DK connections (rows where both area columns contain 'DK'):
idx = (df_prices['g_E'].str.find('DK')!=-1) & (df_prices['g_EE'].str.find('DK')!=-1)
# Work on a copy so the label column can be added without touching df_prices.
# (The earlier version prefixed every g_E value with 'c_' through a dict replace and stripped the
# prefix again on the next line; that round trip was a slow no-op and would have altered any
# area name that itself contains 'c_', so it is removed.)
MWP_E = df_prices.loc[~idx].copy()

# Make labels of the form '<g_EE>_ExportTo_<g_E>':
MWP_E['c_E'] = [g_EE+'_ExportTo_'+g_E for g_EE,g_E in zip(MWP_E['g_EE'],MWP_E['g_E'])]
# Wide format: one row per hour h, one column per export label, values are the spot prices.
MWP_E = MWP_E.set_index(['c_E','h'])['SpotPrice_€/MWh'].rename('MWP_E').unstack('c_E').rename_axis(None,axis=1)

Add for domestic consumer:

In [16]:
# One domestic consumer label ('c_' + area) for every distinct value in df_prices['g_EE'].
c_E_doms = [''.join(['c_',g_EE]) for g_EE in df_prices['g_EE'].unique()]
# Add one constant column of 1000 per domestic consumer, next to the spot-price columns.
# NOTE(review): 1000 is a magic number — presumably in the same unit as the spot prices; confirm.
MWP_E[c_E_doms] = 1000

`Inspect`

In [17]:
MWP_E.head()

Unnamed: 0_level_0,DK1_ExportTo_DELU,DK1_ExportTo_NL,DK1_ExportTo_NO2,DK1_ExportTo_SE3,DK2_ExportTo_DELU,DK2_ExportTo_SE4,c_DK1,c_DK2
h,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,28.32,68.92,48.77,28.32,28.32,28.32,1000,1000
2,10.07,64.98,49.25,10.07,10.07,10.07,1000,1000
3,-4.08,60.27,49.17,10.03,-4.08,10.03,1000,1000
4,-9.91,49.97,48.37,4.56,-9.91,4.56,1000,1000
5,-7.41,47.66,47.19,4.83,-7.41,4.83,1000,1000


`Note that domestic consumers are assumed to have a MWP_E of 1000 in both bidding areas.`

## Supply variation

Get supply:

In [18]:
df_supply = pd.read_pickle(os.path.join(data_dir,'IntermittentSupply_DK1DK2_2019')).replace({'hvt':{'Solar':'PV','WindOffshore':'WS','WindOnshore':'WL'}})

Estimate hourly variation in supply:

In [19]:
def _fill_with_group_means(frame, col, keys=('HourOfTheDay','Month')):
    """Return ``frame[col]`` with missing values replaced by representative-day means.

    Every NaN is replaced by the mean of the observed values that share the same combination
    of ``keys`` (by default hour of the day and month). If such a group has no observed value
    at all, the overall mean of the observed values in ``frame[col]`` is used instead.

    This replaces the earlier regression on ``get_dummies(..., drop_first=True)`` without an
    intercept: that regression reproduces the group means for all groups except the dropped
    one, whose missing values were always imputed as 0. It also avoids
    ``groupby(...).grouper.group_info``, which is deprecated in recent pandas.
    """
    observed = frame[col]
    group_mean = frame.groupby(list(keys))[col].transform('mean')
    return observed.fillna(group_mean).fillna(observed.mean())


d_y = {}
xvar, yvar = ['intercept','Forecast_MWh'], 'Actual_MWh'
forecast_col = xvar[-1]
# Rows where the forecast or the actual value is missing cannot be used for estimation.
nan_idx = df_supply[xvar[1:]+[yvar]].isna().any(axis=1)
g2hvt = df_supply[['g_E','hvt']].drop_duplicates()
# A technology present in more than one area gets the area appended to its label.
# The counts are computed once instead of once per row.
hvt_counts = g2hvt['hvt'].value_counts()
g2hvt['label'] = ['_'.join([hvt,str(area)]) if hvt_counts[hvt]>1 else hvt for hvt,area in zip(g2hvt['hvt'],g2hvt['g_E'])]
for g,hvt,hvt_label in g2hvt.itertuples(index=False):
    # Subset data
    ghvt_idx = (df_supply['hvt']==hvt) & (df_supply['g_E']==g)
    df_ghvt = df_supply.loc[ghvt_idx]
    mi = pd.Index(df_ghvt['h'].tolist(),name='h')
    df_tmp = df_supply.loc[(~nan_idx) & ghvt_idx]
    if len(df_tmp)>0:
        X, y = df_tmp.assign(intercept=1.0)[xvar].values,df_tmp[[yvar]].values
        # Make OLS estimator: (X'X)^-1 X'y
        β_hat = np.linalg.inv(X.T @ X) @ (X.T @ y)
        # Explanatory variables for every hour; missing forecasts are filled with
        # representative-day means (a no-op when nothing is missing).
        forecast = _fill_with_group_means(df_ghvt,forecast_col)
        X_sim = df_ghvt.assign(**{forecast_col:forecast,'intercept':1.0})[xvar].values
        # Make predicted supply given β_hat
        profile = np.matmul(X_sim,β_hat).flatten()
    else:
        # No row has both forecast and actual: use the actual series itself, with gaps
        # filled by representative-day means. A separate local name is used so the
        # notebook-level y_hat from the demand section is no longer overwritten.
        profile = _fill_with_group_means(df_ghvt,yvar).tolist()
    d_y['_'.join([g,hvt])] = pd.Series(profile,index=mi,name=hvt_label)

# Collect in dataframe (one column per label, aligned on h)
CapVariation = pd.concat(list(d_y.values()),axis=1)

`Inspect`

In [20]:
CapVariation.head()

Unnamed: 0_level_0,ROR,PV_DK1,WS_DK1,WL_DK1,PV_DK2,WS_DK2,WL_DK2
h,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1.0,1.120471,559.630301,2310.433207,0.603258,258.497393,556.422855
2,1.0,1.120471,598.711511,2350.602615,0.603258,334.78707,649.508678
3,1.0,1.120471,243.167824,2235.115567,0.603258,262.566175,595.043569
4,1.0,1.120471,223.150619,2170.844514,0.603258,214.757978,571.276976
5,1.0,1.120471,223.150619,2115.611578,0.603258,208.654803,564.345053


Get capacities:

In [21]:
df_plant = pd.read_pickle(os.path.join(data_dir,'PlantData_DK_2023'))
df_int = df_plant.loc[df_plant['tech'].isin(['PV','SH','WL','WS','ROR']),['id','g_E','g_H','tech','GeneratingCapacity_E','GeneratingCapacity_H','Generation_E','Generation_H']].rename(columns={'tech':'hvt'})

`Inspect`

In [22]:
df_int.head()

Unnamed: 0,id,g_E,g_H,hvt,GeneratingCapacity_E,GeneratingCapacity_H,Generation_E,Generation_H
18,id_DK1_Central_SH,DK1,DK1_Central,SH,,4.1,0.0,0.002
37,id_DK1_LargeDecentral_SH,DK1,DK1_LargeDecentral,SH,,139.0,0.0,0.08
57,id_DK1_SmallDecentral_SH,DK1,DK1_SmallDecentral,SH,,780.031,0.0,0.422
65,id_DK1_nan_PV,DK1,,PV,2900.164,,3.754,0.0
66,id_DK1_nan_ROR,DK1,,ROR,6.894,,0.016,0.0


Make unique hvt identifier:

In [23]:
# Number of rows per technology, computed once (it used to be recomputed with a groupby for
# every row of df_int inside the list comprehension).
hvt_counts = df_int['hvt'].value_counts()


def _unique_hvt(hvt, g_E, g_H):
    """Return a label that is unique per plant group.

    A technology that occurs only once keeps its plain name. Otherwise the heat area ``g_H``
    is appended when it is a string, and the electricity area ``g_E`` when ``g_H`` is missing
    (its string representation is 'nan'). Any other ``g_H`` value leaves the name unchanged.
    """
    if hvt_counts[hvt] <= 1:
        return hvt
    if str(g_H) == 'nan':
        return '_'.join([hvt, g_E])
    if isinstance(g_H, str):
        return '_'.join([hvt, g_H])
    return hvt


df_int['hvt'] = pd.Series([_unique_hvt(hvt,g_E,g_H) for hvt,g_E,g_H in zip(df_int['hvt'],df_int['g_E'],df_int['g_H'])],index=df_int.index)

Compute capacity factors:

In [24]:
# Divide every hourly profile by the summed GeneratingCapacity_E of the df_int rows that carry
# the same hvt label.
# NOTE(review): CapVariation is overwritten in place, so running this cell twice divides twice.
# A label without a match in df_int has a capacity sum of 0, which turns the column into inf/NaN.
for hvt in CapVariation.columns:
    CapVariation[hvt] = CapVariation[hvt]/df_int.loc[df_int['hvt']==hvt,'GeneratingCapacity_E'].sum()

Assume solar heat has the same pattern as PV:

In [25]:
# Generation_H of every row whose hvt label contains 'SH', indexed by that label.
SH_gen = df_int[df_int['hvt'].str.find('SH')!=-1].set_index(['hvt'])['Generation_H']
# Pair each SH label with a PV label built from its first two '_'-separated tokens
# (e.g. 'SH_DK1_Central' -> 'PV_DK1'), so that the division below can align on 'hvt_E'.
SH_gen.index = pd.MultiIndex.from_tuples([('_'.join(hvt.replace('SH','PV').split('_')[0:2]),hvt) for hvt in SH_gen.index],name=['hvt_E','hvt'])
# Column sums of the PV profiles currently stored in CapVariation.
PV_gen = CapVariation[[x for x in CapVariation.columns if x.find('PV')!=-1]].sum().rename_axis('hvt_E')
# Ratio of SH generation to the summed PV profile of the matching 'hvt_E'.
scale = SH_gen.div(PV_gen)
for i in range(len(scale)):
    hvt_E, hvt = scale.index[i]
    # New SH column: the PV profile times the ratio, i.e. the PV shape rescaled so that the
    # column sums to the plant's Generation_H.
    CapVariation[hvt] = CapVariation[hvt_E]*scale[:,hvt].iloc[0]

Add standard:

In [26]:
CapVariation['Standard'] = 1

Save id2hvt mapping:

In [27]:
id2hvt = pd.concat([
    df_int[['id','hvt']],
    df_plant.loc[~df_plant['id'].isin(df_int['id']),['id']].assign(hvt='Standard')
],axis=0).reset_index(drop=True)

`Inspect`

In [28]:
id2hvt.head()

Unnamed: 0,id,hvt
0,id_DK1_Central_SH,SH_DK1_Central
1,id_DK1_LargeDecentral_SH,SH_DK1_LargeDecentral
2,id_DK1_SmallDecentral_SH,SH_DK1_SmallDecentral
3,id_DK1_nan_PV,PV_DK1
4,id_DK1_nan_ROR,ROR


## Heat Demand

Download from Open Power System Data:

`Download through URL and don't save csv because datafile is large.`

In [29]:
url = 'https://data.open-power-system-data.org/when2heat/2023-07-27/when2heat.csv'
df_hd = pd.read_csv(url, sep=';', on_bad_lines='warn',decimal=',')

In [30]:
df_hd.head()

Unnamed: 0,utc_timestamp,cet_cest_timestamp,AT_COP_ASHP_floor,AT_COP_ASHP_radiator,AT_COP_ASHP_water,AT_COP_GSHP_floor,AT_COP_GSHP_radiator,AT_COP_GSHP_water,AT_COP_WSHP_floor,AT_COP_WSHP_radiator,...,SK_heat_demand_water,SK_heat_demand_water_COM,SK_heat_demand_water_MFH,SK_heat_demand_water_SFH,SK_heat_profile_space_COM,SK_heat_profile_space_MFH,SK_heat_profile_space_SFH,SK_heat_profile_water_COM,SK_heat_profile_water_MFH,SK_heat_profile_water_SFH
0,2007-12-31T22:00:00Z,2007-12-31T23:00:00+0100,2.8,2.24,2.03,3.72,2.77,2.4,4.57,3.32,...,128.0,64.0,45.0,19.0,236,174,196,66,68,13
1,2007-12-31T23:00:00Z,2008-01-01T00:00:00+0100,2.8,2.24,2.03,3.72,2.77,2.4,4.57,3.32,...,128.0,64.0,45.0,19.0,236,174,196,66,68,13
2,2008-01-01T00:00:00Z,2008-01-01T01:00:00+0100,2.8,2.23,2.02,3.7,2.75,2.39,4.57,3.31,...,150.0,80.0,46.0,23.0,239,177,198,83,71,15
3,2008-01-01T01:00:00Z,2008-01-01T02:00:00+0100,2.78,2.21,2.02,3.67,2.72,2.38,4.55,3.29,...,147.0,84.0,45.0,19.0,244,179,204,86,69,12
4,2008-01-01T02:00:00Z,2008-01-01T03:00:00+0100,2.77,2.2,2.01,3.65,2.7,2.37,4.55,3.28,...,163.0,85.0,43.0,35.0,261,193,212,88,66,23


Subset to DK

In [31]:
DK_cols = ['utc_timestamp'] + [x for x in df_hd.columns if x.find('DK_')!=-1]
df_hd = df_hd[DK_cols].copy()

Format timestamp:

In [32]:
df_hd['HourUTC'] = pd.to_datetime(df_hd['utc_timestamp'].str.replace('T',' '),utc=True)
df_hd['HourCET/CEST'] = df_hd['HourUTC'].dt.tz_convert('Europe/Brussels')

Subset year:

`2019`

In [33]:
df_hd = df_hd[df_hd['HourCET/CEST'].dt.year==2019].drop(columns='utc_timestamp')

`Inspect to see that we have data for 2019.`

In [34]:
df_hd.head()

Unnamed: 0,DK_COP_ASHP_floor,DK_COP_ASHP_radiator,DK_COP_ASHP_water,DK_COP_GSHP_floor,DK_COP_GSHP_radiator,DK_COP_GSHP_water,DK_COP_WSHP_floor,DK_COP_WSHP_radiator,DK_COP_WSHP_water,DK_heat_demand_space,...,DK_heat_demand_water_MFH,DK_heat_demand_water_SFH,DK_heat_profile_space_COM,DK_heat_profile_space_MFH,DK_heat_profile_space_SFH,DK_heat_profile_water_COM,DK_heat_profile_water_MFH,DK_heat_profile_water_SFH,HourUTC,HourCET/CEST
96422,3.82,3.43,2.49,5.0,4.3,2.77,5.28,4.52,2.8,,...,,,126,112,93,76,90,30,2018-12-31 23:00:00+00:00,2019-01-01 00:00:00+01:00
96423,3.84,3.46,2.5,5.02,4.33,2.77,5.29,4.54,2.8,,...,,,125,94,88,83,70,15,2019-01-01 00:00:00+00:00,2019-01-01 01:00:00+01:00
96424,3.84,3.46,2.5,5.02,4.33,2.77,5.29,4.54,2.8,,...,,,130,89,92,86,68,12,2019-01-01 01:00:00+00:00,2019-01-01 02:00:00+01:00
96425,3.83,3.44,2.49,5.01,4.31,2.77,5.29,4.53,2.8,,...,,,144,100,105,88,65,22,2019-01-01 02:00:00+00:00,2019-01-01 03:00:00+01:00
96426,3.81,3.42,2.48,5.0,4.29,2.77,5.27,4.51,2.8,,...,,,160,130,133,121,105,53,2019-01-01 03:00:00+00:00,2019-01-01 04:00:00+01:00


Now select the columns we need:

`Heat profile columns only.`

In [35]:
LoadVariation_H = df_hd.set_index(['HourCET/CEST'])[[x for x in df_hd.columns if x.find('heat_profile')!=-1]].sum(axis=1).rename('LoadVariation_H')

Linearly interpolate missing values:

`Linear interpolation is a method to estimate the missing values in a dataset by filling in the gaps with values that create a straight line between the known values on either side.`

In [36]:
# Rebuild a gap-free hourly index between the first and the last timestamp; hours that were
# absent from the data become NaN rows. The step is given as a Timedelta because the 'H'
# frequency alias is deprecated in recent pandas releases.
LoadVariation_H = LoadVariation_H.reindex(pd.date_range(LoadVariation_H.index[0],LoadVariation_H.index[-1],freq=pd.Timedelta(hours=1)))
# Timestamps that had to be inserted (kept for inspection).
id_time = LoadVariation_H[LoadVariation_H.isna()].index
# Fill the inserted hours by linear interpolation between neighbouring values.
LoadVariation_H = LoadVariation_H.interpolate()

Add index:

In [37]:
# Replace the timestamps by the hour counter h = 1,...,8760.
# NOTE(review): the length is hard-coded, so this assignment raises if the series does not have
# exactly 8760 rows (e.g. for a leap year).
LoadVariation_H.index = pd.Index(range(1,8761),name='h')

Rescale load variation:

In [38]:
Tot_H = LoadVariation_H.sum()
LoadVariation_H = LoadVariation_H/Tot_H

Duplicate for all g_H areas:

In [39]:
# Full index: one consumer label ('c_' + heat area) per non-missing g_H in df_plant, crossed
# with every hour of LoadVariation_H.
mi = pd.MultiIndex.from_product([['c_'+x for x in df_plant['g_H'].dropna().unique()],LoadVariation_H.index],names=['c_H','h'])
# Start from zeros on the (c_H, h) index and add the hourly shares, so that every heat area
# receives the same profile.
# NOTE(review): relies on pandas broadcasting the 'h'-indexed series across the 'c_H' level.
LoadVariation_H = pd.Series(0,index=mi,name='LoadVariation_H').add(LoadVariation_H)

`Inspect LoadVariation_H`

In [40]:
print(LoadVariation_H)

c_H                   h   
c_DK1_Central         1       0.000088
                      2       0.000079
                      3       0.000079
                      4       0.000087
                      5       0.000117
                                ...   
c_DK2_SmallDecentral  8756    0.000165
                      8757    0.000165
                      8758    0.000150
                      8759    0.000120
                      8760    0.000091
Name: LoadVariation_H, Length: 52560, dtype: float64


Get total demand:

In [41]:
# Total heat generation per heat area, multiplied by 10**6.
# NOTE(review): presumably a unit conversion of Generation_H (e.g. TWh -> MWh) — confirm.
Load_H = df_plant.groupby('g_H')['Generation_H'].sum().mul(10**6).rename('Load_H')
# Prefix the area names with 'c_' and name the index 'c_H'. Assigning a plain list to
# Load_H.index (as done before) silently discarded the index name, leaving Load_H without the
# 'c_H' level name that LoadVariation_H uses.
Load_H.index = pd.Index(['c_'+x for x in Load_H.index],name='c_H')

`Inspect: Load for geographical scope.`

In [42]:
print(Load_H)

c_DK1_Central           12759000.0
c_DK1_LargeDecentral     3896000.0
c_DK1_SmallDecentral     5906000.0
c_DK2_Central           12200000.0
c_DK2_LargeDecentral     2017000.0
c_DK2_SmallDecentral     1998000.0
Name: Load_H, dtype: float64


## Transmission capacities 

Get hourly variations:

In [43]:
file_path = os.path.join(data_dir,'TransmissionCapacities_DK_2019')
df_ntc = pd.read_pickle(file_path)

Melt dataframe:

In [44]:
# Rows describing the DK2 -> DK1 pair are excluded from both parts below.
idx = (df_ntc['g_E']=='DK2') & (df_ntc['g_EE']=='DK1')
# Stack two long tables that share the columns (g_E, g_EE, h, NTC):
#   1) the import capacities, with the two area columns swapped,
#   2) the export capacities, with the area columns as they are.
# NOTE(review): the direction convention of ImportCapacity_MW/ExportCapacity_MW is not visible
# here — confirm that swapping the areas for imports gives the intended flow direction.
df_ntc = pd.concat([
    df_ntc.loc[~idx,['g_E','g_EE','h','ImportCapacity_MW']].rename(columns={'g_E':'g_EE','g_EE':'g_E','ImportCapacity_MW':'NTC'}),
    df_ntc.loc[~idx,['g_E','g_EE','h','ExportCapacity_MW']].rename(columns={'ExportCapacity_MW':'NTC'})
],axis=0)

`Inspect df_ntc`

In [45]:
df_ntc.head()

Unnamed: 0,g_EE,g_E,h,NTC
0,DK1,DELU,1,1500.0
1,DK1,DELU,2,1500.0
2,DK1,DELU,3,1500.0
3,DK1,DELU,4,1500.0
4,DK1,DELU,5,1500.0


Get maximum capacity:

In [46]:
df_ttc = pd.read_pickle(os.path.join(data_dir,'TTC_DK_2023')).replace({'NOS':'NO2'})

Add hourly variations:

In [47]:
df_TCap = df_ntc.merge(df_ttc,how='outer')

`Inspect df_TCap`

In [48]:
df_TCap.head()

Unnamed: 0,g_EE,g_E,h,NTC,Year,TTC
0,DK1,DELU,1,1500.0,2023,2500
1,DK1,DELU,2,1500.0,2023,2500
2,DK1,DELU,3,1500.0,2023,2500
3,DK1,DELU,4,1500.0,2023,2500
4,DK1,DELU,5,1500.0,2023,2500


Correct TTC if the actual measured capacity is higher:

In [49]:
# Raise TTC to the observed NTC wherever the observed value is larger.
# The row-wise maximum skips missing values: a row without an NTC observation keeps its TTC.
# The earlier Python max(x, y) returned NaN whenever NTC was NaN, erasing a valid TTC.
df_TCap['TTC'] = df_TCap[['NTC','TTC']].max(axis=1)

Calculate capacity factor:

In [50]:
# Hourly capacity factor: non-negative NTC relative to TTC, computed column-wise.
# Negative NTC values are clipped to 0; missing NTC values stay missing. A TTC of exactly 0 is
# treated as missing, so the factor becomes NaN instead of raising ZeroDivisionError as the
# earlier row-by-row Python division did.
df_TCap['CapVariation'] = df_TCap['NTC'].clip(lower=0).div(df_TCap['TTC'].where(df_TCap['TTC']!=0))

Find rows with foreign connections:

In [51]:
idx_DK = (df_TCap['g_E'].str.find('DK')!=-1) & (df_TCap['g_EE'].str.find('DK')!=-1)

Make variation in import capacities:

In [52]:
import_idx = (df_TCap['g_E'].str.find('DK')!=-1) & (~idx_DK)
df_M = df_TCap[import_idx].copy()
df_M['hvt'] = [g_E+'_ImportFrom_'+g_EE for g_E,g_EE in zip(df_M['g_E'],df_M['g_EE'])] 
df_M2 = df_M.set_index(['h','hvt'])['CapVariation'].unstack('hvt').rename_axis(None,axis=1)

Add import capacity variation to the main dataframe

In [53]:
CapVariation = pd.concat([CapVariation,df_M2],axis=1)

`Inspect CapVariation`

In [54]:
CapVariation.head()

Unnamed: 0_level_0,ROR,PV_DK1,WS_DK1,WL_DK1,PV_DK2,WS_DK2,WL_DK2,SH_DK1_Central,SH_DK1_LargeDecentral,SH_DK1_SmallDecentral,SH_DK2_Central,SH_DK2_LargeDecentral,SH_DK2_SmallDecentral,Standard,DK1_ImportFrom_DELU,DK1_ImportFrom_NL,DK1_ImportFrom_NO2,DK1_ImportFrom_SE3,DK2_ImportFrom_DELU,DK2_ImportFrom_SE4
h,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1,0.145054,0.000386,0.348375,0.561605,0.000597,0.246376,0.749428,3.570287e-09,1.428115e-07,7.533306e-07,1.7666e-09,3.533201e-09,1.94326e-07,1,0.36,0.0,0.938725,0.293706,0.593909,0.275429
2,0.145054,0.000386,0.372704,0.571369,0.000597,0.319088,0.874803,3.570287e-09,1.428115e-07,7.533306e-07,1.7666e-09,3.533201e-09,1.94326e-07,1,0.36,0.0,0.938725,0.28951,0.593909,0.271429
3,0.145054,0.000386,0.151374,0.543297,0.000597,0.250254,0.801445,3.570287e-09,1.428115e-07,7.533306e-07,1.7666e-09,3.533201e-09,1.94326e-07,1,0.36,0.0,0.938725,0.302098,0.593909,0.282857
4,0.145054,0.000386,0.138913,0.527675,0.000597,0.204687,0.769435,3.570287e-09,1.428115e-07,7.533306e-07,1.7666e-09,3.533201e-09,1.94326e-07,1,0.36,0.0,0.938725,0.309091,0.593909,0.290286
5,0.145054,0.000386,0.138913,0.514249,0.000597,0.19887,0.760099,3.570287e-09,1.428115e-07,7.533306e-07,1.7666e-09,3.533201e-09,1.94326e-07,1,0.36,0.0,0.938725,0.327273,0.593909,0.306857


Add variation in importing capacities to mapping:

In [55]:
# Columns of CapVariation that describe import capacities.
import_hvts = CapVariation.columns[CapVariation.columns.str.find('ImportFrom')!=-1]
# Matching plant ids: the column name prefixed with 'id_'.
import_ids = ['id_' + hvt for hvt in import_hvts]
# Append the import ids to the mapping. Duplicate rows are dropped so that re-running the cell
# does not append the same ids again, and the index is reset so it stays a clean 0..n-1 range,
# as it is when id2hvt is first built.
id2hvt = (
    pd.concat([id2hvt,pd.DataFrame({'id':import_ids,'hvt':list(import_hvts)})],axis=0)
    .drop_duplicates()
    .reset_index(drop=True)
)

Make variation in export capacities:

In [56]:
export_idx = (df_TCap['g_EE'].str.find('DK')!=-1) & (~idx_DK)
df_X = df_TCap[export_idx].copy()
df_X['c_E'] = [g_EE+'_ExportTo_'+g_E for g_E,g_EE in zip(df_X['g_E'],df_X['g_EE'])]
df_X2 = df_X.set_index(['c_E','h'])['CapVariation'].rename('LoadVariation_E')

Add export variation to load variation:

In [57]:
LoadVariation_E = pd.concat([LoadVariation_E,df_X2],axis=0)

Add total demand:

In [58]:
Load_E = pd.concat([Load_E,df_X.groupby('c_E')['TTC'].max().rename('Load_E')],axis=0)

Finally, get actual transmission capacities if they are present:

In [59]:
lineCapacity = df_TCap[idx_DK].groupby(['g_E','g_EE'])['TTC'].max().reset_index().rename(columns={'g_EE':'g_E_alias'})

And also the hourly variation in the transmission lines' capacity:

In [60]:
lineVariation = df_TCap.loc[idx_DK,['g_E','g_EE','h','CapVariation']].rename(columns={'g_EE':'g_E_alias','CapVariation':'lineVariation'})

## Generation technologies

Electricity generators from Danish Energy Agency:

In [61]:
GeneratingCapacity_E = df_plant.set_index('id')['GeneratingCapacity_E'].dropna()

Add importing areas:

In [62]:
df_M['id'] = ['id_'+x for x in df_M['hvt']]
GeneratingCapacity_E = pd.concat([
    GeneratingCapacity_E,
    df_M.groupby('id')['TTC'].max().rename('GeneratingCapacity_E')
],axis=0).reset_index()

Heat generators from Danish Energy Agency:

In [63]:
GeneratingCapacity_H = df_plant.set_index('id')['GeneratingCapacity_H'].dropna().reset_index()

Fuel mix:

In [64]:
FuelMix = df_plant.set_index(['id','BFt'])['FuelMix'].dropna().reset_index()

Electricity to Heat ratios:

In [65]:
E2H = df_plant.set_index('id')['E2H'].dropna().reset_index()

Electricity to Hydrogen ratios:

In [66]:
E2HH = df_plant.set_index('id')['E2HH'].dropna().reset_index()

Foreign plants marginal generation costs:

In [67]:
# One spot price per (g_E, g_EE, h); exact duplicate rows are dropped.
OtherMC = df_prices[['g_E','g_EE','h','SpotPrice_€/MWh']].drop_duplicates()
# Name each series after the import "plant" it prices: 'id_<g_EE>_ImportFrom_<g_E>'.
OtherMC['id'] = ['id_'+g_EE+'_ImportFrom_'+g_E for g_E,g_EE in zip(OtherMC['g_E'],OtherMC['g_EE'])]
# Wide format: hours h as rows, one column per import id, values are the spot prices.
OtherMC = OtherMC.set_index(['id','h'])['SpotPrice_€/MWh'].rename('OtherMC').unstack('id').rename_axis(None,axis=1)

## Marginal generation costs:

Download Danish Energy Agency's technology catalogue:

In [68]:
# Local cache of the Danish Energy Agency technology catalogue.
file_path = os.path.join(direc,'RawData','technology_data_for_el_and_dh.xlsx')
if os.path.isfile(file_path):
    # NOTE(review): the cached copy is read with decimal=',' while the download below is not;
    # numeric Excel cells are unaffected, but confirm the option is intended.
    df_tc = pd.read_excel(file_path,decimal=',')
else:
    url = 'https://ens.dk/sites/ens.dk/files/Analyser/technology_data_for_el_and_dh.xlsx'
    df_tc = pd.read_excel(url,sheet_name='alldata_flat')
    # Make sure the cache folder exists before writing into it.
    os.makedirs(os.path.dirname(file_path),exist_ok=True)
    # Write without the DataFrame index: otherwise the cached file gains an extra unnamed first
    # column that shows up as 'Unnamed: 0' when the cache is read back.
    df_tc.to_excel(file_path,index=False)

Subset to most recent year:

`Find unique values in year column to make decision.`

In [69]:
print(df_tc['year'].unique())

[2015 2020 2030 2050 2040 2025]


`We use 2020: among the available catalogue years it is the one closest to our data year, 2019 (closer than 2015).`

In [70]:
df_tc = df_tc[df_tc['year']==2020]

Subset to only financial data:

In [71]:
df_tc = df_tc[df_tc['cat']=='Financial data']

Get DEA's technologies:

In [72]:
DEA_tc = df_tc['technology'].sort_values().unique()

Specifying mapping between model technologies and the ones in Danish Energy Agency's catalogue:

In [73]:
# Plants from ClimateOutlook
print(df_plant['tech'].unique())

['BP_Biogas' 'IndustryH_Biogas' 'BH_Biomass' 'BP_Biomass' 'BP_Coal'
 'BH_Natgas' 'BP_Natgas' 'IndustryH_Natgas' 'BH_Oil' 'BP_Oil'
 'IndustryH_Oil' 'BH_Waste' 'BP_Waste' 'EP' 'HPstandard' 'HPsurplusheat'
 'IH' 'IndustryH' 'SH' 'BH_Biogas' 'IndustryH_Biomass' 'GT' 'CD_Biogas'
 'IndustryE_Biogas' 'IndustryE_Biomass' 'CD_Natgas' 'IndustryE_Natgas'
 'CD_Oil' 'IndustryE_Oil' 'PV' 'ROR' 'WL' 'WS' 'CD_Coal']


In [74]:
def _dea_matches(pattern):
    """Return the entries of DEA_tc (in their existing order) whose name contains `pattern`."""
    return [x for x in DEA_tc if pattern in x]


# Model technology -> list of matching technologies in the Danish Energy Agency catalogue.
# Entries that are commented out have no catalogue counterpart assigned.
tech2technology = {
    ## BH (Boiler)
    # 'BH_Biogas':[np.nan], 
    'BH_Biomass': _dea_matches('Biomass boiler'),
    'BH_Natgas': _dea_matches('Gas boiler'),
    # 'BH_Oil': [np.nan], 
    'BH_Waste': _dea_matches('Waste boiler'),
    'IH': _dea_matches('Electric boiler'),
    ## BP
    'BP_Biogas': _dea_matches('back pressure - biogas'),
    'BP_Biomass': _dea_matches('Biomass CHP - back pressure'),
    'BP_Coal': _dea_matches('Coal power plant, supercritical - extraction -'),
    'BP_Natgas': _dea_matches('back pressure - natural gas'),
    'BP_Oil': _dea_matches('back pressure - light fuel oil'),
    'BP_Waste': _dea_matches('Waste CHP'),
    ## CD (Steam turbine for electricity production only)
    # 'CD_Biogas':[np.nan], 
    'CD_Coal': _dea_matches('Coal power plant, supercritical - extraction -'),
    'CD_Natgas': _dea_matches('extraction - natural gas'),
    'CD_Oil': _dea_matches('back pressure - light fuel oil'),
    ## Geothermal
    'GT': _dea_matches('Geothermal plant, absorption heat pump'),
    ## HP
    'HPstandard': _dea_matches('Heat pump, air source'),
    'HPsurplusheat': _dea_matches('Heat pump, industrial excess heat'),
    # 'IndustryE_Biogas':[np.nan],
    # 'IndustryE_Natgas':[np.nan], 
    # 'IndustryE_Oil':[np.nan], 
    # 'IndustryH':[np.nan],
    # 'IndustryH_Biogas':[np.nan], 
    # 'IndustryH_Biomass':[np.nan], 
    # 'IndustryH_Natgas':[np.nan],
    # 'IndustryH_Oil':[np.nan], 
    'PV': _dea_matches('PV -'),
    # 'EP':[np.nan], 
    # 'ROR':[x for x in DEA_tc if x.find('Wave power')!=-1], 
    'SH': _dea_matches('Solar DH'),
    'WL': _dea_matches('Onshore wind turbine'),
    'WS': _dea_matches('Offshore wind turbines'),
}

Inflation rates:

In [75]:
# Yearly gross inflation factors (1 + rate) for 2010-2022, indexed by year 't'. The listed
# numbers are percentages and are converted with div(100).add(1).
# NOTE(review): the source of these inflation rates is not stated in the notebook — add one.
π = pd.Series([1.531122704, 3.289449396, 2.662841655, 1.219993423, 0.199343827, -0.06164468, 0.183334861, 1.429107433, 1.73860862, 1.630522608, 0.476498853, 2.554506996, 8.833698867],index=pd.Index(range(2010,2023),name='t')).div(100).add(1)

Now get the average estimates:

`Adjusting for inflation: we use electricity and heat prices for 2019, while the cost estimates in "technology_data_for_el_and_dh" are quoted in either 2015 or 2020 prices. Estimates in 2015 prices are therefore scaled up to 2019 prices and estimates in 2020 prices are scaled down to 2019 prices, using the cumulative inflation between the price year and 2019 (see the code below).`

In [76]:
df_tc

Unnamed: 0.1,Unnamed: 0,ws,technology,tech,type,input,size,cat,par,par_short,info,unit,priceyear,note,ref,est,year,val
45,45,01 Coal CHP,"Coal power plant, supercritical - extraction -...","Coal power plant, supercritical",extraction,coal,medium,Financial data,Fixed O&M (*total) [2015-EUR/MW_e/y],Fixed O&M,*total,EUR/MW_e/y,2015.0,J,"[17, 18, 19, 20, 21, 22]",ctrl,2020,31000.000000
49,49,01 Coal CHP,"Coal power plant, supercritical - extraction -...","Coal power plant, supercritical",extraction,coal,medium,Financial data,Nominal investment (*total) [2015-MEUR/MW_e],Nominal investment,*total,MEUR/MW_e,2015.0,J,"[17, 18, 19, 20, 21, 22]",ctrl,2020,1.900000
53,53,01 Coal CHP,"Coal power plant, supercritical - extraction -...","Coal power plant, supercritical",extraction,coal,medium,Financial data,Variable O&M (*total) [2015-EUR/MWh_e],Variable O&M,*total,EUR/MWh_e,2015.0,J,"[17, 18, 19, 20, 21, 22]",ctrl,2020,2.900000
181,181,02 LTE existing plant,Life time extension of coal extraction plant -...,Life time extension of coal extraction plant,refurbish,coal,large,Financial data,Nominal investment (equipment) [2015-MEUR/MW_e],Nominal investment,equipment,MEUR/MW_e,2015.0,,,ctrl,2020,
182,182,02 LTE existing plant,Life time extension of coal extraction plant -...,Life time extension of coal extraction plant,refurbish,coal,large,Financial data,Nominal investment (equipment) [2015-MEUR/MW_e],Nominal investment,equipment,MEUR/MW_e,2015.0,,,lower,2020,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18528,18528,21 Near shore turbines,"Offshore wind turbines, nearshore - renewable ...","Offshore wind turbines, nearshore",renewable power,wind,large,Financial data,Nominal investment (project development etc.) ...,Nominal investment,project development etc.,MEUR/MW_e,2020.0,,"3, 6",ctrl,2020,0.394347
18537,18537,21 Near shore turbines,"Offshore wind turbines, nearshore - renewable ...","Offshore wind turbines, nearshore",renewable power,wind,large,Financial data,Nominal investment (array cables) [2020-MEUR/M...,Nominal investment,array cables,MEUR/MW_e,2020.0,,3,ctrl,2020,0.012591
18546,18546,21 Near shore turbines,"Offshore wind turbines, nearshore - renewable ...","Offshore wind turbines, nearshore",renewable power,wind,large,Financial data,Nominal investment (foundation) [2020-MEUR/MW_e],Nominal investment,foundation,MEUR/MW_e,2020.0,,"3, 4",ctrl,2020,0.134929
18555,18555,21 Near shore turbines,"Offshore wind turbines, nearshore - renewable ...","Offshore wind turbines, nearshore",renewable power,wind,large,Financial data,Nominal investment (grid connection) [2020-MEU...,Nominal investment,grid connection,MEUR/MW_e,2020.0,,"3, 5",ctrl,2020,0.052000


In [77]:
# Check FOMs without inflation adjustment
df_plant

Unnamed: 0,Year,g_E,g_H,BFt,tech,modelTech,GeneratingCapacity_E,GeneratingCapacity_H,FuelMix,E2H,E2HH,Generation_E,Generation_H,id
0,2023,DK1,DK1_Central,Biogas,BP_Biogas,BP,9.412,,2.544304,0.895869,,0.079,0.087,id_DK1_Central_BP_Biogas
1,2023,DK1,DK1_Central,Biogas,IndustryH_Biogas,standard_H,,8.414,0.518519,,,0.000,0.027,id_DK1_Central_IndustryH_Biogas
2,2023,DK1,DK1_Central,Biomass,BH_Biomass,standard_H,,266.306,0.946472,,,0.000,0.411,id_DK1_Central_BH_Biomass
3,2023,DK1,DK1_Central,Biomass,BP_Biomass,BP,671.081,,4.169188,0.504207,,2.181,5.694,id_DK1_Central_BP_Biomass
4,2023,DK1,DK1_Central,Coal,BP_Coal,BP,1091.285,,2.650372,0.825316,,1.882,1.131,id_DK1_Central_BP_Coal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118,2023,DK2,,Oil,CD_Oil,standard_E,833.282,,2.600000,,,0.005,0.000,id_DK2_nan_CD_Oil
119,2023,DK2,,Oil,IndustryE_Oil,standard_E,1.250,,,,,0.000,0.000,id_DK2_nan_IndustryE_Oil
120,2023,DK2,,,PV,standard_E,1010.638,,,,,1.282,0.000,id_DK2_nan_PV
121,2023,DK2,,,WL,standard_E,742.463,,,,,1.812,0.000,id_DK2_nan_WL


In [101]:
# Sanity check: cumulative inflation factor from 2015 prices to 2019 prices, i.e. the product of
# π over the years 2016-2019.
# NOTE(review): this cell carries execution count 101, so it was run out of order; re-run the
# notebook from top to bottom before relying on the displayed value.
π[range(int(2015 + 1), 2023)].cumprod()[2019]

1.050674133950355

In [78]:
def _price_scale(priceyear, base_year=2019):
    """Return the factor that converts an amount in `priceyear` prices into `base_year` prices.

    Uses the notebook-level inflation series π (gross yearly factors indexed by year):
      * priceyear <  base_year: multiply by π[priceyear+1] * ... * π[base_year]
      * priceyear == base_year: factor 1
      * priceyear >  base_year: divide by π[base_year+1] * ... * π[priceyear]

    Raises ValueError when the price year is missing or when the years needed are not covered
    by π.
    """
    if pd.isna(priceyear):
        raise ValueError('priceyear is missing; cannot convert to base-year prices')
    year = int(priceyear)
    first_needed, last_needed = min(year, base_year) + 1, max(year, base_year)
    if first_needed <= last_needed and (first_needed < π.index.min() or last_needed > π.index.max()):
        raise ValueError(f'inflation series π does not cover {first_needed}-{last_needed}')
    if year <= base_year:
        return π.loc[year+1:base_year].prod()
    return 1/π.loc[base_year+1:year].prod()


# Parameter names in the catalogue that hold total fixed O&M costs.
FOM_tmp = [x for x in df_tc['par'] if x.find('Fixed O&M (*total)')!=-1]
for tech in tech2technology.keys():
    idx_plant = df_plant['tech']==tech
    # Catalogue rows of the DEA technologies mapped to this model technology.
    df_tmp = df_tc[df_tc['technology'].isin(tech2technology[tech])].copy()
    # Prefer the 'other O&M' component of variable O&M when the catalogue reports it,
    # otherwise fall back to total variable O&M.
    if (df_tmp['par'].str.find('Variable O&M (other O&M)')!=-1).sum()>0:
        OtherMC_tmp = [x for x in df_tmp['par'] if x.find('Variable O&M (other O&M)')!=-1]
    else:
        OtherMC_tmp = [x for x in df_tmp['par'] if x.find('Variable O&M (*total)')!=-1]
    techvars = {
        'OtherMC': OtherMC_tmp,
        'FOM': FOM_tmp
    }
    for var in techvars.keys():
        df_var = df_tmp[df_tmp['par'].isin(techvars[var])].copy()
        if len(df_var) > 0:
            # Every estimate is converted with the factor of its own price year. The earlier
            # version picked one rule per group from `.any()` tests, which raised a KeyError
            # for groups mixing price years and for any price year other than 2015-2018 or 2020.
            df_var['price_scale'] = df_var['priceyear'].apply(_price_scale)
            val = (df_var['price_scale'] * df_var['val']).mean()
        else:
            # No catalogue estimate for this technology/parameter.
            val = 0
        df_plant.loc[idx_plant,var] = val

In [80]:
# Check FOMs with inflation adjustment
df_plant

Unnamed: 0,Year,g_E,g_H,BFt,tech,modelTech,GeneratingCapacity_E,GeneratingCapacity_H,FuelMix,E2H,E2HH,Generation_E,Generation_H,id,OtherMC,FOM
0,2023,DK1,DK1_Central,Biogas,BP_Biogas,BP,9.412,,2.544304,0.895869,,0.079,0.087,id_DK1_Central_BP_Biogas,9.280955,109751.668909
1,2023,DK1,DK1_Central,Biogas,IndustryH_Biogas,standard_H,,8.414,0.518519,,,0.000,0.027,id_DK1_Central_IndustryH_Biogas,,
2,2023,DK1,DK1_Central,Biomass,BH_Biomass,standard_H,,266.306,0.946472,,,0.000,0.411,id_DK1_Central_BH_Biomass,1.038205,40675.097972
3,2023,DK1,DK1_Central,Biomass,BP_Biomass,BP,671.081,,4.169188,0.504207,,2.181,5.694,id_DK1_Central_BP_Biomass,4.441489,213076.714365
4,2023,DK1,DK1_Central,Coal,BP_Coal,BP,1091.285,,2.650372,0.825316,,1.882,1.131,id_DK1_Central_BP_Coal,3.046955,32570.898152
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118,2023,DK2,,Oil,CD_Oil,standard_E,833.282,,2.600000,,,0.005,0.000,id_DK2_nan_CD_Oil,,
119,2023,DK2,,Oil,IndustryE_Oil,standard_E,1.250,,,,,0.000,0.000,id_DK2_nan_IndustryE_Oil,,
120,2023,DK2,,,PV,standard_E,1010.638,,,,,1.282,0.000,id_DK2_nan_PV,0.000000,11802.096479
121,2023,DK2,,,WL,standard_E,742.463,,,,,1.812,0.000,id_DK2_nan_WL,1.576011,35985.589088


For industry generators, we simply assume that both marginal costs (`OtherMC`) and `FOM` are zero:

In [81]:
# Flag every plant whose 'tech' label contains the substring 'Industry' and
# set both of its cost columns to zero in a single assignment.
idx_plant = df_plant['tech'].str.find('Industry').ne(-1)
df_plant.loc[idx_plant, ['OtherMC', 'FOM']] = 0

For the remaining plants, we fill in missing costs with the average of the estimates for similar technologies:

In [82]:
# Impute missing cost estimates with the average over plants that share the
# same technology prefix (the part of 'tech' before the first underscore).
# `.str[0]` yields NaN for a missing 'tech' instead of raising.
df_plant['tech_tmp'] = df_plant['tech'].str.split('_').str[0]
for cost_col in ['OtherMC', 'FOM']:
    # Each column uses its OWN missing-value mask. Reusing the 'OtherMC' mask
    # for 'FOM' would overwrite a valid FOM whenever only OtherMC is missing,
    # and would leave FOM empty whenever only FOM is missing.
    idx_nans = df_plant[cost_col].isna()
    group_mean = df_plant.groupby('tech_tmp')[cost_col].transform('mean')
    df_plant.loc[idx_nans, cost_col] = group_mean[idx_nans]

Subset:

In [83]:
# Re-index the plant table by 'id' once and pull out the two cost columns
# as id-indexed Series.
plant_by_id = df_plant.set_index('id')
OtherMC_domes = plant_by_id['OtherMC']
FOM = plant_by_id['FOM']

Now add FOM of foreign plants:

In [84]:
# Every id in GeneratingCapacity_E that has no FOM entry yet is appended to
# FOM with a value of 0.
capacity_ids = GeneratingCapacity_E['id']
id_tcap = capacity_ids[~capacity_ids.isin(FOM.index)]
FOM_missing = pd.Series(0, index=pd.Index(id_tcap, name='id'), name='FOM')
FOM = pd.concat([FOM, FOM_missing], axis=0)

Add marginal generation costs of foreign plants:

In [85]:
# Append one constant column per domestic plant to OtherMC: the column named
# after the plant id holds that plant's OtherMC_domes value on every row of
# OtherMC.index.
# All columns are assembled first and attached with a single concat, instead
# of re-copying the growing OtherMC object once per plant inside a loop.
# NOTE: OtherMC is reassigned from itself, so re-running this cell appends
# the same columns a second time.
OtherMC_domes_cols = pd.DataFrame(
    {plant: OtherMC_domes.loc[plant] for plant in OtherMC_domes.index},
    index=OtherMC.index,
)
OtherMC = pd.concat([OtherMC, OtherMC_domes_cols], axis=1)

## Mappings

Add exporting transmission lines:

In [86]:
# Rows of GeneratingCapacity_E whose id contains 'ImportFrom' are stacked
# below the plant table as extra 'standard_E' entries with no heat area and
# no heat capacity.
is_import = GeneratingCapacity_E['id'].str.find('ImportFrom')!=-1
import_ids = GeneratingCapacity_E['id'][is_import]
# Each id is split on '_': element 1 becomes g_E, and element 1 plus the last
# element form the tech label.
import_id_parts = import_ids.str.split('_')
df_maps = pd.concat([
    df_plant[['id','g_E','g_H','tech','modelTech','GeneratingCapacity_E','GeneratingCapacity_H']],
    pd.DataFrame().assign(
        id = import_ids.tolist(),
        GeneratingCapacity_E = GeneratingCapacity_E['GeneratingCapacity_E'][is_import].tolist(),
        GeneratingCapacity_H = np.nan,
        g_E = [parts[1] for parts in import_id_parts],
        g_H = np.nan,
        tech = [parts[1]+'_ImportFrom_'+parts[-1] for parts in import_id_parts],
        modelTech = 'standard_E'
    ),
],axis=0)

## Save variables

In [87]:
# Folder that receives every exported model input. Create it up front so the
# to_excel calls in the cells below do not fail when it does not exist yet
# (e.g. on a fresh checkout); exist_ok makes re-running the cell harmless.
output_dir = os.path.join(direc,'ModelData')
os.makedirs(output_dir, exist_ok=True)

In [88]:
# Capacity variation: move the index into ordinary columns, then export.
CapVariation_flat = CapVariation.reset_index()
CapVariation_flat.to_excel(os.path.join(output_dir, 'CapVariation.xlsx'), index=False)

In [89]:
# Generating capacities (electricity and heat), one workbook each.
for fname, frame in [
    ('GeneratingCapacity_E.xlsx', GeneratingCapacity_E),
    ('GeneratingCapacity_H.xlsx', GeneratingCapacity_H),
]:
    frame.to_excel(os.path.join(output_dir, fname), index=False)

In [90]:
# Costs: both objects carry their keys in the index, so it is reset into
# columns before writing.
for fname, costs in [
    ('OtherMC.xlsx', OtherMC),
    ('FOM.xlsx', FOM),
]:
    costs.reset_index().to_excel(os.path.join(output_dir, fname), index=False)

In [91]:
# Fuel mix: written as-is, without the row index.
FuelMix_path = os.path.join(output_dir, 'FuelMix.xlsx')
FuelMix.to_excel(FuelMix_path, index=False)

In [92]:
# Electricity-to-Heat/Hydrogen ratios, one workbook each.
for fname, ratios in [
    ('E2H.xlsx', E2H),
    ('E2HH.xlsx', E2HH),
]:
    ratios.to_excel(os.path.join(output_dir, fname), index=False)

In [93]:
# Electricity demand: each object has its index reset into columns and is
# then written to its own workbook.
for fname, demand in [
    ('LoadVariation_E.xlsx', LoadVariation_E),
    ('Load_E.xlsx', Load_E),
    ('MWP_E.xlsx', MWP_E),
]:
    demand.reset_index().to_excel(os.path.join(output_dir, fname), index=False)

In [94]:
# Heat demand: same export pattern as for electricity demand.
for fname, demand in [
    ('LoadVariation_H.xlsx', LoadVariation_H),
    ('Load_H.xlsx', Load_H),
]:
    demand.reset_index().to_excel(os.path.join(output_dir, fname), index=False)

In [95]:
# Transmission capacities and their variation, one workbook each.
for fname, lines in [
    ('lineCapacity.xlsx', lineCapacity),
    ('lineVariation.xlsx', lineVariation),
]:
    lines.to_excel(os.path.join(output_dir, fname), index=False)

In [96]:
# Mappings: build every mapping table first, then write them in one loop.
is_E_modelTech = df_maps['modelTech'].isin(['standard_E','BP','HP'])
is_H_modelTech = df_maps['modelTech'].isin(['standard_H','BP','HP'])
has_capacity_H = df_maps['GeneratingCapacity_H'].notna()
mapping_exports = [
    ('id2tech.xlsx', df_maps[['id','tech']]),
    ('tech2modelTech.xlsx', df_maps[['tech','modelTech']].drop_duplicates()),
    ('id2g_E.xlsx', df_maps.loc[is_E_modelTech, ['id','g_E']]),
    ('id2g_H.xlsx', df_maps.loc[is_H_modelTech, ['id','g_H']]),
    # Only rows with a heat generating capacity define a g_E -> g_H link.
    ('g_E2g_H.xlsx', df_maps.loc[has_capacity_H, ['g_E','g_H']].drop_duplicates()),
    ('id2hvt.xlsx', id2hvt),
]
for fname, mapping in mapping_exports:
    mapping.to_excel(os.path.join(output_dir, fname), index=False)

In [100]:
# Hour to date mappings: reload the cleaned load data, keep the DK1 rows and
# export the calendar columns that belong to each model hour h.
load_raw = pd.read_pickle(os.path.join(data_dir,'Load_DK1DK2_2019'))
load_DK1 = load_raw.set_index(['g_E','h']).xs('DK1',level='g_E').reset_index()
h2dates = load_DK1[['h','HourOfTheDay','Weekday','Week','Month']]
h2dates.to_excel(os.path.join(output_dir,'h2dates.xlsx'),index=False)