# Load packages and settings

In [1]:
# --- Imports (all in one place: standard library first, then third-party) ---
import os
from datetime import datetime

import numpy as np
import pandas as pd

from termcolor import colored
from SecretColors.palette import Palette

import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.dates import DateFormatter
import matplotlib.dates as dates

# NOTE: the original cell started with os.system('clear'). Inside a notebook
# kernel that call only spawns a shell (and `clear` does not exist on every
# platform); it never clears the notebook output, so it has been dropped.

# --- Colours ---------------------------------------------------------------
# Material palette object from SecretColors, configured with color_mode 'hexa'.
material = Palette('material', color_mode = 'hexa')

# Project colours as hex strings.
hex_salmon = '#F68F83'
hex_gold = '#BC9661'
hex_indigo = '#2D2E5F'
hex_maroon = '#8C4750'
hex_white = '#FAFAFA'
hex_blue = '#7EB5D2'

# --- Matplotlib defaults ---------------------------------------------------
# NOTE(review): 'SF Compact Text' must be installed on the machine that runs
# the notebook - confirm it is available wherever this is executed.
mpl.rcParams.update({
    'font.family': 'SF Compact Text',
    'font.weight': 'medium',
    'axes.titleweight': 'semibold',
    'axes.labelweight': 'medium',
    # Default line colours cycle through indigo -> salmon -> maroon.
    'axes.prop_cycle': mpl.cycler(color=[hex_indigo, hex_salmon, hex_maroon]),
    'figure.titlesize': 'large',
    'figure.titleweight': 'semibold',
})

# --- Status banner ---------------------------------------------------------
print('')
print(colored('...','white'))
print('')
print(colored('Finished successfully', 'green'))
print('')
print(colored('...','white'))
print('')


[37m...[0m

[32mFinished successfully[0m

[37m...[0m



# Convert raw intraday (ID) trade files to pickle files

## y < 2019

In [2]:
# Years to convert. Files in the directory whose 4-character prefix is not in
# this list are listed but skipped.
yyears = ['2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018']

raw_dir = './raw data/ID/< 2019'
out_dir = './data/ID'

# Create the output folder up front so to_pickle() cannot fail on a fresh checkout.
os.makedirs(out_dir, exist_ok = True)

# Skip hidden files ('.') and temporary/lock files ('~').
flist = [f for f in sorted(os.listdir(raw_dir)) if not f.startswith(('.', '~'))]

print('')
print(colored('...','white'))
print('')
print(colored(f'Files in directory: {flist}', 'blue'))
print('')

for filename in flist:
    # The first four characters of the file name are the year.
    y = filename[:4]
    if y not in yyears:
        continue

    print(colored('...','white'))
    print('')
    print(colored(f'Currently processing file: {filename}', 'blue'))
    print(colored(f'Currently processing year: {y}', 'blue'))
    print('')

    # Read without a header: the header row is located after the empty
    # rows/columns have been removed.
    df = pd.read_excel(f"{raw_dir}/{filename}", header = None)

    # Normalise the two header labels that contain a space.
    df = df.replace('Instrument code', 'Instrumentcode')
    df = df.replace('Contract type', 'Contracttype')
    df = df.dropna(how = 'all', axis = 0)
    df = df.dropna(how = 'all', axis = 1)
    df = df.reset_index(drop = True)

    # The first remaining row holds the column names.
    df.columns = df.iloc[0]
    df = df.drop(0)

    # Drop the "25th hour" instruments (both spellings used in the files).
    # regex = False: these are literal substrings, not patterns.
    df = df[~df['Instrumentcode'].str.contains('1H-25', regex = False)]
    df = df[~df['Instrumentcode'].str.contains('1H 25', regex = False)]

    # Two instrument-code layouts exist: codes containing 'NL ID ' carry the
    # date as yy-Mon-dd after that prefix, all others start with dd-Mon-yy.
    is_nl_code = df['Instrumentcode'].str.contains('NL ID ', regex = False)

    # .copy() so that adding the 'Date' column works on independent frames
    # instead of on slices of df (avoids SettingWithCopyWarning).
    df1 = df[~is_nl_code].copy()
    df2 = df[is_nl_code].copy()

    df1['Date'] = pd.to_datetime(df1['Instrumentcode'].str[:7], format = '%d%b%y')
    df2['Date'] = pd.to_datetime(df2['Instrumentcode'].str.replace("NL ID ", "", regex = False).str[:7], format = '%y%b%d')
    df = pd.concat([df1, df2]).sort_index()

    # Keep only rows whose delivery date falls in the file's own year.
    df = df[df['Date'].dt.year == int(y)].copy()

    # The last two characters of the code are the 1-based hour number;
    # subtracting one gives the hour at which the delivery period starts.
    hour = df['Instrumentcode'].str[-2:].astype('int64') - 1
    df['Time'] = hour.astype('str')

    # 'Date' is at midnight here, so adding the hour offset yields the same
    # timestamp as formatting and re-parsing "<date> <hour>:00:00".
    df['Date'] = df['Date'] + pd.to_timedelta(hour, unit = 'h')

    df['Price'] = df['Price'].astype('float')
    df['Volume'] = df['Volume'].astype('float')

    print(df)

    df.to_pickle(f"{out_dir}/{os.path.splitext(filename)[0]}.pkl")

print(colored('...','white'))
print('')
print(colored('Finished successfully', 'green'))
print('')
print(colored('...','white'))
print('')


[37m...[0m

[34mFiles in directory: ['2006_Intraday.xls', '2007_Intraday.xls', '2008_Intraday.xls', '2009_Intraday.xls', '2010_Intraday.xls', '2011_Intraday.xls', '2012_Intraday.xls', '2013_Intraday.xls', '2014_Intraday.xlsx', '2015_Intraday.xlsx', '2016_Intraday.xlsx', '2017_Intraday.xlsx', '2018_Intraday.xlsx'][0m

[37m...[0m

[34mCurrently processing file: 2010_Intraday.xls[0m
[34mCurrently processing year: 2010[0m

0      Instrumentcode  Side  Volume  Price Contracttype  \
1     02JAN10 - 1H 19   buy    20.0   61.0         Spot   
2     02JAN10 - 1H 20   buy    20.0   61.0         Spot   
3     02JAN10 - 1H 21   buy    20.0   55.0         Spot   
4     02JAN10 - 1H 22   buy    20.0   44.0         Spot   
5     02JAN10 - 1H 23   buy    20.0   46.0         Spot   
...               ...   ...     ...    ...          ...   
2486  28DEC10 - 1H 21   buy    15.0   66.0         Spot   
2487  28DEC10 - 1H 13  sell    15.0   72.0         Spot   
2488  28DEC10 - 1H 14  sell    15.0

## y == 2019

In [3]:
# NOTE: this whole cell is currently disabled (every line is commented out).
# What the disabled code does when re-enabled:
#   - lists the workbooks in "./raw data/ID/= 2019" (year = file name [0:4],
#     month = file name [4:6]),
#   - cleans each workbook with the same steps as the "y < 2019" cell,
#   - concatenates the per-file frames into df3 and pickles the result as
#     "./data/ID/{y}_Intraday.pkl".
# NOTE(review) - points to check before re-enabling:
#   - `df3.sort_values(by=['Date'])` returns a new frame and the result is
#     discarded, so df3 would be pickled unsorted; assign the result back.
#   - `skip_blank_lines` is a read_csv option; confirm that read_excel in the
#     installed pandas accepts it (and `decimal`).
#   - only '1H-25' is filtered here, while the "y < 2019" cell also filters
#     '1H 25'; confirm whether that difference is intended.
#   - the pickle name uses the loop variable `y` after the loop has ended.

# yyears = ['2019']

# flist = [f for f in sorted(os.listdir("./raw data/ID/= 2019")) if not (f.startswith('.') or f.startswith('~'))]

# print('')
# print(colored('...','white'))
# print('')
# print(colored(f'Files in directory: {flist}', 'blue'))
# print('')

# df3 = pd.DataFrame()

# for filename in flist:
#     y = filename[:4]
#     m = filename[4:6]
#     if y in yyears:

#         print(colored('...','white'))
#         print('')
#         print(colored(f'Currently processing file: {filename}', 'blue'))
#         print(colored(f'Currently processing year: {y}', 'blue'))
#         print(colored(f'Currently processing month: {m}', 'blue'))
#         print('')
        
#         df = pd.read_excel(f"./raw data/ID/= 2019/{filename}", header = None, skip_blank_lines=True, decimal=',', thousands='.')
#         df = df.replace('Instrument code', 'Instrumentcode')
#         df = df.replace('Contract type', 'Contracttype')
#         df = df.dropna(how = 'all', axis = 0)
#         df = df.dropna(how = 'all', axis = 1)
#         df = df.reset_index(drop = True)
        
#         df.columns = df.iloc[0]
#         df = df.drop(0)

#         df = df[~df['Instrumentcode'].str.contains('1H-25')]

#         df1 = df[~df['Instrumentcode'].str.contains('NL ID ')]
#         df2 = df[df['Instrumentcode'].str.contains('NL ID ')]

#         df1['Date'] = pd.to_datetime(df1['Instrumentcode'].str[:7], format = '%d%b%y')
#         df2['Date'] = pd.to_datetime(df2['Instrumentcode'].str.replace("NL ID ", "").str[:7], format = '%y%b%d')
#         df = pd.concat([df1, df2]).sort_index()

#         df = df[df['Date'].dt.year == int(y)]

#         df['Time'] = (df['Instrumentcode'].str[-2:].astype('int64') - 1).astype('str')
#         df['Date'] = pd.to_datetime(df['Date'].dt.strftime("%Y-%m-%d") + ' ' + df['Time'].astype(str) + ':00:00')

#         df['Price'] = df['Price'].astype('float')
#         df['Volume'] = df['Volume'].astype('float')

#         df3 = pd.concat([df3, df])

# df3.sort_values(by=['Date'])

# df3.to_pickle(f"./data/ID/{y}_Intraday.pkl")

# print(colored('...','white'))
# print('')
# print(colored(f'Finished successfully', 'green'))
# print('')
# print(colored('...','white'))
# print('')

## y > 2019 

In [4]:
# NOTE: this whole cell is currently disabled (every line is commented out).
# What the disabled code does when re-enabled:
#   - lists the CSV files in "./raw data/ID/> 2019"; year, month and day are
#     sliced from fixed positions in the file name ([21:25], [25:27], [27:29]),
#   - parses 'Delivery Start' (last character stripped) into 'Date' and
#     'Execution time' (last five characters stripped) into
#     'Creation timestamp (GMT)',
#   - copies 'Quantity (MW)' into 'Volume' as float,
#   - accumulates everything in df2 and writes one pickle per year in yyears.
# NOTE(review) - points to check before re-enabling:
#   - `df2.sort_values(by=['Date'])` returns a new frame and the result is
#     discarded; assign it back if sorted output is wanted.
#   - df2 grows via pd.concat inside the loop, which re-copies the accumulated
#     data on every file; collecting frames in a list and concatenating once
#     is cheaper.
#   - the column name says GMT; confirm that 'Execution time' really is GMT.

# yyears = ['2020', '2021']

# flist = [f for f in sorted(os.listdir("./raw data/ID/> 2019")) if not (f.startswith('.') or f.startswith('~'))]

# print('')
# print(colored('...','white'))
# print('')
# print(colored(f'Files in directory: {flist}', 'blue'))
# print('')

# df2 = pd.DataFrame()

# for filename in flist:
#     y = filename[21:25]
#     m = filename[25:27]
#     d = filename[27:29]
#     if y in yyears:

#         print(colored('...','white'))
#         print('')
#         print(colored(f'Currently processing file: {filename}', 'blue'))
#         print(colored(f'Currently processing year: {y}', 'blue'))
#         print(colored(f'Currently processing month: {m}', 'blue'))
#         print(colored(f'Currently processing day: {d}', 'blue'))
#         print('')
        
#         df = pd.read_csv(f"./raw data/ID/> 2019/{filename}", header = None, skip_blank_lines=True, decimal=',', thousands='.')

#         df = df.dropna(how = 'all', axis = 0)
#         df = df.dropna(how = 'all', axis = 1)
#         df = df.reset_index(drop = True)
        
#         df.columns = df.iloc[0]
#         df = df.drop(0)

#         # df = df[~df['Instrumentcode'].str.contains('1H-25')]

#         # df1 = df[~df['Instrumentcode'].str.contains('NL ID ')]
#         # df2 = df[df['Instrumentcode'].str.contains('NL ID ')]

#         df['Date'] = pd.to_datetime(df['Delivery Start'].str[:-1], format = '%Y%m%dT%H%M%S')
#         df['Creation timestamp (GMT)'] = pd.to_datetime(df['Execution time'].str[:-5], format = '%Y%m%dT%H%M%S')

#         df = df[df['Date'].dt.year == int(y)]

#         df['Price'] = df['Price'].astype('float')
#         df['Volume'] = df['Quantity (MW)'].astype('float')

#         df2 = pd.concat([df2, df])

# df2.sort_values(by=['Date'])

# for y in yyears:
    
#     df2[df2['Date'].dt.year == int(y)].to_pickle(f"./data/ID/{y}_Intraday.pkl")

# print(colored('...','white'))
# print('')
# print(colored(f'Finished successfully', 'green'))
# print('')
# print(colored('...','white'))
# print('')

# Combine the yearly intraday (ID) pickles into one DataFrame

In [5]:
# Years to combine; one "./data/ID/{year}_Intraday.pkl" is read per entry.
yyears = ['2015', '2016', '2017', '2018']

import sys
import calendar

# Per-year frames are collected in lists and concatenated once after the loop.
# (DataFrame.append is deprecated and re-copies the accumulated data on every
# call; pd.concat on a list does the same job in one pass.)
yearly_frames = []
yearly_group = []
yearly_group2 = []
yearly_group3 = []

price = []
volume = []
breaks = []

# Last Sunday of March and of October for every processed year.
dst_switch_dates = []

# Non-numeric / bookkeeping columns removed before aggregating, per file layout.
drop_cols_from_2020 = ['Product', 'Delivery Start', 'Delivery End', 'isOTC', 'Execution time', 'Side', 'Market area', 'is Half Trade', 'Is Self Trade', 'Currency', 'Quantity (MW)']
drop_cols_before_2020 = ['Instrumentcode', 'Contracttype', 'Side']

for i, y in enumerate(yyears):

    for month in [3, 10]:
        # monthcalendar() weeks start on Monday by default, so week[-1] is the
        # Sunday of that week (0 when it lies outside the month).
        last_sunday = max(week[-1] for week in calendar.monthcalendar(int(y), month))
        # Built from the numbers directly instead of formatting and re-parsing
        # an abbreviated month name.
        dst_switch_dates.append(datetime(int(y), month, last_sunday).date())

    if int(y) >= 2020:

        df = pd.read_pickle(f"./data/ID/{y}_Intraday.pkl")

        volume_buy = df.loc[df['Side'] == 'BUY', 'Volume'].sum()
        volume_sell = df.loc[df['Side'] == 'SELL', 'Volume'].sum()

        price_buy = df.loc[df['Side'] == 'BUY', 'Price'].sum()
        price_sell = df.loc[df['Side'] == 'SELL', 'Price'].sum()

        values = df.drop(drop_cols_from_2020, axis=1)

    else:

        df = pd.read_pickle(f'./data/ID/{y}_Intraday.pkl')

        # Parse the trade creation time and mark it as UTC.
        df['Creation timestamp (GMT)'] = pd.to_datetime(df['Creation timestamp (GMT)'], format='%d-%m-%Y %H:%M:%S')
        df['Creation timestamp (GMT)'] = df['Creation timestamp (GMT)'].dt.tz_localize('UTC')

        # Keep only the rows whose 'Side' contains 'buy'.
        df = df[df['Side'].str.contains('buy')]

        values = df.drop(drop_cols_before_2020, axis=1)

    # --- Aggregations (identical for both file layouts) ---------------------
    # Each groupby yields a tuple-valued index; the tuples are stored in a
    # 'Date' column and parsed back with a format that mirrors the tuple's
    # string form, e.g. '(2015, 1)'.
    delivery = df['Date'].dt

    # Monthly sums.
    df_group = values.groupby([delivery.year, delivery.month]).sum()
    df_group['Date'] = df_group.index
    df_group['Date'] = pd.to_datetime(df_group['Date'], format = '(%Y, %m)')

    # Daily means.
    df_group2 = values.groupby([delivery.year, delivery.month, delivery.day]).mean()
    df_group2['Date'] = df_group2.index
    df_group2['Date'] = pd.to_datetime(df_group2['Date'], format = '(%Y, %m, %d)')

    # Hourly means.
    df_group3 = values.groupby([delivery.year, delivery.month, delivery.day, delivery.hour]).mean()
    df_group3['Date'] = df_group3.index
    df_group3['Date'] = pd.to_datetime(df_group3['Date'], format = '(%Y, %m, %d, %H)')

    df_group = df_group.set_index(pd.DatetimeIndex(df_group['Date']))
    df_group2 = df_group2.set_index(pd.DatetimeIndex(df_group2['Date']))
    df_group3 = df_group3.set_index(pd.DatetimeIndex(df_group3['Date']))

    yearly_frames.append(df)
    yearly_group.append(df_group)
    yearly_group2.append(df_group2)
    yearly_group3.append(df_group3)

    # Plain column sums over the rows kept for this year.
    price.append(df['Price'].sum())
    volume.append(df['Volume'].sum())

    print('')
    print(colored('...','white'))
    print('')
    print(colored(f'Year: {y}', 'blue'))
    print('')
    print(colored(f'Price: {int(round(price[i]))} Euro', 'blue'))
    print(colored(f'Volume: {int(round(volume[i]))} MWh', 'blue'))

# Original row labels are kept (same as append(..., ignore_index = False)).
dfo = pd.concat(yearly_frames)
dfo_group = pd.concat(yearly_group)
dfo_group2 = pd.concat(yearly_group2)
dfo_group3 = pd.concat(yearly_group3)

print('')
print(colored('...','white'))
print('')
print(colored(f'Period: {yyears[0]}–{yyears[-1]}', 'blue'))
print('')
print(colored(f'Price (sum): {int(round(sum(price)))} Euro', 'blue'))
print(colored(f'Volume (sum): {int(round(sum(volume)))} MWh', 'blue'))
print('')

print(dst_switch_dates)

# NOTE(review): the delivery timestamp is labelled as UTC without any
# conversion - confirm that the instrument codes really are in UTC.
dfo['Date'] = dfo['Date'].dt.tz_localize('UTC')

# Lead time between trade creation and delivery start, in hours.
dfo['Time to delivery'] = (dfo['Date'] - dfo['Creation timestamp (GMT)']) / np.timedelta64(1, 'h')

dfo.to_pickle('./data/ID/ID.pkl')

print(colored('...','white'))
print('')
print(colored('Finished successfully', 'green'))
print('')
print(colored('...','white'))
print('')


[37m...[0m

[34mYear: 2015[0m

[34mPrice: 1787837 Euro[0m
[34mVolume: 948545 MWh[0m

[37m...[0m

[34mYear: 2016[0m

[34mPrice: 2179748 Euro[0m
[34mVolume: 1451924 MWh[0m

[37m...[0m

[34mYear: 2017[0m

[34mPrice: 6980678 Euro[0m
[34mVolume: 2384586 MWh[0m

[37m...[0m

[34mYear: 2018[0m

[34mPrice: 22962765 Euro[0m
[34mVolume: 3421322 MWh[0m

[37m...[0m

[34mPeriod: 2015–2018[0m

[34mPrice (sum): 33911028 Euro[0m
[34mVolume (sum): 8206377 MWh[0m

[datetime.date(2015, 3, 29), datetime.date(2015, 10, 25), datetime.date(2016, 3, 27), datetime.date(2016, 10, 30), datetime.date(2017, 3, 26), datetime.date(2017, 10, 29), datetime.date(2018, 3, 25), datetime.date(2018, 10, 28)]
[37m...[0m

[32mFinished successfully[0m

[37m...[0m



# Calculate the ID3 index (3-hour volume-weighted intraday price) from the combined ID DataFrame

In [6]:
def _vwap(pricevolume_sum, volume_sum):
    """Return the volume-weighted average price, or NaN when no volume traded.

    pricevolume_sum: sum of price * volume over the selected trades.
    volume_sum: sum of volume over the same trades.
    """
    if volume_sum == 0:
        # Avoids the divide-by-zero warning; the unguarded expression also
        # evaluated to NaN for an empty selection.
        return np.nan
    return 1 / volume_sum * pricevolume_sum


dfo = pd.read_pickle('./data/ID/ID.pkl')

# Spot-only subset; not used further in this cell.
dfo_spotonly = dfo[dfo['Contracttype'] == 'Spot']

print(dfo)

# One row per hourly instrument between the first and last year in yyears.
#   ID3        volume-weighted price of the trades in the window before delivery
#   ID3 (VOL)  traded volume in that window
#   IDF        volume-weighted price of all trades for that delivery hour
instruments = pd.DataFrame(columns = ['Instrument', 'ID3', 'ID3 (VOL)', 'IDF'])

instruments['Instrument'] = pd.date_range(f'{yyears[0]}-01-01 00:00:00', f'{yyears[-1]}-12-31 23:00:00', freq = 'h', tz = 'UTC')

print(instruments)

for index, row in instruments.iterrows():

    dtt = row['Instrument']

    print(dtt)

    # Window: from 3 hours before delivery (inclusive) up to 5 minutes before
    # delivery (exclusive).
    end_dt = dtt - np.timedelta64(5,'m')
    start_dt = dtt - np.timedelta64(3,'h')

    print(start_dt, end_dt)
    print('')

    # NOTE(review): the window selects on creation time only, so it contains
    # trades for every delivery hour, not just for `dtt` - confirm that this
    # matches the intended ID3 definition.
    in_window = (dfo['Creation timestamp (GMT)'] >= start_dt) & (dfo['Creation timestamp (GMT)'] < end_dt)
    threehour = dfo.loc[in_window]
    window_volume = threehour['Volume'].sum()
    window_pricevolume = (threehour['Price'] * threehour['Volume']).sum()

    # All trades whose delivery hour equals this instrument.
    same_hour = dfo.loc[dfo['Date'] == dtt]
    hour_volume = same_hour['Volume'].sum()
    hour_pricevolume = (same_hour['Price'] * same_hour['Volume']).sum()

    # Write through the frame itself (label + column) rather than through
    # instruments[col].loc[...], which is chained assignment and is not
    # guaranteed to update `instruments`.
    instruments.at[index, 'ID3'] = _vwap(window_pricevolume, window_volume)
    instruments.at[index, 'ID3 (VOL)'] = window_volume
    instruments.at[index, 'IDF'] = _vwap(hour_pricevolume, hour_volume)

    # Windows with little or no volume are replaced by IDF in the next cell.

55:00+00:00

2018-12-21 11:00:00+00:00
2018-12-21 08:00:00+00:00 2018-12-21 10:55:00+00:00

2018-12-21 12:00:00+00:00
2018-12-21 09:00:00+00:00 2018-12-21 11:55:00+00:00

2018-12-21 13:00:00+00:00
2018-12-21 10:00:00+00:00 2018-12-21 12:55:00+00:00

2018-12-21 14:00:00+00:00
2018-12-21 11:00:00+00:00 2018-12-21 13:55:00+00:00

2018-12-21 15:00:00+00:00
2018-12-21 12:00:00+00:00 2018-12-21 14:55:00+00:00

2018-12-21 16:00:00+00:00
2018-12-21 13:00:00+00:00 2018-12-21 15:55:00+00:00

2018-12-21 17:00:00+00:00
2018-12-21 14:00:00+00:00 2018-12-21 16:55:00+00:00

2018-12-21 18:00:00+00:00
2018-12-21 15:00:00+00:00 2018-12-21 17:55:00+00:00

2018-12-21 19:00:00+00:00
2018-12-21 16:00:00+00:00 2018-12-21 18:55:00+00:00

2018-12-21 20:00:00+00:00
2018-12-21 17:00:00+00:00 2018-12-21 19:55:00+00:00

2018-12-21 21:00:00+00:00
2018-12-21 18:00:00+00:00 2018-12-21 20:55:00+00:00

2018-12-21 22:00:00+00:00
2018-12-21 19:00:00+00:00 2018-12-21 21:55:00+00:00

2018-12-21 23:00:00+00:00
2018-12-21 20

In [7]:
# Windows that traded less than this volume fall back to IDF.
# NOTE(review): unit presumably MWh, as in the volume printouts - confirm.
MIN_ID3_VOLUME = 10

display(instruments)

# Alias (not a copy) of the frame as it was displayed above.
instruments2 = instruments

# NOTE: this cell rebinds `instruments`; re-running it without re-running the
# previous cell fails because the 'Instrument' column is gone after set_index.
instruments = instruments.set_index('Instrument')

instruments.index.names = ['Timestamp']

# Drop every hour that falls on a DST switch date (compared on the UTC date).
for d in dst_switch_dates:
    instruments = instruments[instruments.index.date != d]

# Work on an independent frame so the assignments below modify `instruments`
# itself and not a slice of the unfiltered frame.
instruments = instruments.copy()

# Fallback 1: too little (or no) volume in the ID3 window -> use IDF.
# .loc[mask, column] on the frame replaces the chained
# instruments['ID3'].loc[mask] = ... form, which may not write through.
low_volume = instruments['ID3 (VOL)'].isna() | (instruments['ID3 (VOL)'] < MIN_ID3_VOLUME)
instruments.loc[low_volume, 'ID3'] = instruments.loc[low_volume, 'IDF'].to_numpy()

DA = pd.read_pickle("./data/DA/DA.pkl")

# Default inner join on the timestamp index: only hours present in both
# frames are kept.
instruments = pd.merge(instruments, DA, left_index = True, right_index = True)

# Fallback 2: still no intraday price -> use the 'MCP' column from DA.
missing_id3 = instruments['ID3'].isna()
instruments.loc[missing_id3, 'ID3'] = instruments.loc[missing_id3, 'MCP'].to_numpy()

instruments = instruments[['ID3', 'MCP']]

display(instruments)

instruments.to_pickle('./data/ID/ID3.pkl')

Unnamed: 0,Instrument,ID3,ID3 (VOL),IDF
0,2015-01-01 00:00:00+00:00,24.2549,245.6,26.3026
1,2015-01-01 01:00:00+00:00,28.6447,45.6,22.8818
2,2015-01-01 02:00:00+00:00,28.3007,91.8,25
3,2015-01-01 03:00:00+00:00,28.0543,92,26.8182
4,2015-01-01 04:00:00+00:00,28.1546,97,26.6842
...,...,...,...,...
35059,2018-12-31 19:00:00+00:00,68.9794,1341.3,75.2327
35060,2018-12-31 20:00:00+00:00,51.6558,741.8,55.3705
35061,2018-12-31 21:00:00+00:00,50.4283,470,52.2516
35062,2018-12-31 22:00:00+00:00,50.1262,379.8,56.4397


Unnamed: 0_level_0,ID3,MCP
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-01 00:00:00+00:00,24.2549,38.50
2015-01-01 01:00:00+00:00,28.6447,38.22
2015-01-01 02:00:00+00:00,28.3007,35.60
2015-01-01 03:00:00+00:00,28.0543,33.00
2015-01-01 04:00:00+00:00,28.1546,27.41
...,...,...
2018-12-31 19:00:00+00:00,68.9794,58.28
2018-12-31 20:00:00+00:00,51.6558,50.01
2018-12-31 21:00:00+00:00,50.4283,47.48
2018-12-31 22:00:00+00:00,50.1262,50.95
