In [1]:
import pandas as pd
import numpy as np
import os
import sys
import plotly
import plotly.graph_objects as go
    
%load_ext autoreload
%autoreload 1

# Raise pandas' display limits so wide / long frames are rendered in full.
display_limits = {
    "display.max_columns": 201,
    "display.max_colwidth": 101,
    "display.max_rows": 500,
}
for option_name, limit in display_limits.items():
    pd.set_option(option_name, limit)

In [2]:
from arctic import Arctic, CHUNK_STORE

# Open a connection to the Arctic store. The host address is hard-coded.
# NOTE(review): consider reading the host from configuration / environment.
conn = Arctic('10.213.120.5')
# Initialise the 'entsoe' library as a chunk store.
# NOTE(review): this runs on every execution — confirm it is safe when the
# library already exists.
conn.initialize_library('entsoe', lib_type=CHUNK_STORE)
# The returned list is discarded (this is not the last expression of the cell).
conn.list_libraries()
# Handle used by the read cells below.
lib = conn['entsoe']

  from pandas.util.testing import assert_frame_equal
  from pandas import DataFrame, Series, Panel
Library created, but couldn't enable sharding: no such command: 'enablesharding'. This is OK if you're not 'admin'


In [3]:
# Input country
# The answer is concatenated into the symbol names read from the library and
# into the output file names, so surrounding whitespace is trimmed and an
# empty answer is rejected here instead of failing later with an obscure
# read error.

country = input("Enter the perimeter (DE/FR/BE/ES/IT/PL) : ").strip()
if not country:
    raise ValueError("No perimeter entered; expected one of DE/FR/BE/ES/IT/PL")

Enter the perimeter (DE/FR/BE/ES/IT/PL) : ES


In [4]:
# function to change timezone from UTC to local time

def changing_timezone(x):
    """Re-express a naive UTC index as naive Europe/Brussels wall-clock time.

    The index of ``x`` is interpreted as UTC, converted to the
    'Europe/Brussels' zone, and the zone information is then dropped again.

    Parameters
    ----------
    x : pandas.DataFrame
        Frame whose index holds timezone-naive timestamps.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns and a timezone-naive local-time
        index. Because the zone is dropped, the hour repeated at the autumn
        DST change appears twice in the result.
    """
    utc_index = x.index.tz_localize('utc')
    brussels_index = utc_index.tz_convert('Europe/Brussels')
    # Swap the index in, then strip the zone so the result is naive local time.
    return x.set_index(brussels_index).tz_localize(None)

In [5]:
# define dates

from datetime import datetime
from datetime import timedelta
from datetime import date

# First day of the analysis period.
ref_date = date(2016, 1, 1)
# Read from one day earlier than the reference date.
start_date = ref_date - timedelta(days=1)

# Upper bound: the first day of the current month.
end_date = date.today().replace(day=1)

In [6]:
# Read the day-ahead (spot) price series for the selected country.

var = 'DayAheadPrices_12.1.D'
prefix = f'{var}_{country}'

# Fetch the chunks covering the date window, then move the index from UTC
# to local time.
df_DA_price = changing_timezone(
    lib.read(prefix, chunk_range=pd.date_range(start_date, end_date))
)

In [7]:
# Read the actual total load (demand) series for the selected country.

var = 'ActualTotalLoad_6.1.A'
prefix = f'{var}_{country}'

raw_demand = lib.read(prefix, chunk_range=pd.date_range(start_date, end_date))

# Down-sample to hourly means (the original note says the source is 15-minute
# data), then move the index from UTC to local time.
df_demand = changing_timezone(raw_demand.resample('H').mean())

In [8]:
# Read the aggregated generation-per-type series for the selected country.

var = 'AggregatedGenerationPerType_16.1.B_C'
prefix = f'{var}_{country}'

raw_gen = lib.read(prefix, chunk_range=pd.date_range(start_date, end_date))

# Down-sample to hourly means (the original note says the source is 15-minute
# data), then move the index from UTC to local time.
df_gen = changing_timezone(raw_gen.resample('H').mean())

In [9]:
# Keep only the first row for every timestamp that occurs more than once in
# the index of each frame.
df_DA_price = df_DA_price.loc[~df_DA_price.index.duplicated(keep='first')]
df_demand = df_demand.loc[~df_demand.index.duplicated(keep='first')]
df_gen = df_gen.loc[~df_gen.index.duplicated(keep='first')]

In [10]:
# Combine price, load and generation into a single frame.
# Outer joins on the index keep every timestamp present in any input.
df_merge = pd.DataFrame(columns=[])
for frame in (df_DA_price, df_demand, df_gen):
    df_merge = df_merge.merge(frame, how='outer', left_index=True, right_index=True)

# `var`, `res_tech` and `prefix` are reused by later cells.
var = 'ActualGenerationOutput'
res_tech = ['Solar','Wind Onshore', 'Wind Offshore']
prefix = f'{var} {country}'

# Price-weighted generation per technology: day-ahead price x generation.
price_col = 'DayAheadPrices_' + country
for tech in res_tech:
    try:
        df_merge['t_cap' + ' ' + tech] = df_merge[price_col] * df_merge[prefix + ' ' + tech]
    except KeyError:
        # A required column is missing for this technology: skip it.
        pass

In [11]:
# Keep rows from 2016 onwards that fall strictly before end_date.
from_2016 = df_merge.index.year >= 2016
before_end = df_merge.index.date < end_date
df_data = df_merge.iloc[from_2016 & before_end]

In [12]:
# Discard every column whose name contains 'Consumption'.
consumption_cols = list(df_data.filter(regex='Consumption'))
kept_cols = df_data.columns.drop(consumption_cols)
df_data = df_data[kept_cols]

# Features not used

In [None]:
# Residual load and renewable penetration (in % of total load).
#
# The previous version split the Residual_Load subtraction over two lines
# without parentheses. The second line (starting with "-") was a separate
# expression whose result was discarded, so wind generation was never
# subtracted. Everything is now computed from named series in one expression.
load = df_data['ActualTotalLoad' + '_' + country]
solar = df_data[prefix + ' ' + 'Solar']

# Wind Onshore is required; Wind Offshore is added only when it is reported.
wind = df_data[prefix + ' ' + 'Wind Onshore']
offshore_col = prefix + ' ' + 'Wind Offshore'
if offshore_col in df_data.columns:
    wind = wind + df_data[offshore_col]

df_data['Residual_Load'] = load - solar - wind
df_data['RES_penetration'] = ((solar + wind) / load) * 100
df_data['Wind_penetration'] = (wind / load) * 100

df_data['Solar_penetration'] = (solar / load) * 100

In [13]:
import calendar

# Average every column per calendar day, per (year, month) and per year.
df_d = df_data.groupby(df_data.index.date).mean()
df_m = df_data.groupby([df_data.index.year, df_data.index.month]).mean()
df_y = df_data.groupby(df_data.index.year).mean()

# Capture price per technology:
# mean(price x generation) / mean(generation) over each period.
for tech in res_tech:
    weighted_col = 't_cap' + ' ' + tech
    volume_col = prefix + ' ' + tech
    try:
        for frame in (df_d, df_m, df_y):
            frame['cap_price' + '_' + tech] = frame[weighted_col] / frame[volume_col]
    except KeyError:
        # Columns for this technology are not available: skip it.
        pass

#df_d['quarter'] = 'Q'+pd.to_datetime(df_d.index).quarter.astype(str)
#df_m['quarter'] = 'Q'+pd.to_datetime(df_m.index.get_level_values(1), format='%m').quarter.astype(str)

# Month number -> abbreviation lookup built from the calendar module
# (index 0 maps to the empty string).
dd = {number: abbr for number, abbr in enumerate(calendar.month_abbr)}

# Relabel the month level (level 1) of the monthly index with abbreviations.
df_m = df_m.rename(index=dd, level=1)

# Ordered categorical dtype so months sort in calendar order, not alphabetically.
cal_dtype = pd.CategoricalDtype(list(calendar.month_abbr), ordered=True)

# Cast the month level (level 1) to that ordered dtype.
month_level = df_m.index.levels[1].astype(cal_dtype)
df_m.index = df_m.index.set_levels(month_level, level=1)

In [14]:
# Capture ratio per technology: capture price divided by the mean day-ahead
# price of the same period.
price_col = 'DayAheadPrices_' + country
for tech in res_tech:
    try:
        for frame in (df_m, df_d, df_y):
            frame['cap_ratio_' + tech] = (frame['cap_price_' + tech] / frame[price_col])
    except KeyError:
        # No capture price was computed for this technology: skip it.
        pass

In [29]:
# Persist the daily / monthly / yearly aggregates as pickles.
pickle_dir = 'hist_data/'

# Create the output folder if needed so a fresh checkout does not fail with
# FileNotFoundError on the first write.
os.makedirs(pickle_dir, exist_ok=True)

for frame, period in ((df_d, 'daily'), (df_m, 'monthly'), (df_y, 'yearly')):
    frame.to_pickle(os.path.join(pickle_dir, 'cap_price_' + period + '_' + country + '.p'))

In [15]:
# Persist the daily / monthly / yearly aggregates as CSV files.
csv_dir = 'hist_data_csv/'

# Create the output folder if needed so a fresh checkout does not fail with
# FileNotFoundError on the first write.
os.makedirs(csv_dir, exist_ok=True)

for frame, period in ((df_d, 'daily'), (df_m, 'monthly'), (df_y, 'yearly')):
    frame.to_csv(os.path.join(csv_dir, 'cap_price_' + period + '_new_' + country + '.csv'))