In [24]:
import numpy as np
import pandas as pd
import sklearn as sk
import matplotlib as plt

In [25]:
#path to the data folder, edit if necesssary
path = "./data/"

#filenames
file_power = "competitor_market_prices_power.csv"
file_gas = "competitor_market_prices_gas.csv"
clean_power = "clean_competitor_market_prices_power.csv"
clean_gas = "clean_competitor_market_prices_gas.csv"


#Price Filenames
spot_gas = "eex_spot_gas.csv"
d_ahead_gas = "eex_gas_d.csv"
y_ahead_gas = "eex_gas_y.csv"

spot_power = "epex_spot_power.csv"
d_ahead_peak = "eex_power_peak_d.csv"
y_ahead_peak = "eex_power_peak_y.csv"
d_ahead_base = "eex_power_base_d.csv"
y_ahead_base = "eex_power_base_y.csv"

In [26]:
df_gas = pd.read_csv(path + clean_gas)
df_gas.drop(columns = ['Unnamed: 0'], inplace = True)
df_gas

Unnamed: 0,date_valid_from,date_valid_to,rank,consumption_range_kwh,tariff,supplier,price_year_eur,post_code,city,energy
0,2021-09-01,2021-09-01,1,22500,Shuppet,Willy,1340.77,58636,Iserlohn,gas
1,2021-09-01,2021-09-01,6,90000,Rillaboom,Piteraq,5218.84,42277,Wuppertal,gas
2,2021-09-01,2021-09-01,44,24000,Spheal,Belaja,1743.21,38226,Salzgitter,gas
3,2021-09-01,2021-09-01,46,20000,Staraptor,Tornado,1542.88,3046,Cottbus,gas
4,2021-09-01,2021-09-01,49,80000,Combusken,Kachchab,5203.17,81737,München,gas
...,...,...,...,...,...,...,...,...,...,...
17982680,2022-09-13,2022-09-27,8,25000,Perrserker,Pulenat,10231.91,72762,Reutlingen,gas
17982681,2022-09-13,2022-09-27,9,25000,Aegislash,Sarma,11507.30,71638,Ludwigsburg,gas
17982682,2022-09-13,2022-09-27,6,2300,Ampharos,Sarma,1188.85,60439,Frankfurt,gas
17982683,2022-09-13,2022-09-27,11,20000,Ampharos,Sarma,9230.80,12627,Berlin,gas


In [27]:
df_power = pd.read_csv(path + clean_power, usecols = [1:])
df_power

Unnamed: 0.1,Unnamed: 0,date_valid_from,date_valid_to,rank,consumption_range_kwh,tariff,supplier,price_year_eur,post_code,city,energy
0,0,2022-02-19,2022-02-20,22,2400,Nidorina,Solanus,1061.77,41751,Viersen,power
1,2,2022-02-19,2022-02-20,31,3600,Grookey,Purga,1562.37,31137,Hildesheim,power
2,6,2022-02-19,2022-02-20,26,5500,Appletun,Helm,2203.24,78054,Villingen-Schwenningen,power
3,9,2022-02-19,2022-02-20,22,250,Wimpod,Wambra,234.19,14480,Potsdam,power
4,13,2022-02-19,2022-02-20,13,500,Patrat,Pulenat,362.13,41462,Neuss,power
...,...,...,...,...,...,...,...,...,...,...,...
32634484,68438551,2022-02-04,2022-02-07,31,4000,Raichu,Solanus,1927.19,8058,Zwickau,power
32634485,68438556,2022-02-09,2022-02-09,40,7800,Rookidee,Reshabar,5648.13,33332,Gütersloh,power
32634486,68438557,2022-02-09,2022-02-09,2,5600,Herdier,Joran,2278.78,8058,Zwickau,power
32634487,68438561,2022-02-19,2022-02-20,16,1000,Gothorita,Lule älv,486.76,58452,Witten,power


In [28]:
#Joining Price Data
df_spot_power = pd.read_csv(path + spot_power, usecols = [0,1])
df_y_ahead_peak = pd.read_csv(path + y_ahead_peak, usecols = [0,1])
df_y_ahead_base = pd.read_csv(path + y_ahead_base, usecols = [0,1])
df_d_ahead_peak = pd.read_csv(path + d_ahead_peak, usecols = [0,1])
df_d_ahead_base = pd.read_csv(path + d_ahead_base, usecols = [0,1])

In [29]:
#Renaming columns
df_spot_power.rename(columns = {'d': 'spot'}, inplace = True)
df_y_ahead_peak.rename(columns = {'y01': 'y_ahead_peak'}, inplace = True)
df_y_ahead_base.rename(columns = {'y01': 'y_ahead_base'}, inplace = True)
df_d_ahead_peak.rename(columns = {'d00': 'd_ahead_peak'}, inplace = True)
df_d_ahead_base.rename(columns = {'d00': 'd_ahead_base'}, inplace = True)

In [30]:
#Indexing Spot Price Data
df_spot_power.time = pd.to_datetime(df_spot_power.time)
df_spot_power['Dates'] = df_spot_power['time'].dt.date
df_spot_power.set_index(df_spot_power.time, inplace = True)
df_spot_power.drop(columns = ['time'], inplace = True)

In [31]:
#Generating Daily Peak and Base Prices
df_spot_power_peak = df_spot_power.between_time("08:00", "19:59")
df_spot_power_base = df_spot_power.between_time("20:00", "07:59")
df_spot_power_base = df_spot_power_base.groupby('Dates').mean()
df_spot_power_peak = df_spot_power_peak.groupby('Dates').mean()

#Renaming the columns and joining
df_spot_power_peak.rename(columns = {'spot': 'spot_peak'}, inplace = True)
df_spot_power_base.rename(columns = {'spot': 'spot_base'}, inplace = True)

#Joining the dataset
df_spot_power = pd.merge(df_spot_power_peak,df_spot_power_base, how='inner', left_index=True, right_index=True)
#df_spot_power['date'] = pd.to_datetime(df_spot_power.index)


In [32]:
df_spot_power.dtypes

spot_peak    float64
spot_base    float64
dtype: object

In [33]:
#Joining all price data into master data
from datetime import datetime, timedelta

#Setting join date as index for joining
df_power.date_valid_from = pd.to_datetime(df_power.date_valid_from)
df_power.index = df_power.date_valid_from - timedelta(days = 1)


#Joining all price data
price_dfs = [df_y_ahead_peak, df_y_ahead_base, df_d_ahead_peak, df_d_ahead_base]

for df in price_dfs:
    df.set_index(pd.to_datetime(df.date), inplace = True)
    df.drop(columns = ['date'], inplace = True)
    df_spot_power = df_spot_power.join(df)
    
df_prices = df_spot_power
    


In [34]:
#Joining all prices with df_power
df_power = df_power.join(df_prices)

In [35]:
df_power

Unnamed: 0.1,Unnamed: 0,date_valid_from,date_valid_to,rank,consumption_range_kwh,tariff,supplier,price_year_eur,post_code,city,energy,spot_peak,spot_base,y_ahead_peak,y_ahead_base,d_ahead_peak,d_ahead_base
2021-05-24,5978519,2021-05-25,2021-09-21,3,500,Meltan,Criador,257.85,51469,Bergisch Gladbach,power,26.460833,36.83,74.6,64.2,26.46,31.65
2021-05-24,21583918,2021-05-25,2021-09-22,2,250,Meltan,Criador,183.92,51469,Bergisch Gladbach,power,26.460833,36.83,74.6,64.2,26.46,31.65
2021-05-24,38773927,2021-05-25,2021-09-22,2,250,Meltan,Criador,183.92,51381,Leverkusen,power,26.460833,36.83,74.6,64.2,26.46,31.65
2021-05-24,47075103,2021-05-25,2021-09-22,3,250,Meltan,Criador,185.12,51103,Köln,power,26.460833,36.83,74.6,64.2,26.46,31.65
2021-05-24,56655574,2021-05-25,2021-09-21,3,500,Meltan,Criador,260.25,51103,Köln,power,26.460833,36.83,74.6,64.2,26.46,31.65
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-10-14,67234438,2022-10-15,9999-12-31,8,9000,Gothorita,Lule älv,6568.43,58636,Iserlohn,power,283.841667,237.45,,,,
2022-10-14,67234439,2022-10-15,9999-12-31,10,4000,Tsareena,Bergwind,2869.89,37085,Göttingen,power,283.841667,237.45,,,,
2022-10-14,67234442,2022-10-15,9999-12-31,19,2000,Lurantis,Purga,1686.23,46049,Oberhausen,power,283.841667,237.45,,,,
2022-10-14,67234673,2022-10-15,9999-12-31,14,5600,Banette,Werra,4418.69,56070,Koblenz,power,283.841667,237.45,,,,
