In [54]:
import numpy as np
import pandas as pd
from pandas.core.arrays.period import timedelta

import plotly.graph_objs as go
import plotly.express as px
import plotly.io as pio

In [55]:
#Helper functions
def rename_prices(df):
    """Rename the ``PRICES`` column of ``df`` to ``datetime``, in place.

    When ``df`` has no ``PRICES`` column a notice is printed and ``df`` is
    left untouched. Nothing is returned in either case.
    """
    if 'PRICES' not in df.columns:
        print("There's no column PRICES.")
        return
    df.rename(columns={"PRICES": "datetime"}, inplace = True)


def dataformatting(df):
    """Reshape the wide price table (one row per day) into a long hourly table.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have a datetime-typed ``datetime`` column; the columns at
        positions 1..24 are melted as the hourly price columns. The last two
        characters of each of those column labels must parse as the hour
        number (assumed to run 1..24 -- TODO confirm against the spreadsheet).

    Returns
    -------
    pandas.DataFrame
        Columns ``time``, ``date``, ``hour``, ``price`` ordered by date and
        hour, with a fresh 0..n-1 index. ``time`` is ``date`` plus
        ``hour - 1`` hours, so hour 1 is midnight and the series ends on the
        last day at 23:00 instead of spilling into the next day.
    """
    # wide to long: one row per (day, hour column)
    long_df = df.melt(id_vars=['datetime'], value_vars=df.columns[1:25])

    # hour from string to int, done BEFORE sorting so the order is numeric
    # even when the labels are not zero-padded ("Hour 2" vs "Hour 10")
    long_df['variable'] = long_df['variable'].map(lambda label: int(label[-2:]))
    long_df = long_df.sort_values(['datetime', 'variable']).reset_index(drop=True)

    # master time column, derived from the parsed hour rather than from the
    # row position, and computed in one vectorised step
    long_df['time'] = long_df['datetime'] + pd.to_timedelta(long_df['variable'] - 1, unit='h')

    # renaming and putting the master time column first
    long_df = long_df.rename(columns={"datetime": "date", "variable": "hour", "value": "price"})
    return long_df[['time', 'date', 'hour', 'price']]

def drop_n_last_rows(df, n):
    """Remove the last ``n`` rows of ``df`` in place.

    Rows are removed by index label, so the index is expected to be unique.
    ``n == 0`` leaves ``df`` unchanged. Returns ``None``.

    Raises
    ------
    ValueError
        If ``n`` is negative.
    """
    if n < 0:
        raise ValueError("n must be non-negative")
    # honour n: the previous implementation always dropped exactly one row
    df.drop(df.tail(n).index, inplace=True)
    
def conv(fld):
    """Convert one delimited field of a results file to ``float``.

    The fields come from a list printed on a single line, so the first field
    may start with ``[`` and the last may end with ``]``; both are stripped.

    ``fld`` may be ``bytes`` or ``str``: depending on the NumPy version and
    the ``encoding`` argument, ``np.loadtxt`` hands converters either type.
    """
    if isinstance(fld, (bytes, bytearray)):
        fld = fld.decode('latin1')
    # strip surrounding whitespace first so a bracket next to a space is removed too
    return float(fld.strip().strip('[]'))
    
def load_validation_results(txt_file_name:str):
    """Join the validation prices with the actions and rewards of one run.

    Reads ``validate.xlsx`` from the working directory, reshapes it to the
    long hourly format and drops its last row (no action/reward exists for
    that final state). One line of ``txt_file_name`` is then read for the
    actions (after skipping 3 lines) and one for the rewards (after skipping
    1 line); they are attached as the ``action`` and ``reward`` columns.

    Returns the resulting DataFrame.
    """
    val = pd.read_excel('validate.xlsx')
    rename_prices(val)
    val = dataformatting(val)

    # Drop the last row from validation data due to lack of action/reward taken when in that state
    drop_n_last_rows(val, n=1)

    def _read_single_line(lines_to_skip):
        # One comma-separated line, brackets stripped by `conv`.
        return np.loadtxt(txt_file_name, delimiter=',', skiprows=lines_to_skip,
                          max_rows=1, dtype=float, converters=conv)

    # To load the actions (reward/mkt_price)
    val['action'] = _read_single_line(3)

    # To load the rewards
    val['reward'] = _read_single_line(1)

    return val

# Baseline plots (OLD)

In [56]:
def buy_sell_hodl_plot(df, howmanydays=5):
    """Plot prices over a random window, coloured by the action taken.

    A window of ``howmanydays * 24`` consecutive rows is picked at random
    (unseeded ``np.random``), so every call may show a different period.
    The figure is written to ``./plots/actions_on_prices_plot.png``
    (overwriting any existing file) and then shown.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``time`` (timestamps), ``price`` and ``action`` columns.
    howmanydays : int
        Window length in days.

    Raises
    ------
    ValueError
        If ``howmanydays`` is not positive or ``df`` has fewer rows than the
        requested window.
    """
    howmanyhours = howmanydays*24
    last_start = len(df) - howmanyhours
    if howmanyhours < 1 or last_start < 0:
        raise ValueError(
            f"Cannot pick a {howmanydays}-day window ({howmanyhours} rows) from {len(df)} rows."
        )
    # randint's upper bound is exclusive: +1 lets the final window be chosen
    # and makes a frame of exactly `howmanyhours` rows valid.
    ri = np.random.randint(0, last_start + 1)

    # Positional slice, so the function does not rely on a 0..n-1 index.
    window = df.iloc[ri:ri+howmanyhours]
    actions = window['action']
    first_ts = window['time'].iloc[0]
    last_ts = window['time'].iloc[-1]

    # Colorbar ticks sit at the window's max / 0 / min action value.
    trace = dict(type='scatter',
                 x=window['time'],
                 y=window['price'],
                 mode='lines+markers',
                 marker=dict(color=actions,
                             colorscale='Cividis', size=7,
                             colorbar=dict(thickness=20,
                                           tickvals=[actions.max(), 0.0, actions.min()],
                                           ticktext=['Sell', 'Hold','Buy'])))

    axis_style = dict(zeroline=False, showline=True, mirror=True)
    layout = dict(width=900, height=600,
                  title=f'Actions taken vs. energy prices between {first_ts.day} {first_ts.month_name()} - {last_ts.day} {last_ts.month_name()} {last_ts.year}',
                  xaxis=axis_style,
                  yaxis=axis_style,
                  yaxis_title="Energy market price (Euro/MWh)",
                  hovermode='closest', showlegend=False)
    fig = go.FigureWidget(data=[trace], layout=layout)

    # WARNING! It will overwrite the previous plot (on different data)
    fig.write_image("./plots/actions_on_prices_plot.png", scale=5)
    fig.show()

def reward_sum_monthly_plot(df):
    """Show a histogram of ``price * action`` summed per calendar month.

    Side effects: adds (or overwrites) the ``reward_unscaled`` column on the
    caller's ``df``, writes ``./plots/monthly_rewars_sum_plot.png`` and shows
    the figure. Returns ``None``.
    """
    df['reward_unscaled'] = df['price'].mul(df['action'])

    fig = px.histogram(
        df,
        x="time",
        y="reward_unscaled",
        histfunc="sum",
        title="Rewards sum per month throughout 2 years of validation ",
    )
    # One bin and one tick per month.
    fig.update_traces(xbins_size="M1")
    xaxis_opts = dict(showgrid=True, ticklabelmode="period", dtick="M1", tickformat="%b\n%Y")
    fig.update_xaxes(**xaxis_opts)
    layout_opts = dict(bargap=0.2, xaxis_title="Date (monthly bins)", yaxis_title="Sum of rewards")
    fig.update_layout(**layout_opts)

    # WARNING! It will overwrite the previous plot (on different data)
    fig.write_image("./plots/monthly_rewars_sum_plot.png", scale=5)
    fig.show()
    
def cumsum_plot(df, monthly:bool=False):
    """Show the running total of ``price * action`` as bars.

    The cumulative sum is reduced to one value per calendar day (the daily
    maximum of every column). With ``monthly=True`` those daily values are
    binned per month (max per bin) and only shown; otherwise one bar per day
    is drawn, saved to ``./plots/cumsum_rewards_plot.png`` and shown.

    Side effect: adds (or overwrites) ``reward_unscaled`` and
    ``cumsum_reward`` on the caller's ``df``. Returns ``None``.
    """
    df['reward_unscaled'] = df['price'].mul(df['action'])
    df['cumsum_reward'] = df['reward_unscaled'].cumsum()

    stamps = df.time.dt
    grouped = df.groupby([stamps.year, stamps.month, stamps.day]).max()

    plot_title = "Cumulative sum of rewards earned throughtout 2 years of validation"
    axis_labels = {"time": "Dates", "cumsum_reward": "Total reward"}

    if not monthly:
        fig = px.bar(grouped, x='time', y="cumsum_reward",
                     title=plot_title, labels=axis_labels)
        fig.update_traces(marker_color='crimson')
        fig.update_layout(bargap=0.02)

        # WARNING! It will overwrite the previous plot (on different data)
        fig.write_image("./plots/cumsum_rewards_plot.png", scale=5)
        fig.show()
        return

    fig = px.histogram(grouped, x='time', y="cumsum_reward", histfunc="max",
                       title=plot_title, labels=axis_labels)
    fig.update_traces(xbins_size="M1")
    fig.update_traces(marker_color='crimson')
    fig.update_xaxes(showgrid=True, ticklabelmode="period", dtick="M1", tickformat="%b\n%Y")
    fig.update_layout(bargap=0.02, xaxis_title="Date (monthly bins)", yaxis_title="Total reward")

    fig.show()
        

## Results on archived_results/baseline_result_old(discrete).txt

In [57]:
# Validation prices joined with the actions/rewards stored in the archived baseline (discrete) results file.
val=load_validation_results('archived_results/baseline_result_old(discrete).txt')
# Actions over a randomly chosen 14-day window; the window differs on every run.
buy_sell_hodl_plot(val, 14)
# Monthly sums of price*action; adds a 'reward_unscaled' column to `val` as a side effect.
reward_sum_monthly_plot(val)
# One bar per day (monthly=False); also adds 'cumsum_reward' to `val`.
cumsum_plot(val, False)  

# Baseline Tabular Q-Learning

In [58]:
# Plotting functions

def cumsum_plot_final(df, bigBool):
    """Build a bar chart of the cumulative ``reward``, one bar per calendar day.

    ``df`` is copied, so the caller's frame is not modified. The timestamps
    are taken from ``validate.xlsx`` (re-read on every call) and attached by
    index. ``bigBool`` only selects the subtitle: truthy gives
    "(additional features)", falsy gives "(basic features)".

    Returns the plotly figure; nothing is shown or saved here.
    """
    df = df.copy()

    # loading validation original data to transfer the dates
    val = pd.read_excel('validate.xlsx')
    rename_prices(val)
    val = dataformatting(val)

    df['time'] = val['time']
    df['cumsum_reward'] = df['reward'].cumsum()

    # Daily maximum of every column, keyed by (year, month, day).
    stamps = df.time.dt
    grouped = df.groupby([stamps.year, stamps.month, stamps.day]).max()

    plot_title = (
        "Cumulative sum of rewards earned throughtout 2 years of validation<br>(additional features)"
        if bigBool else
        "Cumulative sum of rewards earned throughtout 2 years of validation<br>(basic features)"
    )
    fig = px.bar(grouped, x='time', y="cumsum_reward",
                 title=plot_title,
                 labels={"time": "Dates", "cumsum_reward": "Total reward"})

    fig.update_traces(marker_color='crimson')
    fig.update_layout(bargap=0.02)

    return fig

def buy_sell_hodl_plot_final(df, bigBool, howmanydays=5):
    """Build a price plot over a random window, coloured by the action taken.

    ``df`` is copied, so the caller's frame is not modified. Timestamps and
    prices are taken from ``validate.xlsx`` (re-read on every call) and
    attached by index. A window of ``howmanydays * 24`` consecutive rows is
    picked at random (unseeded ``np.random``), so each call may show a
    different period.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs an ``action`` column; its index must line up with the rows of
        the reshaped validation table.
    bigBool : bool
        Selects the subtitle: truthy gives "(additional features)", falsy
        gives "(basic features)".
    howmanydays : int
        Window length in days.

    Returns
    -------
    plotly.graph_objs.FigureWidget
        The figure; nothing is shown or saved here.

    Raises
    ------
    ValueError
        If ``howmanydays`` is not positive or ``df`` has fewer rows than the
        requested window.
    """
    df = df.copy()

    # loading validation original data to transfer the dates
    val = pd.read_excel('validate.xlsx')
    rename_prices(val)
    val = dataformatting(val)

    df['time'] = val['time']
    df['price'] = val['price']

    howmanyhours = howmanydays*24
    last_start = len(df) - howmanyhours
    if howmanyhours < 1 or last_start < 0:
        raise ValueError(
            f"Cannot pick a {howmanydays}-day window ({howmanyhours} rows) from {len(df)} rows."
        )
    # randint's upper bound is exclusive: +1 lets the final window be chosen
    # and makes a frame of exactly `howmanyhours` rows valid.
    ri = np.random.randint(0, last_start + 1)

    # Positional slice, so the function does not rely on a 0..n-1 index.
    window = df.iloc[ri:ri+howmanyhours]
    actions = window['action']
    first_ts = window['time'].iloc[0]
    last_ts = window['time'].iloc[-1]

    # Colorbar ticks sit at the window's max / 0 / min action value.
    trace = dict(type='scatter',
                 x=window['time'],
                 y=window['price'],
                 mode='lines+markers',
                 marker=dict(color=actions,
                             colorscale='Bluered', size=7,
                             colorbar=dict(thickness=20,
                                           tickvals=[actions.max(), 0.0, actions.min()],
                                           ticktext=['Sell', 'Hold','Buy'])))

    axis_style = dict(zeroline=False, showline=True, mirror=True)

    # The two variants differ only in the subtitle.
    subtitle = '(additional features)' if bigBool else '(basic features)'
    layout = dict(width=900, height=600,
                  title=f'Actions taken vs. energy prices between {first_ts.day} {first_ts.month_name()} - {last_ts.day} {last_ts.month_name()} {last_ts.year}<br>{subtitle}',
                  xaxis=axis_style,
                  yaxis=axis_style,
                  yaxis_title="Energy market price (Euro/MWh)",
                  hovermode='closest', showlegend=False)

    fig = go.FigureWidget(data=[trace], layout=layout)

    return fig

### Cumulative sum of rewards

In [59]:
#loading results for cumulative reward
cum_rew_small_tabular=pd.read_csv('archived_results/tabular_q/cummulative_rewards_tab.csv')

# Cumlative sum of rewards plot
fig = cumsum_plot_final(cum_rew_small_tabular, False)
# WARNING! It will overwrite the previous plot (on different data)
fig.write_image("./plots/final/tabular_q_cumsum_reward_plot.png", scale=5)
fig.show()

### Action energy prices plot

In [60]:
# Prices coloured by action over a randomly chosen window (default 5 days), "(basic features)" subtitle.
fig = buy_sell_hodl_plot_final(cum_rew_small_tabular, False)
# WARNING: overwrites any existing PNG at this path; the window is random, so the image changes between runs
fig.write_image("./plots/final/tabular_q_actions_on_prices_plot.png", scale=5)
fig.show()

# DDQN Plots

## Data small

### Cumulative sum of rewards

In [61]:
#loading results for cumulative reward
cum_rew_small=pd.read_csv('archived_results/ddqn_small/cummulative_rewards.csv')

# Cumlative sum of rewards plot
fig = cumsum_plot_final(cum_rew_small, False)
# WARNING! It will overwrite the previous plot (on different data)
fig.write_image("./plots/final/ddqn_small_cumsum_reward_plot.png", scale=5)
fig.show()


### Action energy prices plot

In [62]:
# Prices coloured by action over a randomly chosen window (default 5 days), "(basic features)" subtitle.
fig = buy_sell_hodl_plot_final(cum_rew_small, False)
# WARNING: overwrites any existing PNG at this path; the window is random, so the image changes between runs
fig.write_image("./plots/final/ddqn_small_actions_on_prices_plot.png", scale=5)
fig.show()

### Train vs. Val rewards plot

In [63]:
# Train/validation reward log of the small-feature DDQN run; the plot uses its
# 'step', 'train_reward' and 'val_reward' columns.
vs_small=pd.read_csv('archived_results/ddqn_small/train_val_rewards.csv')
# Passing a list as `y` draws one line per column; the `labels` keys "value" and
# "variable" rename the y-axis and legend titles.
fig = px.line(vs_small, x="step", y=["train_reward", "val_reward" ], title='Train vs validation reward, reported every 2000 steps<br>(basic features) ', markers=False, labels={
                     "value": "Reward",
                     "step": "Steps in environment",
                     "variable": "Dataset"})

# WARNING: overwrites any existing PNG at this path
fig.write_image("./plots/final/ddqn_small_train_vs_val_reward.png", scale=5)
fig.show()

## Data big

In [64]:
#loading results for cumulative reward
cum_rew_big=pd.read_csv('archived_results/ddqn_big/cummulative_rewards.csv')
cum_rew_big.head(5)


Unnamed: 0,reward,action,step
0,-24.27975,-1.839375,0
1,-24.169387,-1.839375,1
2,-16.924294,-1.430625,2
3,0.0,0.0,3
4,0.0,0.0,4


### Cumulative sum of rewards

In [65]:
# Cumulative sum of rewards plot; True selects the "(additional features)" subtitle
fig = cumsum_plot_final(cum_rew_big, True)
# WARNING: overwrites any existing PNG at this path
fig.write_image("./plots/final/ddqn_big_cumsum_rewards_plot.png", scale=5)
fig.show() 

### Action energy prices plot

In [66]:
# Prices coloured by action over a randomly chosen window (default 5 days), "(additional features)" subtitle.
fig = buy_sell_hodl_plot_final(cum_rew_big, True)
# WARNING: overwrites any existing PNG at this path; the window is random, so the image changes between runs
fig.write_image("./plots/final/ddqn_big_actions_on_prices_plot.png", scale=5)
fig.show()

### Train vs. Val rewards plot

In [67]:
# Train/validation reward log of the big-feature DDQN run
vs_big=pd.read_csv('archived_results/ddqn_big/train_val_rewards.csv')
# Preview the first rows (columns: train_reward, val_reward, step)
vs_big.head()

Unnamed: 0,train_reward,val_reward,step
0,52697.754978,23215.747508,2000
1,168036.368077,54534.301346,4000
2,169057.337859,57461.547059,6000
3,170201.238141,56386.934398,8000
4,186258.169541,56681.910061,10000


In [68]:
# Passing a list as `y` draws one line per column; the `labels` keys "value" and
# "variable" rename the y-axis and legend titles.
fig = px.line(vs_big, x="step", y=["train_reward", "val_reward" ], title='Train vs validation reward, reported every 2000 steps<br>(additional features) ', markers=False, labels={
                     "value": "Reward",
                     "step": "Steps in environment",
                     "variable": "Dataset"})

# WARNING: overwrites any existing PNG at this path
fig.write_image("./plots/final/ddqn_big_train_vs_val_reward.png", scale=5)
fig.show()

Comparison Plot - Cumulative Reward Between 3 Models (Tabular, Continuous (basic), Continuous (additional))

In [69]:
# Re-read the per-step results of the three models under fresh names, so the
# comparison does not depend on frames loaded by earlier cells.
cont_big_rewards_df = pd.read_csv('archived_results/ddqn_big/cummulative_rewards.csv')
cont_small_rewards_df = pd.read_csv('archived_results/ddqn_small/cummulative_rewards.csv')
tabular_q_rewards_df = pd.read_csv('archived_results/tabular_q/cummulative_rewards_tab.csv')

In [70]:
def add_cummulative_reward_col(df:pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with a ``cum_reward`` column added.

    ``cum_reward`` is the running sum of the ``reward`` column. The input
    frame itself is left unchanged.
    """
    return df.assign(cum_reward=df['reward'].cumsum())

In [71]:
# Rebind each name to a copy that carries the running reward total in 'cum_reward'.
# Re-running this cell is harmless: the column is simply recomputed from 'reward'.
cont_big_rewards_df = add_cummulative_reward_col(cont_big_rewards_df)
cont_small_rewards_df = add_cummulative_reward_col(cont_small_rewards_df)
tabular_q_rewards_df = add_cummulative_reward_col(tabular_q_rewards_df)

In [72]:
# Final cumulative reward of each model. Each frame is read at its OWN last
# row (positional), instead of indexing all three with the length of the
# "big" frame, which only worked while the three frames had equal length.
print(cont_big_rewards_df['cum_reward'].iloc[-1])
print(cont_small_rewards_df['cum_reward'].iloc[-1])
print(tabular_q_rewards_df['cum_reward'].iloc[-1])


69902.06128049998
78918.89008049993
47807.24050574991


In [73]:
# Put the three cumulative-reward curves side by side (aligned on the row index);
# the shared 'steps' axis is taken from the DDQN "big" run.
combined_df = pd.DataFrame(data={'rewards_big':cont_big_rewards_df['cum_reward'],'rewards_small':cont_small_rewards_df['cum_reward'],'rewards_tab':tabular_q_rewards_df['cum_reward'],'steps':cont_big_rewards_df['step']})

In [74]:
# One line per model; the `labels` keys "value" and "variable" rename the
# y-axis and legend titles.
fig = px.line(combined_df, x="steps", y=["rewards_big", "rewards_small", "rewards_tab" ], title='Comparison of cummulative rewards over validation set across models', markers=False, labels={
                     "value": "Reward",
                     "steps": "Steps in environment",
                     "variable": "Dataset"})
fig.show()
# WARNING: overwrites any existing PNG at this path
fig.write_image("./plots/final/comparison_cummulative_rewards.png", scale=5)
