In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import matplotlib_inline
import numpy as np
import pandas as pd
import seaborn as sns

import is3_broker_rl
from is3_broker_rl.api import Action, Reward

%matplotlib inline
# Select the "retina" figure format for figures rendered inline in the notebook.
matplotlib_inline.backend_inline.set_matplotlib_formats("retina")
# Apply seaborn's "whitegrid" style to every plot created below.
sns.set_style("whitegrid")

In [None]:
# "data" directory located next to the is3_broker_rl package directory
# (two levels above the package's __init__ file).
DATA_DIR = Path(is3_broker_rl.__file__).parent.parent / "data"

In [None]:
# Papermill parameter cell
# Name of the directory to analyse. Should contain consumption_action.csv and consumption_reward.csv.
# The name is resolved relative to DATA_DIR; with the empty default the CSV files are
# looked up directly inside DATA_DIR.
ANALYSIS_DIR_NAME = ""

In [None]:
# Directory holding the CSV files of the run to analyse.
ANALYSIS_DIR = DATA_DIR / ANALYSIS_DIR_NAME

In [None]:
# Load the logged actions and rewards of the run.
# The cells below read the columns "gameId", "timeslot" and "action" from df_action and,
# among others, "gameId", "timeslot", "episode_id", "reward", "last_action" and
# "cashPosition" from df_reward.
df_action = pd.read_csv(ANALYSIS_DIR / "consumption_action.csv")
df_reward = pd.read_csv(ANALYSIS_DIR / "consumption_reward.csv")

In [None]:
# Maps each Action member name to its value. The values are used to order the bars/subplots,
# the names are used as human-readable labels.
ACTION_VALUE_MAPPING = {a.name: a.value for a in Action}

In [None]:
# Fixes the issue that the experiment scheduler always assigns the same gameId
def write_unique_game_id(df):
    """Overwrite ``gameId`` in place with a unique label per game.

    A new game is detected on every row whose ``timeslot`` is smaller than the
    ``timeslot`` of the row directly before it. Rows before the first such
    reset are labelled ``"game0"``, rows from the first reset up to (but not
    including) the second one ``"game1"``, and so on.

    If no reset is found (single game or empty frame) ``df`` is left
    untouched, i.e. the existing ``gameId`` values are kept.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``timeslot`` column whose rows are in logging order.
        Modified in place; the ``gameId`` column is replaced (or created).

    Returns
    -------
    None
    """
    is_new_game_start = df["timeslot"] < df["timeslot"].shift()
    if not is_new_game_start.any():
        return

    # The running count of resets is exactly the zero-based game number of each row.
    game_number = is_new_game_start.cumsum()
    # Assign positionally (plain array) and replace the whole column at once: this works for
    # any index (also non-unique ones) and avoids writing strings into a numeric column slice.
    df["gameId"] = ("game" + game_number.astype(str)).to_numpy()

In [None]:
# Relabel gameId in both frames (in place) so that every game gets its own id.
write_unique_game_id(df_reward)
write_unique_game_id(df_action)

In [None]:
def plot_action_counts(df_action):
    """Draw a bar chart with the number of rows per action value.

    Bars are ordered by the values of ``ACTION_VALUE_MAPPING`` and labelled with
    the corresponding action names.

    Parameters
    ----------
    df_action : pandas.DataFrame
        Frame with an ``action`` column.
    """
    action_names = ACTION_VALUE_MAPPING.keys()
    action_values = ACTION_VALUE_MAPPING.values()

    fig, ax = plt.subplots(figsize=(16, 9))
    sns.countplot(data=df_action, x="action", order=action_values, ax=ax)
    # Replace the numeric tick labels by the action names (same order as the bars).
    ax.set_xticklabels(action_names)
    ax.set(xlabel="Action type", ylabel="Count", title="Action distribution")

In [None]:
# Overall distribution of the chosen actions across all games.
plot_action_counts(df_action)

In [None]:
def plot_action_count_over_time(df_action, df_reward):
    """Plot, for every action, how its relative frequency develops from game to game.

    One subplot is drawn per entry of ``ACTION_VALUE_MAPPING``. For each game the
    number of rows in ``df_action`` with that action is divided by the number of
    ``timeslot`` entries of the same game in ``df_reward`` and multiplied by
    ``ceil(len(df_reward) / len(df_action))``.

    Parameters
    ----------
    df_action : pandas.DataFrame
        Frame with ``gameId`` and ``action`` columns.
    df_reward : pandas.DataFrame
        Frame with ``gameId`` and ``timeslot`` columns. Its order of games defines
        the game numbering on the x axis.
    """
    action_values = list(ACTION_VALUE_MAPPING.values())
    # squeeze=False keeps ``axs`` an array even if there is only a single action.
    fig, axs = plt.subplots(figsize=(21, 14), nrows=len(action_values), sharey=True, squeeze=False)

    timeslots_per_game = df_reward.groupby("gameId", sort=False)["timeslot"].count()
    action_value_counts = (
        df_action[["gameId", "action"]]
        .groupby(["gameId", "action"], sort=False)
        .size()
        .unstack(fill_value=0)
        # Align the rows with the games of df_reward by label (not by position) and make sure
        # every known action has a column, even if it was never chosen.
        .reindex(index=timeslots_per_game.index, columns=action_values, fill_value=0)
    )
    action_per_reward = np.ceil(len(df_reward) / len(df_action))
    action_value_fractions = (
        action_value_counts.divide(timeslots_per_game, axis=0) * action_per_reward
    ).reset_index(drop=True)  # 0..n-1 game numbers for the x axis
    max_fraction = action_value_fractions.max().max()

    for ax, (action_name, action_value) in zip(axs.flat, ACTION_VALUE_MAPPING.items()):
        sns.lineplot(data=action_value_fractions[action_value], ax=ax)
        ax.set_title(f"Fraction of action={action_name} over time for each game")
        ax.set_xlabel("Game number")
        ax.set_ylabel("Action count fraction")
        ax.set_ylim((0, max_fraction))
        ax.set_xticks(action_value_fractions.index)
        ax.margins(x=0)
    fig.tight_layout()

In [None]:
# Per-game action frequencies, one subplot per action.
plot_action_count_over_time(df_action, df_reward)

In [None]:
def timestep_reg_plot(df_reward, column, window_size=168):
    """Plot the rolling mean of ``column`` over all rows together with a regression line.

    Parameters
    ----------
    df_reward : pandas.DataFrame
        Frame containing ``column`` and a ``gameId`` column.
    column : str
        Name of the column to smooth and plot.
    window_size : int, default 168
        Number of rows of the rolling window.

    Notes
    -----
    A red vertical line is drawn for every row where ``gameId`` differs from the
    previous row, shifted to the right by ``window_size``.
    NOTE(review): the shift presumably compensates for the lag of the rolling
    window - confirm.
    """
    fig, ax = plt.subplots(figsize=(16, 9))

    # Smooth once and reuse the result for both the line and the regression.
    smoothed = df_reward[[column]].rolling(window_size).mean()
    sns.lineplot(data=smoothed[column], label=f"Mean {column}", ax=ax)
    sns.regplot(
        data=smoothed,
        x=df_reward.index,
        y=column,
        scatter=False,
        marker=".",
        label="OLS fit",
        ax=ax,
    )

    is_first_row_of_game = df_reward["gameId"] != df_reward["gameId"].shift()
    marker_positions = df_reward[is_first_row_of_game].index + window_size
    # y range 0..1 in axis coordinates: the lines always span the full height of the plot.
    ax.vlines(
        marker_positions,
        0,
        1,
        transform=ax.get_xaxis_transform(),
        colors="r",
        alpha=0.2,
        label="New game",
    )

    ax.set(
        xlabel="Timeslot",
        ylabel=column,
        title=f"Rolling mean {column} with window size {window_size}",
    )
    ax.legend()

In [None]:
# Trend of "consumption_profit", smoothed over 3500 rows (overrides the default window of 168).
timestep_reg_plot(df_reward, "consumption_profit", window_size=3500)

In [None]:
# Trend of "cashPosition", smoothed over 3500 rows.
timestep_reg_plot(df_reward, "cashPosition", window_size=3500)

In [None]:
def plot_final_cash_position(df_reward):
    """Plot the length-adjusted cash position of the last row of every game.

    The last row of a game is the row whose successor has a different ``gameId``.
    Its ``cashPosition`` is divided by the largest final ``timeslot`` of all games
    and multiplied by the game's own final ``timeslot``. The adjusted values are
    drawn as a line over the game number together with a regression line.

    NOTE(review): this scaling shrinks the value of games that ended at an earlier
    timeslot; if the intent is to extrapolate short games to full length the
    factor would have to be inverted - confirm the intended adjustment.

    Parameters
    ----------
    df_reward : pandas.DataFrame
        Frame with ``gameId``, ``timeslot`` and ``cashPosition`` columns.
    """
    fig, ax = plt.subplots(figsize=(16, 9))

    is_last_row_of_game = df_reward["gameId"].shift(-1) != df_reward["gameId"]
    # reset_index yields a fresh frame whose 0..n-1 index serves as the game number.
    final_rows = df_reward[is_last_row_of_game].reset_index()
    latest_final_timeslot = final_rows["timeslot"].max()
    final_rows["cashPosition"] = final_rows["cashPosition"] / latest_final_timeslot * final_rows["timeslot"]

    shared_kwargs = dict(x=final_rows.index, y="cashPosition", data=final_rows, ax=ax)
    sns.lineplot(label="Final cash position", **shared_kwargs)
    sns.regplot(scatter=False, marker=".", label="OLS fit", **shared_kwargs)

    ax.set(
        xlabel="Game",
        ylabel="Cash position",
        title="Final cash position per game (adjusted for game length)",
    )
    ax.set_xticks(final_rows.index)
    ax.margins(x=0)
    ax.legend()

In [None]:
# Length-adjusted final cash position of every game.
plot_final_cash_position(df_reward)

In [None]:
# Trend of "capacity_costs", smoothed over 3500 rows.
timestep_reg_plot(df_reward, "capacity_costs", window_size=3500)

In [None]:
# Trend of "consumption_fees", smoothed over 3500 rows.
timestep_reg_plot(df_reward, "consumption_fees", window_size=3500)

In [None]:
# Trend of "balancing_costs", smoothed over 3500 rows.
timestep_reg_plot(df_reward, "balancing_costs", window_size=3500)

In [None]:
# Trend of the per-row "reward", smoothed over 3500 rows.
timestep_reg_plot(df_reward, "reward", window_size=3500)

In [None]:
def plot_mean_episode_reward_reg(df_reward, df_action, window_size=10):
    """Plot the smoothed mean reward per episode together with a regression line.

    The ``reward`` column is averaged per ``episode_id`` (episodes in order of first
    appearance), then smoothed with a rolling mean over ``window_size`` episodes.

    Parameters
    ----------
    df_reward : pandas.DataFrame
        Frame with ``episode_id``, ``reward`` and ``gameId`` columns.
    df_action : pandas.DataFrame
        Only its length is used (shown as ``N_TIMESTEPS`` in the title).
    window_size : int, default 10
        Number of episodes of the rolling window.

    Notes
    -----
    A red vertical line is drawn at every episode whose first ``gameId`` differs
    from that of the previous episode, shifted to the right by ``window_size``.
    """
    fig, ax = plt.subplots(figsize=(21, 11))
    episodes = df_reward.groupby("episode_id", sort=False)

    # Aggregate only the column that is plotted: averaging the whole frame raises a TypeError
    # on non-numeric columns such as the string gameId with pandas >= 2.0.
    df_mean_episode_reward = episodes["reward"].mean().reset_index()
    smoothed_reward = df_mean_episode_reward[["reward"]].rolling(window_size).mean()

    sns.lineplot(
        x=df_mean_episode_reward.index,
        y="reward",
        data=smoothed_reward,
        label="Mean episode reward",
        ax=ax,
    )
    sns.regplot(
        x=df_mean_episode_reward.index,
        y="reward",
        data=smoothed_reward,
        scatter=False,
        marker=".",
        ax=ax,
        label="OLS fit",
    )

    df_episode_game_id = episodes["gameId"].first().reset_index()
    is_first_episode_of_game = df_episode_game_id["gameId"].shift() != df_episode_game_id["gameId"]
    # y range 0..1 in axis coordinates: the lines always span the full height of the plot.
    ax.vlines(
        df_episode_game_id[is_first_episode_of_game].index + window_size,
        0,
        1,
        transform=ax.get_xaxis_transform(),
        colors="r",
        alpha=0.3,
        label="New game",
    )
    ax.set_xlabel("Episode")
    ax.set_ylabel("Aggregated reward")
    ax.legend()
    ax.set_title(
        f"Mean episode reward (N_EPISODES: {df_reward['episode_id'].nunique()}, N_TIMESTEPS: {len(df_action)})"
    )
    ax.margins(x=0)

In [None]:
# Mean reward per episode, smoothed over 50 episodes (overrides the default window of 10).
plot_mean_episode_reward_reg(df_reward, df_action, window_size=50)

In [None]:
def plot_mean_reward_per_action(df_reward):
    """Draw a bar chart of the mean ``reward`` grouped by ``last_action``.

    Rows where ``last_action`` or ``reward`` is missing are ignored. Bars are
    ordered by the values of ``ACTION_VALUE_MAPPING`` and labelled with the
    corresponding action names. The passed frame is not modified.

    Parameters
    ----------
    df_reward : pandas.DataFrame
        Frame with ``last_action`` and ``reward`` columns.
    """
    fig, ax = plt.subplots(figsize=(21, 11))
    # Only look at the two columns that are used: a NaN in an unrelated column must not remove
    # a row from the average. The selection + dropna also yields a new frame, so the caller's
    # data stays untouched.
    action_rewards = df_reward[["last_action", "reward"]].dropna()
    # Missing values force a float column on load; cast back so the group keys match the
    # action values.
    action_rewards = action_rewards.astype({"last_action": np.uint8})
    mean_reward_per_action = action_rewards.groupby("last_action").mean()
    sns.barplot(
        x=mean_reward_per_action.index,
        y="reward",
        data=mean_reward_per_action,
        ax=ax,
        order=ACTION_VALUE_MAPPING.values(),
    )
    ax.set_xticklabels(ACTION_VALUE_MAPPING.keys())
    ax.set_xlabel("Action")
    ax.set_ylabel("Mean reward associated with the action")
    ax.set_title("Mean reward per action")

In [None]:
# Average reward observed after each action.
plot_mean_reward_per_action(df_reward)

In [None]:
def plot_reward_correlation_matrix(df_reward):
    """Draw a heatmap of the pairwise correlations of the remaining numeric columns.

    The columns ``timeslot``, ``gameId`` and every field of the ``Reward`` model are
    removed first. Only the lower triangle (without the diagonal) is shown.

    Parameters
    ----------
    df_reward : pandas.DataFrame
        Frame that contains at least ``timeslot``, ``gameId`` and all ``Reward``
        fields as columns (``drop`` raises a ``KeyError`` otherwise).
    """
    fig, ax = plt.subplots(figsize=(14, 14))
    features = df_reward.drop(columns=["timeslot", "gameId", *Reward.__fields__.keys()])
    # Restrict to numeric/bool columns explicitly: with pandas >= 2.0 ``corr`` no longer drops
    # non-numeric columns silently but raises instead.
    corr = features.select_dtypes(include=["number", "bool"]).corr()
    # Hide the upper triangle and the diagonal, they only mirror the lower triangle.
    mask = np.triu(np.ones_like(corr, dtype=bool))
    cmap = sns.diverging_palette(230, 20, as_cmap=True)
    sns.heatmap(corr, mask=mask, cmap=cmap, annot=True, square=True, ax=ax)
    ax.set_title("Correlation of state space and reward function")

In [None]:
# Correlation heatmap of the columns left after removing timeslot, gameId and the Reward fields.
plot_reward_correlation_matrix(df_reward)