In [41]:
%matplotlib notebook
import gym
import plotly.graph_objects as go
from gym import spaces
import numpy as np
import os
from ipywidgets import widgets
import torch
from IPython.display import display
import pandas as pd
from stable_baselines3 import PPO, DQN
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from stable_baselines3.common.env_checker import check_env
from collections import deque

# Number of consecutive time steps in one sliding window. Read by SlideWindowStateFuc,
# SlideWindowRewardFuc and CustomEnv (maximum length of the recorded action path).
# NOTE(review): the original comment suggested using 1 with the default state function — confirm.
SLIDE_WINDOW_SIZE = 25
# NOTE(review): not referenced in any of the visible notebook cells — confirm it is still needed.
n_steps = 25

def defaultState(timeseries, cursor, action):
    """
    Build the default two-element observation for a single time step.

    :param timeseries: table-like object whose 'value' column can be indexed with ``cursor``
    :param cursor: the position where in the TimeSeries we are currently
    :param action: the action that was taken; only ``action == 1`` is encoded as 1
    :return: numpy array ``[value at cursor as float64, action flag (1 or 0)]``
    """
    current_value = np.float64(timeseries['value'][cursor])
    if action == 1:
        action_flag = 1
    else:
        action_flag = 0
    return np.asarray([current_value, action_flag])

def defaultReward(state, timeseries, cursor, action, path=None):
    """
    Reward the agent for classifying the current time step correctly.

    The reward depends only on whether the action agrees with the ground-truth
    label; ``state`` and ``path`` are accepted so that every reward function
    shares one call signature, but they are not read.

    :param state: current observation (unused)
    :param timeseries: table-like object whose 'anomaly' column can be indexed with ``cursor``
    :param cursor: position of the time step that is being judged
    :param action: the agent's decision; ``1`` means "anomaly", anything else "normal"
    :param path: recent action history (unused)
    :return: ``1`` if the decision matches the label, ``-1`` otherwise
    """
    flagged_as_anomaly = bool(action == 1)
    is_anomaly = bool(timeseries['anomaly'][cursor] == 1)
    # Always return a number: a missing reward (None) would break the training loop.
    return 1 if flagged_as_anomaly == is_anomaly else -1

def SlideWindowStateFuc(timeseries, timeseries_cursor, action=None, window_size=None):
    """
    Return the most recent series values, ending at the cursor, as the state.

    :param timeseries: table-like object whose 'value' column can be indexed by position
    :param timeseries_cursor: position of the current time step
    :param action: unused; accepted so all state functions share one signature
    :param window_size: number of values in the window; defaults to ``SLIDE_WINDOW_SIZE``
    :return: float64 numpy array of length ``window_size``. It holds the values at
             ``timeseries_cursor - window_size + 1 .. timeseries_cursor``, or all
             zeros while ``timeseries_cursor < window_size`` (warm-up phase).
    """
    if window_size is None:
        window_size = SLIDE_WINDOW_SIZE
    if timeseries_cursor < window_size:
        return np.zeros(window_size)
    first = timeseries_cursor - window_size + 1
    # Return an ndarray in both branches so callers always get the same type and dtype.
    return np.asarray([timeseries['value'][i] for i in range(first, timeseries_cursor + 1)],
                      dtype=np.float64)

def SlideWindowRewardFuc(state, timeseries, timeseries_cursor, action, path=None, window_size=None):
    """
    Reward the agent when its recent actions reproduce the labels of the current window.

    :param state: current observation (unused)
    :param timeseries: table-like object whose 'anomaly' column supports slicing
    :param timeseries_cursor: position of the current time step
    :param action: the latest action (unused; it is expected to be part of ``path``)
    :param path: recent actions ordered newest first, which is how ``CustomEnv.step``
                 fills its deque via ``appendleft``; ``None`` is treated as empty
    :param window_size: number of steps that are compared; defaults to ``SLIDE_WINDOW_SIZE``
    :return: ``0`` while ``timeseries_cursor < window_size``; afterwards ``1`` if the
             actions equal the labels of the window ending at the cursor, else ``-1``
    """
    if window_size is None:
        window_size = SLIDE_WINDOW_SIZE
    if timeseries_cursor < window_size:
        return 0
    recent_actions = [] if path is None else list(path)
    # The path is newest-first while the label window is oldest-first, so the path
    # is reversed to line both sequences up step by step before comparing them.
    chronological_actions = np.array(recent_actions[::-1])
    start = timeseries_cursor - window_size + 1
    labels = np.array(timeseries['anomaly'][start:timeseries_cursor + 1])
    return 1 if np.array_equal(chronological_actions, labels) else -1

class CustomEnv(gym.Env):
    """
    Gym environment that replays one labelled time series for anomaly detection.

    Each step consumes one row of the series: the agent answers with action 1
    ("anomaly") or 0 ("normal") and is rewarded by the configured reward function.
    """
    metadata = {'render.modes': ['human']}

    def __init__(self, statefunction=defaultState, rewardfunction=defaultReward, scaler=None,
                 file="real_5.csv", dir="./series/train/", verbose=True):
        """
        :param statefunction: callable ``(timeseries, cursor, action) -> observation``
        :param rewardfunction: callable ``(state, timeseries, cursor, action, path) -> reward``
        :param scaler: object with ``fit_transform`` used to rescale the 'value' column on
                       reset; a fresh ``MinMaxScaler`` is created per instance when omitted
        :param file: name of the CSV file to load
        :param dir: directory that contains ``file``
        :param verbose: print a short summary of the loaded series when True
        """
        super().__init__()
        self.filename = file
        # Join directory and file name properly so `dir` works with or without a trailing slash.
        self.file = os.path.join(dir, self.filename)
        # Position of the current time step; set to cursor_init by reset().
        self.cursor = -1
        self.cursor_init = 0
        self.statefunction = statefunction
        self.rewardfunction = rewardfunction
        # Create the default scaler here: a default-argument instance would be shared by all envs.
        self.scaler = MinMaxScaler() if scaler is None else scaler
        # Every action taken in the current episode, in chronological order.
        self.actions = []
        self.figure = go.FigureWidget()
        # Most recent entries, newest first (filled with appendleft in step()).
        self.path = deque([], maxlen=SLIDE_WINDOW_SIZE)

        # Columns 1 and 2 of the CSV are read and renamed to 'value' and 'anomaly'.
        self.timeseries_labeled = pd.read_csv(self.file, usecols=[1, 2], header=0, sep=",",
                                              names=['value', 'anomaly'],
                                              encoding="utf-8")
        self.action_space = spaces.Discrete(2)
        # NOTE(review): shape (2,) matches defaultState; SlideWindowStateFuc returns
        # SLIDE_WINDOW_SIZE values instead — confirm how that state function is meant to be used.
        self.observation_space = spaces.Box(low=0.0, high=1.0,
                                            shape=(2,), dtype=np.float32)
        if verbose:
            print(self.__str__())

    def step(self, action):
        """
        Apply ``action`` to the current time step and advance the cursor by one.

        :param action: the agent's decision for the current time step
        :return: ``(state, reward, done, info)``; ``info['true_label']`` is the label of
                 the step that was just judged
        """
        # Real actions are only recorded once the cursor has reached the window size;
        # before that the path is padded with zeros. A full deque(maxlen=...) drops its
        # right-most (oldest) entry automatically on appendleft.
        recorded = action if self.cursor >= SLIDE_WINDOW_SIZE else 0
        self.path.appendleft(recorded)
        # NOTE(review): the returned state is computed at the cursor *before* it advances,
        # so the next decision is based on the previous step's value — confirm this is intended.
        state = self.statefunction(self.timeseries_labeled, self.cursor, action)
        reward = self.rewardfunction(state, self.timeseries_labeled, self.cursor, action, self.path)
        self.actions.append(action)
        self.cursor += 1
        done = self.cursor >= self.timeseries_labeled['value'].size
        return state, reward, done, {"true_label": self.timeseries_labeled['anomaly'][self.cursor - 1]}

    def reset(self):
        """
        Start a new episode: rewind the cursor, clear the histories and rescale the values.

        :return: the initial observation (state function evaluated with action 0)
        """
        self.cursor = self.cursor_init
        self.actions = []
        self.path.clear()
        self.normalize_timeseries()
        init_state = self.statefunction(self.timeseries_labeled, self.cursor, 0)
        return init_state

    def render(self, plot, mode='human'):
        """
        Draw the actions, values and labels seen so far into ``plot``.

        :param plot: figure object offering ``add_scatter``, ``data`` and ``batch_update``
                     (e.g. ``self.figure``)
        :param mode: unused; kept for the gym render signature
        """
        if self.cursor < 1:
            return
        # Make sure the three traces exist exactly once, even if the figure is reused
        # for another episode or rendering starts after the first step.
        while len(plot.data) < 3:
            plot.add_scatter()
        if self.cursor > 1:
            # Copy the slice so adding a column never writes into the environment's data.
            series = self.timeseries_labeled.iloc[:self.cursor].copy()
            series["actions"] = self.actions[:self.cursor]
            with plot.batch_update():
                plot.data[0].y = series["actions"]
                plot.data[0].name = "Actions"
                plot.data[1].y = series["value"]
                plot.data[1].name = "Value"
                plot.data[2].y = series["anomaly"]
                plot.data[2].name = "Anomaly"

    def close(self):
        """Nothing to release."""
        pass

    def __str__(self):
        """
        :return: String Representation of the TimeSeriesEnvironment Class, mainly for debug information
        """
        return "TimeSeries from: {}\n Header(labeled):\n {} \nRows:\n " \
               "{}\nMeanValue:\n {}\nMaxValue:\n {}\nMinValue:\n {}".format(
            self.filename,
            self.timeseries_labeled.head(3),
            self.timeseries_labeled.shape[0],
            round(self.timeseries_labeled["value"].mean(), 2),
            round(self.timeseries_labeled["value"].max(), 2),
            round(self.timeseries_labeled["value"].min(), 2))

    def normalize_timeseries(self):
        """Refit the scaler on the 'value' column and overwrite the column with the result."""
        self.timeseries_labeled["value"] = self.scaler.fit_transform(self.timeseries_labeled[["value"]])



Check that the custom environment passes Stable-Baselines3's `check_env` validation

In [42]:
# Build the environment with its defaults (default state/reward functions, real_5.csv).
env = CustomEnv()
# It will check your custom environment and output additional warnings if needed
check_env(env)

TimeSeries from: real_5.csv
 Header(labeled):
    value  anomaly
0   2109        0
1   3229        0
2   3637        0 
Rows:
 1439
MeanValue:
 2843.37
MaxValue:
 83955
MinValue:
 1170


In [43]:
def evaluate(model, num_episodes=1):
    """
    Evaluate a RL agent and plot the actions of its best episode.

    :param model: (BaseRLModel object) the RL Agent
    :param num_episodes: (int) number of episodes to evaluate it, at least 1
    :return: (float) Mean reward over the ``num_episodes`` evaluated episodes
    :raises ValueError: if ``num_episodes`` is smaller than 1
    """
    if num_episodes < 1:
        raise ValueError("num_episodes must be at least 1")
    # This function will only work for a single Environment
    env = model.get_env()
    # Total reward and action trace of every evaluated episode
    all_episode_rewards = []
    all_episode_actions = []
    for _ in range(num_episodes):
        # one complete run until the environment reports done
        episode_rewards = []
        episode_actions = []
        done = False
        obs = env.reset()
        while not done:
            # _states are only useful when using LSTM policies
            action, _states = model.predict(obs)
            # with a vectorized env, action, reward and done are arrays of length one
            obs, reward, done, info = env.step(action)
            # take the single element explicitly instead of calling int() on an array
            episode_actions.append(int(np.asarray(action).reshape(-1)[0]))
            episode_rewards.append(reward)

        all_episode_actions.append(episode_actions)
        all_episode_rewards.append(float(np.sum(episode_rewards)))

    # The best episode is only needed once, after all episodes have finished.
    best_episode_actions = all_episode_actions[int(np.argmax(all_episode_rewards))]
    mean_reward = float(np.mean(all_episode_rewards))

    print("Maximum Reward: ", np.max(all_episode_rewards),
          "\nAverage Reward: ", mean_reward, "\n TestEpisodes: ", num_episodes)
    plot_result(env, best_episode_actions)
    return mean_reward

def plot_result(env, actions):
    """
    Plot one episode's actions against the true labels and the series values.

    :param env: vectorized environment; its first sub-environment's ``timeseries_labeled``
                attribute (columns 'value' and 'anomaly') is plotted
    :param actions: one action per row of the series
    """
    series = pd.DataFrame(env.get_attr("timeseries_labeled")[0])
    fig, ax = plt.subplots(figsize=(10, 7))
    ax.plot(series.index, actions, label="Actions", linestyle="solid")
    ax.plot(series.index, series["anomaly"], label="True Label", linestyle="dotted")
    ax.plot(series.index, series["value"], label="Series", linestyle="dashed")
    ax.legend()
    ax.set_xlabel('Time step')
    # The curves are actions, labels and series values, not a reward sum.
    ax.set_ylabel('Value / label')
    plt.show()

As we can see, our agent has too little information in its state to approximate the correct value function.
Therefore, we try to enrich the state by introducing a sliding-window state.

In [44]:
# NOTE(review): `log` is never passed on — tensorboard_log is None below, so presumably no
# TensorBoard output is written even though tb_log_name is set; confirm whether that is intended.
log = "./dqn_tensorboard/"
# DQN agent with an MLP policy on the environment created above; all hyperparameters are
# spelled out explicitly. Exploration is configured to go from 1.0 down to 0.0.
model = DQN("MlpPolicy", env, learning_rate=0.0001, buffer_size=50000, learning_starts=5000, batch_size=256, tau=1.0, gamma=0.99, 
            train_freq=4, gradient_steps=1, n_episodes_rollout=- 1, optimize_memory_usage=False, target_update_interval=10, exploration_fraction=0.1,
            exploration_initial_eps=1.0, exploration_final_eps=0.0, max_grad_norm=10, 
            tensorboard_log=None, create_eval_env=False, policy_kwargs=None, verbose=1, seed=None, device='auto', _init_setup_model=True)
# Train for 100000 environment steps, then evaluate over 10 episodes and plot the best one.
model.learn(total_timesteps=100000, tb_log_name="first_run")
evaluate(model, num_episodes=10)

Using cuda device
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/            |          |
|    exploration rate | 0.424    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 2967     |
|    time_elapsed     | 1        |
|    total timesteps  | 5756     |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.495    |
|    n_updates        | 188      |
----------------------------------
----------------------------------
| rollout/            |          |
|    exploration rate | 0        |
| time/               |          |
|    episodes         | 8        |
|    fps              | 1121     |
|    time_elapsed     | 10       |
|    total timesteps  | 11512    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 2.74     |
|    n_updates        | 1627     |
----------------------------------
----------------------------------
| 

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [45]:
# Fetch the FigureWidget that CustomEnv.__init__ stored as `figure` from the model's
# wrapped environment and show it as the cell output; the render loop below fills it.
plot = model.env.get_attr("figure")[0]
plot

FigureWidget({
    'data': [], 'layout': {'template': '...'}
})

We can still observe that our agent cannot detect hard cuts in falling anomalies, as these
are basically not detectable with the current state representation.

Next, we will try out the binary state function, which collects all states in our trace.


In [46]:
# Replay one full episode with the greedy policy and redraw the figure after every step.
state = env.reset()
done = False
while not done:
    action, _ = model.predict(state, deterministic=True)
    state, reward, done, info = env.step(action)
    env.render(plot)

In [47]:
from mpl_toolkits.mplot3d import Axes3D

def debug_states(model):
    """
    Run one episode and plot the (state value, reward, true label) triple of every step.

    :param model: (BaseRLModel object) the RL Agent
    :return: None; the collected triples are passed to ``plot_classification``
    """
    # This function will only work for a single Environment
    env = model.get_env()
    # One (state value, reward, true label) tuple per step of the episode
    observation = []
    done = False
    # get the first observation out of the environment
    obs = env.reset()
    while not done:
        # _states are only useful when using LSTM policies
        action, _states = model.predict(obs)
        # here, obs, reward and info are indexed with [0]
        # because we are using vectorized env
        obs, reward, done, info = env.step(action)
        state_value = obs[0][0]
        step_reward = reward[0]
        label = info[0].get("true_label")
        observation.append((state_value, step_reward, label))

    plot_classification(observation)
    
def plot_classification(observation):
    """
    Show the recorded steps as a 3D scatter plot.

    :param observation: sequence of ``(state value, reward, true label)`` tuples
    """
    # Transpose the list of triples once into three parallel columns.
    columns = list(zip(*observation))
    series_values = list(columns[0])
    step_rewards = list(columns[1])
    true_labels = list(columns[2])

    figure = plt.figure(figsize=(10, 7))
    axes = figure.add_subplot(111, projection='3d')
    axes.scatter(series_values, step_rewards, true_labels, c='y', marker='o')

    axes.set_xlabel('Series Value')
    axes.set_ylabel('Rewards')
    axes.set_zlabel('True Labels')

    plt.show()

# Run the state/reward/label diagnostic for the trained model.
debug_states(model)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …