In [1]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell execution, so edits to the project's src package are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Move the working directory two levels up so that the `src.*` imports in the
# next cell resolve.
# NOTE(review): the path is relative, so re-running this cell without a kernel
# restart moves up two more levels each time.
%cd ../..

/home/nassim/dev/conrl


In [3]:
from src.conrl import ConRL
from src.qlearning import QLearningAgent
from src.sarsalambda import SarsaLambdaAgent
from src.utils import *
from src.plotting import *

import itertools
import time
import numpy as np
import pandas as pd
import gym
import time
import sys
import copy
from collections import namedtuple
import wandb 
import pickle

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.collections as mc

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# NOTE(review): matplotlib 3.6 deprecated the bare 'seaborn-*' style names in
# favour of 'seaborn-v0_8-*'; this line needs updating if matplotlib is upgraded.
plt.style.use('seaborn-paper')
# Print numpy arrays with 3 decimals and wrap lines at 80 characters.
np.set_printoptions(precision=3, linewidth=80)



In [4]:
# Grid resolution handed to DiscretizationWrapper and, as `state_size`, to every
# agent built in the run_* functions below.
state_size = (10, 10)
env = DiscretizationWrapper(gym.make('MountainCar-v0'), state_size)

# Number of training episodes per experiment and the per-episode step limit.
num_episodes = 500
max_step = 1000
# The step limit is written to both the inner env's private attribute and the
# spec. NOTE(review): `env.env` is presumably the object returned by gym.make
# (gym's TimeLimit wrapper) -- confirm against DiscretizationWrapper.
env.env._max_episode_steps = max_step
env.spec.max_episode_steps = max_step

# Keyword arguments for QLearningAgent (see run_q). The two *_decay_rate
# entries are placeholders that are overwritten right after the literal.
q_params = {
    "gamma": 0.9,
    "alpha": 0.1,
    "alpha_decay_rate": 0,
    "min_alpha": 0.1,
    "epsilon": 0.9,
    "epsilon_decay_rate": 0,
    "min_epsilon": 0.01
}

# Per-episode decrement that takes the start value to its minimum over the
# first half of training (num_episodes // 2 episodes). alpha equals min_alpha
# here, so the alpha decay rate evaluates to 0.
q_params["epsilon_decay_rate"] = (q_params["epsilon"] - q_params["min_epsilon"])/(num_episodes//2)
q_params["alpha_decay_rate"] = (q_params["alpha"] - q_params["min_alpha"])/(num_episodes//2)

# Keyword arguments for SarsaLambdaAgent (see run_sl); same layout as q_params
# plus "trace_decay".
# NOTE(review): "alpha" (0.01) is smaller than "min_alpha" (0.1), so the
# alpha_decay_rate computed below is negative. Confirm how SarsaLambdaAgent
# handles a start value below its minimum, or align the two values.
sl_params = {
    "trace_decay": 0.9,
    "gamma": 0.9,
    "alpha": 0.01,
    "alpha_decay_rate": 0,
    "min_alpha": 0.1,
    "epsilon": 0.9,
    "epsilon_decay_rate": 0,
    "min_epsilon": 0.01
}

# Same schedule computation as for q_params.
sl_params["epsilon_decay_rate"] = (sl_params["epsilon"] - sl_params["min_epsilon"])/(num_episodes//2)
sl_params["alpha_decay_rate"] = (sl_params["alpha"] - sl_params["min_alpha"])/(num_episodes//2)

# Keyword arguments forwarded verbatim to ConRL.init_mlgng in run_conrl; the
# meaning of each key is defined by that method.
mlgng_params = {
    "ndim": 2, 
    "e_w":0.05, 
    "e_n":0.005, 
    "l":10, 
    "a":0.5, 
    "b":0.95,
    "k":1000.0, 
    "max_nodes": 10, 
    "max_age": 200,
    "node_multiplier": 10,
    "min_error": 5
}

In [5]:
def run_sl(**kwargs):
    """Train a freshly built SarsaLambdaAgent once and return its stats.

    Reads the notebook-level ``env``, ``num_episodes``, ``state_size`` and
    ``sl_params``. ``kwargs`` is accepted but unused, which lets
    ``run_experiments`` call every ``run_*`` function the same way.

    Returns:
        dict with the keys "step", "cumulative_reward", "q_tables" and
        "best_actions", in the state ``SarsaLambdaAgent.train`` left it.
    """
    n_actions = env.action_space.n

    # One Q-table snapshot slot per episode: (episode, *state grid, action).
    q_tables_shape = (num_episodes, *state_size, n_actions)

    history = {
        "step": np.zeros(num_episodes),
        "cumulative_reward": np.zeros(num_episodes),
        "q_tables": np.zeros(shape=q_tables_shape),
        "best_actions": [],
    }

    agent = SarsaLambdaAgent(action_size=n_actions, state_size=state_size, **sl_params)
    agent.train(env, num_episodes, history)
    return history

In [6]:
def run_q(**kwargs):
    """Train a freshly built QLearningAgent once and return its stats.

    Reads the notebook-level ``env``, ``num_episodes``, ``state_size`` and
    ``q_params``. ``kwargs`` is accepted but unused, which lets
    ``run_experiments`` call every ``run_*`` function the same way.

    Returns:
        dict with the keys "step", "cumulative_reward", "q_tables" and
        "best_actions", in the state ``QLearningAgent.train`` left it.
    """
    n_actions = env.action_space.n

    # One Q-table snapshot slot per episode: (episode, *state grid, action).
    q_tables_shape = (num_episodes, *state_size, n_actions)

    history = {
        "step": np.zeros(num_episodes),
        "cumulative_reward": np.zeros(num_episodes),
        "q_tables": np.zeros(shape=q_tables_shape),
        "best_actions": [],
    }

    agent = QLearningAgent(action_size=n_actions, state_size=state_size, **q_params)
    agent.train(env, num_episodes, history)
    return history

In [7]:
def run_conrl(**kwargs):
    """Train one ConRL agent and return the stats collected during training.

    Reads the notebook-level ``env``, ``num_episodes``, ``state_size`` and
    ``mlgng_params``.

    Keyword Args:
        support: agent object handed to ``ConRL.init_support`` (required).
            The caller's object is never modified; each call works on its
            own deep copy.

    Returns:
        The stats object created by ``build_conrl_stats(num_episodes, env)``
        after ``ConRL.train`` has been run with it.

    Raises:
        KeyError: if no ``support`` keyword argument is supplied.
    """
    # Fail before building anything if the required argument is missing.
    if "support" not in kwargs:
        raise KeyError("run_conrl() requires a 'support' keyword argument")

    stats = build_conrl_stats(num_episodes, env)

    conrl = ConRL(action_size=env.action_space.n, state_size=state_size, update_threshold=10)
    # run_experiments forwards the very same kwargs (and therefore the same
    # support object) to every repetition. Working on a private copy keeps
    # whatever training does to the support agent from carrying over into the
    # next, supposedly independent, experiment.
    conrl.init_support(copy.deepcopy(kwargs["support"]))
    conrl.init_mlgng(**mlgng_params)

    conrl.train(env, num_episodes, stats)
    return stats

In [8]:
def run_experiments(agent_fn, num_experiments=10, **kwargs):
    """Call ``agent_fn`` repeatedly and collect what each call returns.

    Args:
        agent_fn: callable invoked as ``agent_fn(**kwargs)`` once per
            experiment.
        num_experiments: how many times to invoke ``agent_fn``.
        **kwargs: forwarded unchanged to every invocation.

    Returns:
        list holding the return value of each invocation, in call order.
    """
    collected = []
    for run_number in range(1, num_experiments + 1):
        started = time.time()

        collected.append(agent_fn(**kwargs))

        # Wall-clock duration of this single experiment, reported in minutes.
        elapsed_min = (time.time() - started) / 60
        print("\n\n#### Finished experiment {}/{} in {:.2f} min\n\n".format(run_number, num_experiments, elapsed_min))

    return collected

In [12]:
def log_stats_to_wandb(experiments, group):
    """Upload the per-episode statistics of each experiment to Weights & Biases.

    One wandb run is created per element of ``experiments``; ``group`` is used
    both as the run group and as the run's only tag. For every episode the
    run logs ``cumulative_reward`` and ``steps``.

    Args:
        experiments: iterable of stats mappings. Each must provide
            "cumulative_reward" and "step" sequences with one entry per
            episode.
        group: wandb group name for the created runs.
    """
    # Store every hyper-parameter set defined in the notebook, so the config
    # attached to a run is complete whichever agent produced the stats
    # (previously the Sarsa-lambda parameters were never recorded).
    run_config = {
        "q_params": q_params,
        "sl_params": sl_params,
        "mlgng_params": mlgng_params,
    }

    for stats in experiments:
        run = wandb.init(
            entity="dodicin",
            project="con-rl",
            group=group,
            notes="testing features",
            tags=[group],
            config=run_config)

        try:
            # Iterate over the episodes actually present in the stats rather
            # than the global num_episodes, which may have changed since the
            # experiments were run.
            for reward, n_steps in zip(stats["cumulative_reward"], stats["step"]):
                wandb.log({
                    'cumulative_reward': reward,
                    'steps': n_steps,
                    # Additional metrics, currently disabled:
                    #'selector': stats["selector"][episode],
                    #'global_error': {key: value for key, value in enumerate(stats["global_error"][episode])},
                    #'nodes_per_layer': {key: value for key, value in enumerate(stats["nodes"][episode])},
                    #'total_nodes': np.sum(stats["nodes"][episode]),
                    #'global_mean_error': np.mean(stats["global_error"][episode])
                })
        finally:
            # Always close the run, even if logging fails part-way, so the
            # next wandb.init does not find a dangling active run.
            run.finish()

In [14]:
# Run 15 repetitions of run_sl and keep their stats in `experiments`.
experiments = run_experiments(run_sl, num_experiments=15)

Episode 50/500, Reward -1000.0, Average Max Reward: -969.4, Total steps 1000.0, Epsilon: 0.72, Alpha: 0.01, Time 0.107
Episode 100/500, Reward -1000.0, Average Max Reward: -885.9, Total steps 1000.0, Epsilon: 0.54, Alpha: 0.01, Time 0.099
Episode 150/500, Reward -516.0, Average Max Reward: -543.9, Total steps 516.0, Epsilon: 0.37, Alpha: 0.01, Time 0.047
Episode 200/500, Reward -661.0, Average Max Reward: -285.7, Total steps 661.0, Epsilon: 0.19, Alpha: 0.01, Time 0.067
Episode 250/500, Reward -195.0, Average Max Reward: -306.5, Total steps 195.0, Epsilon: 0.01, Alpha: 0.01, Time 0.018
Episode 300/500, Reward -193.0, Average Max Reward: -167.3, Total steps 193.0, Epsilon: 0.01, Alpha: 0.01, Time 0.021
Episode 350/500, Reward -155.0, Average Max Reward: -239.8, Total steps 155.0, Epsilon: 0.01, Alpha: 0.01, Time 0.016
Episode 400/500, Reward -201.0, Average Max Reward: -191.5, Total steps 201.0, Epsilon: 0.01, Alpha: 0.01, Time 0.021
Episode 450/500, Reward -312.0, Average Max Reward: -

Episode 400/500, Reward -189.0, Average Max Reward: -174.8, Total steps 189.0, Epsilon: 0.01, Alpha: 0.01, Time 0.020
Episode 450/500, Reward -242.0, Average Max Reward: -187.7, Total steps 242.0, Epsilon: 0.01, Alpha: 0.01, Time 0.026
Episode 500/500, Reward -221.0, Average Max Reward: -169.1, Total steps 221.0, Epsilon: 0.01, Alpha: 0.01, Time 0.024


#### Finished experiment 7/15 in 0.38 min


Episode 50/500, Reward -1000.0, Average Max Reward: -1000.0, Total steps 1000.0, Epsilon: 0.72, Alpha: 0.01, Time 0.117
Episode 100/500, Reward -888.0, Average Max Reward: -975.5, Total steps 888.0, Epsilon: 0.54, Alpha: 0.01, Time 0.117
Episode 150/500, Reward -585.0, Average Max Reward: -542.3, Total steps 585.0, Epsilon: 0.37, Alpha: 0.01, Time 0.060
Episode 200/500, Reward -235.0, Average Max Reward: -306.9, Total steps 235.0, Epsilon: 0.19, Alpha: 0.01, Time 0.027
Episode 250/500, Reward -241.0, Average Max Reward: -200.7, Total steps 241.0, Epsilon: 0.01, Alpha: 0.01, Time 0.025
Episode 

Episode 250/500, Reward -246.0, Average Max Reward: -220.9, Total steps 246.0, Epsilon: 0.01, Alpha: 0.01, Time 0.024
Episode 300/500, Reward -153.0, Average Max Reward: -167.2, Total steps 153.0, Epsilon: 0.01, Alpha: 0.01, Time 0.015
Episode 350/500, Reward -171.0, Average Max Reward: -150.4, Total steps 171.0, Epsilon: 0.01, Alpha: 0.01, Time 0.019
Episode 400/500, Reward -122.0, Average Max Reward: -180.7, Total steps 122.0, Epsilon: 0.01, Alpha: 0.01, Time 0.013
Episode 450/500, Reward -183.0, Average Max Reward: -206.1, Total steps 183.0, Epsilon: 0.01, Alpha: 0.01, Time 0.022
Episode 500/500, Reward -565.0, Average Max Reward: -270.1, Total steps 565.0, Epsilon: 0.01, Alpha: 0.01, Time 0.060


#### Finished experiment 14/15 in 0.38 min


Episode 50/500, Reward -1000.0, Average Max Reward: -1000.0, Total steps 1000.0, Epsilon: 0.72, Alpha: 0.01, Time 0.115
Episode 100/500, Reward -733.0, Average Max Reward: -960.7, Total steps 733.0, Epsilon: 0.54, Alpha: 0.01, Time 0.079
Episode

In [22]:
# Build the support agent for ConRL; the Q-learning variant is kept as a
# commented-out alternative.
# NOTE(review): the run_experiments call is commented out, so the training log
# shown under this cell was presumably produced by an earlier version of it.
#support = QLearningAgent(action_size=env.action_space.n, state_size=state_size, **q_params)
support = SarsaLambdaAgent(action_size=env.action_space.n, state_size=state_size, **sl_params)
#experiments = run_experiments(run_conrl, num_experiments=15, support=support)

#### Starting training #####
Episode 50/500, Average Reward: -968.24, Global Error: 0.00, Total steps 186.0, Discount: 0.48, Time 0.386
	 MLGNG nodes per action layer: 2 0 2
Episode 100/500, Average Reward: -304.62, Global Error: 108.58, Total steps 266.0, Discount: 0.23, Time 0.010
	 MLGNG nodes per action layer: 4 5 5
Episode 150/500, Average Reward: -333.66, Global Error: 63.56, Total steps 210.0, Discount: 0.11, Time 0.049
	 MLGNG nodes per action layer: 5 5 5
Episode 200/500, Average Reward: -474.40, Global Error: 33.07, Total steps 191.0, Discount: 0.05, Time 0.158
	 MLGNG nodes per action layer: 5 5 9
Episode 250/500, Average Reward: -241.62, Global Error: 29.68, Total steps 186.0, Discount: 0.02, Time 0.063
	 MLGNG nodes per action layer: 9 6 10
Episode 300/500, Average Reward: -254.88, Global Error: 19.11, Total steps 185.0, Discount: 0.01, Time 0.012
	 MLGNG nodes per action layer: 10 10 10
Episode 350/500, Average Reward: -202.90, Global Error: 18.18, Total steps 166.0, Disc

Episode 50/500, Average Reward: -218.90, Global Error: 62.52, Total steps 147.0, Discount: 0.48, Time 0.010
	 MLGNG nodes per action layer: 2 2 2
Episode 100/500, Average Reward: -203.14, Global Error: 100.69, Total steps 194.0, Discount: 0.23, Time 0.010
	 MLGNG nodes per action layer: 6 3 6
Episode 150/500, Average Reward: -176.66, Global Error: 126.85, Total steps 156.0, Discount: 0.11, Time 0.010
	 MLGNG nodes per action layer: 5 5 6
Episode 200/500, Average Reward: -169.78, Global Error: 99.64, Total steps 191.0, Discount: 0.05, Time 0.010
	 MLGNG nodes per action layer: 6 5 6
Episode 250/500, Average Reward: -167.22, Global Error: 86.41, Total steps 157.0, Discount: 0.02, Time 0.013
	 MLGNG nodes per action layer: 6 5 6
Episode 300/500, Average Reward: -154.90, Global Error: 84.64, Total steps 154.0, Discount: 0.01, Time 0.010
	 MLGNG nodes per action layer: 6 5 6
Episode 350/500, Average Reward: -155.62, Global Error: 78.85, Total steps 159.0, Discount: 0.01, Time 0.010
	 MLGNG 

Episode 50/500, Average Reward: -1000.00, Global Error: 75.22, Total steps 1000.0, Discount: 0.48, Time 0.487
	 MLGNG nodes per action layer: 4 3 5
Episode 100/500, Average Reward: -1000.00, Global Error: 28.00, Total steps 1000.0, Discount: 0.23, Time 0.010
	 MLGNG nodes per action layer: 6 3 5
Episode 150/500, Average Reward: -474.78, Global Error: 45.30, Total steps 450.0, Discount: 0.11, Time 0.081
	 MLGNG nodes per action layer: 4 4 5
Episode 200/500, Average Reward: -497.16, Global Error: 43.67, Total steps 1000.0, Discount: 0.05, Time 0.266
	 MLGNG nodes per action layer: 6 4 5
Episode 250/500, Average Reward: -1000.00, Global Error: 52.85, Total steps 1000.0, Discount: 0.02, Time 0.010
	 MLGNG nodes per action layer: 6 4 5
Episode 300/500, Average Reward: -1000.00, Global Error: 49.11, Total steps 1000.0, Discount: 0.01, Time 0.010
	 MLGNG nodes per action layer: 6 4 5
Episode 350/500, Average Reward: -1000.00, Global Error: 45.34, Total steps 1000.0, Discount: 0.01, Time 0.010

Episode 50/500, Average Reward: -986.86, Global Error: 163.40, Total steps 1000.0, Discount: 0.48, Time 0.487
	 MLGNG nodes per action layer: 5 5 6
Episode 100/500, Average Reward: -1000.00, Global Error: 103.59, Total steps 1000.0, Discount: 0.23, Time 0.010
	 MLGNG nodes per action layer: 5 6 6
Episode 150/500, Average Reward: -1000.00, Global Error: 82.07, Total steps 1000.0, Discount: 0.11, Time 0.010
	 MLGNG nodes per action layer: 5 6 6
Episode 200/500, Average Reward: -968.90, Global Error: 39.78, Total steps 273.0, Discount: 0.05, Time 0.988
	 MLGNG nodes per action layer: 5 6 8
Episode 250/500, Average Reward: -545.64, Global Error: 6.88, Total steps 1000.0, Discount: 0.02, Time 0.117
	 MLGNG nodes per action layer: 10 10 8
Episode 300/500, Average Reward: -1000.00, Global Error: 17.47, Total steps 1000.0, Discount: 0.01, Time 0.010
	 MLGNG nodes per action layer: 10 10 5
Episode 350/500, Average Reward: -908.94, Global Error: 70.76, Total steps 959.0, Discount: 0.01, Time 0.1

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Episode 50/500, Average Reward: -229.22, Global Error: 82.52, Total steps 211.0, Discount: 0.48, Time 0.010
	 MLGNG nodes per action layer: 3 5 3
Episode 100/500, Average Reward: -208.72, Global Error: 100.83, Total steps 208.0, Discount: 0.23, Time 0.010
	 MLGNG nodes per action layer: 4 5 4
Episode 150/500, Average Reward: -205.84, Global Error: 72.37, Total steps 186.0, Discount: 0.11, Time 0.023
	 MLGNG nodes per action layer: 5 6 4
Episode 200/500, Average Reward: -181.02, Global Error: 63.98, Total steps 193.0, Discount: 0.05, Time 0.012
	 MLGNG nodes per action layer: 5 6 4
Episode 250/500, Average Reward: -185.04, Global Error: 61.59, Total steps 187.0, Discount: 0.02, Time 0.037
	 MLGNG nodes per action layer: 5 6 4
Episode 300/500, Average Reward: -287.10, Global Error: 36.85, Total steps 191.0, Discount: 0.01, Time 0.387
	 MLGNG nodes per action layer: 5 6 4
Episode 350/500, Average Reward: -423.50, Global Error: 41.26, Total steps 1000.0, Discount: 0.01, Time 0.626
	 MLGNG 

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Episode 50/500, Average Reward: -244.68, Global Error: 99.48, Total steps 211.0, Discount: 0.48, Time 0.036
	 MLGNG nodes per action layer: 2 3 2
Episode 100/500, Average Reward: -230.78, Global Error: 96.52, Total steps 205.0, Discount: 0.23, Time 0.096
	 MLGNG nodes per action layer: 4 5 4
Episode 150/500, Average Reward: -161.82, Global Error: 89.94, Total steps 166.0, Discount: 0.11, Time 0.044
	 MLGNG nodes per action layer: 4 5 4
Episode 200/500, Average Reward: -163.48, Global Error: 88.44, Total steps 156.0, Discount: 0.05, Time 0.052
	 MLGNG nodes per action layer: 4 5 4
Episode 250/500, Average Reward: -175.50, Global Error: 78.64, Total steps 179.0, Discount: 0.02, Time 0.081
	 MLGNG nodes per action layer: 4 5 4
Episode 300/500, Average Reward: -185.86, Global Error: 65.17, Total steps 280.0, Discount: 0.01, Time 0.830
	 MLGNG nodes per action layer: 4 5 4
Episode 350/500, Average Reward: -480.96, Global Error: 47.55, Total steps 824.0, Discount: 0.01, Time 0.291
	 MLGNG no

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Episode 50/500, Average Reward: -987.26, Global Error: 90.75, Total steps 1000.0, Discount: 0.48, Time 0.487
	 MLGNG nodes per action layer: 4 5 5
Episode 100/500, Average Reward: -1000.00, Global Error: 45.65, Total steps 1000.0, Discount: 0.23, Time 0.010
	 MLGNG nodes per action layer: 4 5 5
Episode 150/500, Average Reward: -1000.00, Global Error: 22.31, Total steps 1000.0, Discount: 0.11, Time 0.010
	 MLGNG nodes per action layer: 4 5 4
Episode 200/500, Average Reward: -1000.00, Global Error: 103.43, Total steps 1000.0, Discount: 0.05, Time 0.010
	 MLGNG nodes per action layer: 7 6 8
Episode 250/500, Average Reward: -993.44, Global Error: 10.23, Total steps 1000.0, Discount: 0.02, Time 0.010
	 MLGNG nodes per action layer: 9 8 7
Episode 300/500, Average Reward: -1000.00, Global Error: 2.35, Total steps 1000.0, Discount: 0.01, Time 0.010
	 MLGNG nodes per action layer: 9 4 7
Episode 350/500, Average Reward: -1000.00, Global Error: 86.72, Total steps 207.0, Discount: 0.01, Time 0.010

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Episode 50/500, Average Reward: -542.22, Global Error: 122.33, Total steps 1000.0, Discount: 0.48, Time 0.487
	 MLGNG nodes per action layer: 4 3 4
Episode 100/500, Average Reward: -1000.00, Global Error: 107.50, Total steps 1000.0, Discount: 0.23, Time 0.010
	 MLGNG nodes per action layer: 4 6 5
Episode 150/500, Average Reward: -1000.00, Global Error: 22.17, Total steps 1000.0, Discount: 0.11, Time 0.010
	 MLGNG nodes per action layer: 5 6 5
Episode 200/500, Average Reward: -1000.00, Global Error: 79.19, Total steps 1000.0, Discount: 0.05, Time 0.010
	 MLGNG nodes per action layer: 5 6 5
Episode 250/500, Average Reward: -1000.00, Global Error: 60.16, Total steps 1000.0, Discount: 0.02, Time 0.010
	 MLGNG nodes per action layer: 9 6 4
Episode 300/500, Average Reward: -1000.00, Global Error: 54.52, Total steps 1000.0, Discount: 0.01, Time 0.010
	 MLGNG nodes per action layer: 9 6 4
Episode 350/500, Average Reward: -1000.00, Global Error: 79.63, Total steps 1000.0, Discount: 0.01, Time 0

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Episode 50/500, Average Reward: -190.80, Global Error: 80.78, Total steps 173.0, Discount: 0.48, Time 0.010
	 MLGNG nodes per action layer: 3 2 3
Episode 100/500, Average Reward: -189.30, Global Error: 96.51, Total steps 172.0, Discount: 0.23, Time 0.010
	 MLGNG nodes per action layer: 4 3 5
Episode 150/500, Average Reward: -186.88, Global Error: 81.14, Total steps 195.0, Discount: 0.11, Time 0.036
	 MLGNG nodes per action layer: 5 4 5
Episode 200/500, Average Reward: -182.08, Global Error: 84.30, Total steps 170.0, Discount: 0.05, Time 0.090
	 MLGNG nodes per action layer: 5 4 5
Episode 250/500, Average Reward: -181.58, Global Error: 62.78, Total steps 199.0, Discount: 0.02, Time 0.041
	 MLGNG nodes per action layer: 5 4 5
Episode 300/500, Average Reward: -441.46, Global Error: 25.31, Total steps 139.0, Discount: 0.01, Time 0.268
	 MLGNG nodes per action layer: 8 6 7
Episode 350/500, Average Reward: -579.44, Global Error: 15.30, Total steps 202.0, Discount: 0.01, Time 0.244
	 MLGNG no

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Episode 50/500, Average Reward: -433.06, Global Error: 49.64, Total steps 177.0, Discount: 0.48, Time 0.487
	 MLGNG nodes per action layer: 3 2 2
Episode 100/500, Average Reward: -180.94, Global Error: 131.11, Total steps 180.0, Discount: 0.23, Time 0.010
	 MLGNG nodes per action layer: 6 4 4
Episode 150/500, Average Reward: -169.42, Global Error: 93.43, Total steps 162.0, Discount: 0.11, Time 0.010
	 MLGNG nodes per action layer: 6 4 6
Episode 200/500, Average Reward: -155.68, Global Error: 90.30, Total steps 195.0, Discount: 0.05, Time 0.010
	 MLGNG nodes per action layer: 6 4 6
Episode 250/500, Average Reward: -166.20, Global Error: 77.58, Total steps 117.0, Discount: 0.02, Time 0.010
	 MLGNG nodes per action layer: 6 4 6
Episode 300/500, Average Reward: -161.02, Global Error: 81.41, Total steps 151.0, Discount: 0.01, Time 0.010
	 MLGNG nodes per action layer: 6 4 6
Episode 350/500, Average Reward: -162.18, Global Error: 84.62, Total steps 174.0, Discount: 0.01, Time 0.010
	 MLGNG n

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Episode 50/500, Average Reward: -913.72, Global Error: 78.85, Total steps 281.0, Discount: 0.48, Time 0.537
	 MLGNG nodes per action layer: 5 4 3
Episode 100/500, Average Reward: -320.48, Global Error: 70.34, Total steps 311.0, Discount: 0.23, Time 0.023
	 MLGNG nodes per action layer: 5 4 3
Episode 150/500, Average Reward: -497.40, Global Error: 59.31, Total steps 180.0, Discount: 0.11, Time 0.084
	 MLGNG nodes per action layer: 9 5 4
Episode 200/500, Average Reward: -482.20, Global Error: 11.67, Total steps 176.0, Discount: 0.05, Time 0.107
	 MLGNG nodes per action layer: 9 6 10
Episode 250/500, Average Reward: -250.08, Global Error: 21.78, Total steps 215.0, Discount: 0.02, Time 0.010
	 MLGNG nodes per action layer: 10 6 7
Episode 300/500, Average Reward: -249.08, Global Error: 15.53, Total steps 236.0, Discount: 0.01, Time 0.018
	 MLGNG nodes per action layer: 10 10 7
Episode 350/500, Average Reward: -320.94, Global Error: 20.76, Total steps 213.0, Discount: 0.01, Time 0.109
	 MLGN

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Episode 50/500, Average Reward: -129.34, Global Error: 101.67, Total steps 121.0, Discount: 0.48, Time 0.010
	 MLGNG nodes per action layer: 2 2 2
Episode 100/500, Average Reward: -123.08, Global Error: 121.90, Total steps 122.0, Discount: 0.23, Time 0.010
	 MLGNG nodes per action layer: 4 5 5
Episode 150/500, Average Reward: -122.64, Global Error: 109.08, Total steps 122.0, Discount: 0.11, Time 0.010
	 MLGNG nodes per action layer: 5 5 5
Episode 200/500, Average Reward: -128.92, Global Error: 103.77, Total steps 159.0, Discount: 0.05, Time 0.365
	 MLGNG nodes per action layer: 5 5 5
Episode 250/500, Average Reward: -160.18, Global Error: 63.46, Total steps 143.0, Discount: 0.02, Time 0.102
	 MLGNG nodes per action layer: 5 5 5
Episode 300/500, Average Reward: -173.62, Global Error: 65.02, Total steps 172.0, Discount: 0.01, Time 0.174
	 MLGNG nodes per action layer: 5 5 5
Episode 350/500, Average Reward: -172.12, Global Error: 44.48, Total steps 188.0, Discount: 0.01, Time 0.120
	 MLGN

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Episode 50/500, Average Reward: -165.96, Global Error: 64.93, Total steps 154.0, Discount: 0.48, Time 0.010
	 MLGNG nodes per action layer: 2 2 3
Episode 100/500, Average Reward: -168.48, Global Error: 117.25, Total steps 193.0, Discount: 0.23, Time 0.073
	 MLGNG nodes per action layer: 7 4 5
Episode 150/500, Average Reward: -148.62, Global Error: 76.25, Total steps 123.0, Discount: 0.11, Time 0.010
	 MLGNG nodes per action layer: 7 5 5
Episode 200/500, Average Reward: -173.52, Global Error: 76.77, Total steps 157.0, Discount: 0.05, Time 0.150
	 MLGNG nodes per action layer: 7 5 5
Episode 250/500, Average Reward: -199.72, Global Error: 40.23, Total steps 139.0, Discount: 0.02, Time 0.244
	 MLGNG nodes per action layer: 6 5 5
Episode 300/500, Average Reward: -181.64, Global Error: 48.36, Total steps 153.0, Discount: 0.01, Time 0.059
	 MLGNG nodes per action layer: 8 5 7
Episode 350/500, Average Reward: -160.56, Global Error: 39.01, Total steps 149.0, Discount: 0.01, Time 0.015
	 MLGNG n

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Episode 50/500, Average Reward: -466.60, Global Error: 83.98, Total steps 208.0, Discount: 0.48, Time 0.030
	 MLGNG nodes per action layer: 3 3 2
Episode 100/500, Average Reward: -187.70, Global Error: 94.70, Total steps 192.0, Discount: 0.23, Time 0.012
	 MLGNG nodes per action layer: 4 5 5
Episode 150/500, Average Reward: -173.70, Global Error: 67.47, Total steps 190.0, Discount: 0.11, Time 0.010
	 MLGNG nodes per action layer: 4 6 5
Episode 200/500, Average Reward: -222.90, Global Error: 87.78, Total steps 195.0, Discount: 0.05, Time 0.011
	 MLGNG nodes per action layer: 5 6 5
Episode 250/500, Average Reward: -251.62, Global Error: 76.70, Total steps 233.0, Discount: 0.02, Time 0.021
	 MLGNG nodes per action layer: 5 6 5
Episode 300/500, Average Reward: -241.84, Global Error: 71.56, Total steps 215.0, Discount: 0.01, Time 0.010
	 MLGNG nodes per action layer: 5 6 5
Episode 350/500, Average Reward: -217.32, Global Error: 53.94, Total steps 194.0, Discount: 0.01, Time 0.018
	 MLGNG no

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Episode 50/500, Average Reward: -250.64, Global Error: 64.81, Total steps 1000.0, Discount: 0.48, Time 0.487
	 MLGNG nodes per action layer: 9 2 2
Episode 100/500, Average Reward: -307.32, Global Error: 120.48, Total steps 290.0, Discount: 0.23, Time 0.010
	 MLGNG nodes per action layer: 9 5 4
Episode 150/500, Average Reward: -279.08, Global Error: 99.69, Total steps 294.0, Discount: 0.11, Time 0.010
	 MLGNG nodes per action layer: 9 5 6
Episode 200/500, Average Reward: -252.64, Global Error: 72.01, Total steps 218.0, Discount: 0.05, Time 0.010
	 MLGNG nodes per action layer: 7 5 6
Episode 250/500, Average Reward: -250.86, Global Error: 62.31, Total steps 167.0, Discount: 0.02, Time 0.252
	 MLGNG nodes per action layer: 7 5 6
Episode 300/500, Average Reward: -143.00, Global Error: 43.49, Total steps 181.0, Discount: 0.01, Time 0.010
	 MLGNG nodes per action layer: 7 5 6
Episode 350/500, Average Reward: -140.72, Global Error: 46.46, Total steps 180.0, Discount: 0.01, Time 0.047
	 MLGNG 

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [None]:
# Build a SarsaLambdaAgent as the support and run 15 repetitions of run_conrl
# with it. This overwrites `experiments` from the Sarsa-lambda run above.
support = SarsaLambdaAgent(action_size=env.action_space.n, state_size=state_size, **sl_params)
experiments = run_experiments(run_conrl, num_experiments=15, support=support)

In [15]:
# Authenticate with Weights & Biases, then upload whatever `experiments`
# currently holds under the group "sarsal".
# NOTE(review): `experiments` is assigned by more than one cell; make sure the
# Sarsa-lambda run was the last one executed before running this cell.
wandb.login()
log_stats_to_wandb(experiments, group="sarsal")

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
cumulative_reward,-194.0
steps,194.0
_step,499.0
_runtime,8.0
_timestamp,1611779943.0


0,1
cumulative_reward,▁▁▁▁▁▁▁▁▄▂▆▄▅▅▆▇▅▇▆▆████▇██▇███▇▇▆▇▇▇▇▇▇
steps,████████▅▇▃▅▄▄▃▂▄▂▃▃▁▁▁▁▂▁▁▂▁▁▁▂▂▃▂▂▂▂▂▂
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
_timestamp,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁


[34m[1mwandb[0m: wandb version 0.10.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.0, max=1.0)…

0,1
cumulative_reward,-263.0
steps,263.0
_step,499.0
_runtime,5.0
_timestamp,1611779961.0


0,1
cumulative_reward,▁▁▁▁▁▁▁▁▁▁▃▅▆▅▅▇▆▆▇█▇█▇█▇▇▇▇███▇▇█▇▇▇▇█▇
steps,██████████▆▄▃▄▄▂▃▃▂▁▂▁▂▁▂▂▂▂▁▁▁▂▂▁▂▂▂▂▁▂
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
_timestamp,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁


[34m[1mwandb[0m: wandb version 0.10.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
cumulative_reward,-148.0
steps,148.0
_step,499.0
_runtime,2.0
_timestamp,1611779981.0


0,1
cumulative_reward,▁▁▁▁▂▁▁▁▁▁▄▄▅▇▅█▇▆▇██▇▅██▇███████▇██████
steps,████▇█████▅▅▄▂▄▁▂▃▂▁▁▂▄▁▁▂▁▁▁▁▁▁▁▂▁▁▁▁▁▁
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
_timestamp,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁


[34m[1mwandb[0m: wandb version 0.10.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
cumulative_reward,-155.0
steps,155.0
_step,499.0
_runtime,22.0
_timestamp,1611780006.0


0,1
cumulative_reward,▁▁▁▁▁▁▂▁▅▄▃▆▅▆▆▆▆▆▇████████▇█▆████▇▇▇▇██
steps,██████▇█▄▅▆▃▄▃▃▃▃▃▂▁▁▁▁▁▁▁▁▂▁▃▁▁▁▁▂▂▂▂▁▁
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
_timestamp,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁


[34m[1mwandb[0m: wandb version 0.10.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
cumulative_reward,-172.0
steps,172.0
_step,499.0
_runtime,3.0
_timestamp,1611780039.0


0,1
cumulative_reward,▁▁▁▁▁▁▁▁▁▆▂▆▆▆▇▅▅█▇▇▇██████▇█████▇▇███▇█
steps,█████████▃▇▃▃▃▂▄▄▁▂▂▂▁▁▁▁▁▁▂▁▁▁▁▁▂▂▁▁▁▂▁
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁██████████████████████████████████████
_timestamp,▁▁██████████████████████████████████████


[34m[1mwandb[0m: wandb version 0.10.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.0, max=1.0)…

0,1
cumulative_reward,-174.0
steps,174.0
_step,499.0
_runtime,1.0
_timestamp,1611780045.0


0,1
cumulative_reward,▁▁▁▁▁▁▁▁▁▁▅▆▆▅▅▇▇▆██████▇████████▇▇█████
steps,██████████▄▃▃▄▄▂▂▃▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▂▂▁▁▁▁▁
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
_timestamp,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁


[34m[1mwandb[0m: wandb version 0.10.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.0, max=1.0)…

0,1
cumulative_reward,-221.0
steps,221.0
_step,499.0
_runtime,5.0
_timestamp,1611780071.0


0,1
cumulative_reward,▁▁▁▁▁▁▁▁▆▅▆▂▆▆▄▆▇▇▇▇▆▇███▇█████████▇▇▇██
steps,████████▃▄▃▇▃▃▅▃▂▂▂▂▃▂▁▁▁▂▁▁▁▁▁▁▁▁▁▂▂▂▁▁
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
_timestamp,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁


[34m[1mwandb[0m: wandb version 0.10.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
cumulative_reward,-234.0
steps,234.0
_step,499.0
_runtime,1.0
_timestamp,1611780079.0


0,1
cumulative_reward,▁▁▁▁▁▁▁▁▃▁▆▅▄▆▆▇▇▇▅████▇███▇▇▇▇█▇▇█▇█▇██
steps,████████▆█▃▄▅▃▃▂▂▂▄▁▁▁▁▂▁▁▁▂▂▂▂▁▂▂▁▂▁▂▁▁
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
_timestamp,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=0.0, max=1.0)…

0,1
cumulative_reward,-526.0
steps,526.0
_step,499.0
_runtime,6.0
_timestamp,1611780100.0


0,1
cumulative_reward,▁▁▁▁▁▁▁▁▁▄▅▇▅▇▅▃▆▇▇▇▆▇▅███▇█▇▇▆▇██▇█▇█▇▃
steps,█████████▅▄▂▄▂▄▆▃▂▂▂▃▂▄▁▁▁▂▁▂▂▃▂▁▁▂▁▂▁▂▆
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
_timestamp,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁


[34m[1mwandb[0m: wandb version 0.10.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
cumulative_reward,-189.0
steps,189.0
_step,499.0
_runtime,1.0
_timestamp,1611780171.0


0,1
cumulative_reward,▁▁▁▁▁▁▁▁▁▄▂▅▄▁▆▆▇████▇█▆████████▇██▇▇█▇▇
steps,█████████▅▇▄▅█▃▃▂▁▁▁▁▂▁▃▁▁▁▁▁▁▁▁▂▁▁▂▂▁▂▂
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
_timestamp,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁


[34m[1mwandb[0m: wandb version 0.10.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
cumulative_reward,-232.0
steps,232.0
_step,499.0
_runtime,1.0
_timestamp,1611780184.0


0,1
cumulative_reward,▁▁▁▁▁▁▁▁▁▁▅▃▆▆▇▇▇▇▆█████▇▆██▇▇▇▆▅▇█▅▇▇▇▇
steps,██████████▄▆▃▃▂▂▂▂▃▁▁▁▁▁▂▃▁▁▂▂▂▃▄▂▁▄▂▂▂▂
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
_timestamp,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁


[34m[1mwandb[0m: wandb version 0.10.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
cumulative_reward,-247.0
steps,247.0
_step,499.0
_runtime,1.0
_timestamp,1611780189.0


0,1
cumulative_reward,▁▁▁▁▁▃▁▁▁▁▁▃▆▆▅▆▇▇▇████▇▇▇▇█▇▇▇▇██████▆▇
steps,█████▆█████▆▃▃▄▃▂▂▂▁▁▁▁▂▂▂▂▁▂▂▂▂▁▁▁▁▁▁▃▂
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
_timestamp,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁


[34m[1mwandb[0m: wandb version 0.10.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
cumulative_reward,-226.0
steps,226.0
_step,499.0
_runtime,1.0
_timestamp,1611780194.0


0,1
cumulative_reward,▁▁▁▁▁▁▁▁▁▁▄▆███▇▇▇██▇▇███▇█▇█████▇████▇▇
steps,██████████▅▃▁▁▁▂▂▂▁▁▂▂▁▁▁▂▁▂▁▁▁▁▁▂▁▁▁▁▂▂
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
_timestamp,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁


[34m[1mwandb[0m: wandb version 0.10.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
cumulative_reward,-565.0
steps,565.0
_step,499.0
_runtime,1.0
_timestamp,1611780199.0


0,1
cumulative_reward,▁▁▁▁▁▁▁▂▄▁▄▃▆▆▆▇▆█▇▇▆▇██████▇▇██▇▇▇▇▇▇▆▆
steps,███████▇▅█▅▆▃▃▃▂▃▁▂▂▃▂▁▁▁▁▁▁▂▂▁▁▂▂▂▂▂▂▃▃
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
_timestamp,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁


[34m[1mwandb[0m: wandb version 0.10.15 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
cumulative_reward,-204.0
steps,204.0
_step,499.0
_runtime,1.0
_timestamp,1611780204.0


0,1
cumulative_reward,▁▁▁▁▁▁▁▁▁▄▅▅▆▅▅▆▇▇▇█████▇▆██▇██▇▆▆█▅██▇█
steps,█████████▅▄▄▃▄▄▃▂▂▂▁▁▁▁▁▂▃▁▁▂▁▁▂▃▃▁▄▁▁▂▁
_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
_runtime,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
_timestamp,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
