## Google Colab Setup

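Skip this section if you are running the notebook locally; it only clones the repository and installs dependencies on a fresh Google Colab runtime.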


In [None]:
!git clone https://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@github.com/kiritowu/Great-Lunar-Lander.git
%cd Great-Lunar-Lander

Cloning into 'Great-Lunar-Lander'...
remote: Enumerating objects: 610, done.[K
remote: Counting objects: 100% (610/610), done.[K
remote: Compressing objects: 100% (417/417), done.[K
remote: Total 610 (delta 350), reused 435 (delta 183), pack-reused 0[K
Receiving objects: 100% (610/610), 7.89 MiB | 14.82 MiB/s, done.
Resolving deltas: 100% (350/350), done.
/content/Great-Lunar-Lander


In [None]:
%%capture
!pip install Box2D
!pip install box2d
!pip install box2d-py
!pip install gym[all]
!pip install gym[Box_2D]
!pip install wandb     

## Setup


In [None]:
import gym

import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import load_model

from utils import seed_everything, Experience, ReplayBuffer
from collections import deque
from model.dqn import DQN


In [None]:
import wandb

# Log in to Weights & Biases up front so the runs started further down can
# report to the service (prompts for an API key when none is stored yet).
wandb.login()


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

## Define Train Function


In [None]:
def train(config=None):
    """Train one DQN agent on ``LunarLander-v2`` inside a single W&B run.

    The function is passed as the callback to ``wandb.agent`` below, so it is
    invoked once per sweep trial.

    Args:
        config: Optional configuration forwarded to ``wandb.init``. The
            hyperparameters actually used are read back from ``wandb.config``
            and must provide ``lr``, ``gamma``, ``epsilon``,
            ``epsilon_decay``, ``update_target_net_interval`` and
            ``episodes``.

    Returns:
        None. Results are reported through W&B by the model
        (``log_wandb=True``).
    """
    with wandb.init(config=config):
        # Use the run's resolved configuration rather than the raw argument;
        # under a sweep this is where the trial's hyperparameters appear.
        config = wandb.config
        env = gym.make("LunarLander-v2")
        try:
            model = DQN(
                env=env,
                lr=config.lr,
                gamma=config.gamma,
                epsilon=config.epsilon,
                epsilon_decay=config.epsilon_decay,
                target_update_interval=config.update_target_net_interval,
                log_wandb=True,
                # NOTE(review): presumably switches the model into a
                # tuning-specific mode - confirm against model/dqn.py.
                tuning_condition=True
            )
            # NOTE(review): `mean_stopping` looks like an early-stopping
            # switch based on the running mean reward - confirm in model/dqn.py.
            model.train(config.episodes, mean_stopping=True)
        finally:
            # A sweep calls this function many times in one process; release
            # the environment even when training raises so trials do not leak
            # environments.
            env.close()

## Start the Run


In [None]:
# Settings for the sweep agent started in the next cell.
sweep_id = "3fzhy39d"  # identifier of the W&B sweep the agent attaches to
project_name = "DQN-Tuning"  # passed to wandb.agent as `project`
num_runs = 50  # passed to wandb.agent as `count`


In [None]:
# Start a sweep agent in this kernel: it calls `train` once per trial it
# receives for `sweep_id`, with `num_runs` passed as the `count` limit.
wandb.agent(
    sweep_id,
    train,
    count=num_runs,
    entity="onsen",
    project=project_name,
)


[34m[1mwandb[0m: Agent Starting Run: gjeg1qqk with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.9743088623295352
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.0015985525875294988
[34m[1mwandb[0m: 	update_target_net_interval: 50




[000] Reward: -311.856 | Avg Reward: -311.856 | e: 0.974
[001] Reward: -153.700 | Avg Reward: -232.778 | e: 0.949
[002] Reward:  -73.105 | Avg Reward: -179.554 | e: 0.925
[003] Reward: -114.267 | Avg Reward: -163.232 | e: 0.901
[004] Reward: -227.426 | Avg Reward: -176.071 | e: 0.878
[005] Reward: -184.283 | Avg Reward: -177.440 | e: 0.855
[006] Reward: -114.382 | Avg Reward: -168.431 | e: 0.833
[007] Reward: -197.133 | Avg Reward: -172.019 | e: 0.812
[008] Reward: -142.000 | Avg Reward: -168.684 | e: 0.791
[009] Reward:  -59.920 | Avg Reward: -157.807 | e: 0.771
[010] Reward: -152.069 | Avg Reward: -157.286 | e: 0.751
[011] Reward:  -81.804 | Avg Reward: -150.995 | e: 0.732
[012] Reward:  -95.463 | Avg Reward: -146.724 | e: 0.713
[013] Reward: -139.780 | Avg Reward: -146.228 | e: 0.695
[014] Reward:  -22.711 | Avg Reward: -137.993 | e: 0.677
[015] Reward: -211.803 | Avg Reward: -142.606 | e: 0.659
[016] Reward:  -26.167 | Avg Reward: -135.757 | e: 0.642
[017] Reward: -122.258 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,▁▁▂▂▃▃▄▄▄▅▆▆▆▇▇████▇▇▇▆▆▆▆▆▇▇▇▇▇▇▇█▇▇▅▅▅
Episode,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
Episode Length,▁▁▁▁████▂▇▂▂▂▃▂▂▂▂▂▂▂▂▂▁▂▂▄▄▃▄▃▃▃▂▇▂▂▁▄▃
Epsilon,█▆▅▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Reward,▅▄▅▆▆▆▆▇▆█▅██▇▇██▆▃▃█▇█▅█▁▇▇▇▅████▇▇▆▅██

0,1
Avg-Reward-100e,40.45057
Episode,499.0
Episode Length,243.0
Epsilon,0.00998
Reward,-198.78555


[34m[1mwandb[0m: Agent Starting Run: e9a9ynz0 with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.9851453699121748
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.0055478628120233615
[34m[1mwandb[0m: 	update_target_net_interval: 5




[000] Reward: -109.724 | Avg Reward: -109.724 | e: 0.985
[001] Reward: -239.204 | Avg Reward: -174.464 | e: 0.971
[002] Reward: -123.205 | Avg Reward: -157.378 | e: 0.956
[003] Reward: -120.151 | Avg Reward: -148.071 | e: 0.942
[004] Reward: -133.821 | Avg Reward: -145.221 | e: 0.928
[005] Reward: -425.884 | Avg Reward: -191.998 | e: 0.914
[006] Reward: -249.521 | Avg Reward: -200.216 | e: 0.901
[007] Reward:  -83.796 | Avg Reward: -185.663 | e: 0.887
[008] Reward: -251.662 | Avg Reward: -192.996 | e: 0.874
[009] Reward: -147.060 | Avg Reward: -188.403 | e: 0.861
[010] Reward: -275.841 | Avg Reward: -196.352 | e: 0.848
[011] Reward: -140.228 | Avg Reward: -191.675 | e: 0.836
[012] Reward: -132.845 | Avg Reward: -187.149 | e: 0.823
[013] Reward: -115.019 | Avg Reward: -181.997 | e: 0.811
[014] Reward:  -70.707 | Avg Reward: -174.578 | e: 0.799
[015] Reward:   15.000 | Avg Reward: -162.729 | e: 0.787
[016] Reward: -308.154 | Avg Reward: -171.284 | e: 0.775
[017] Reward:  -44.393 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,█▃▄▅▁▂▁▁▁▂▃▃▄▄▄▅▅▅▅▅▆▆▅▅▅▆▆▆▅▅▅▄▄▃▃▃▂▂▁▁
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
Episode Length,▁▁▂▂▂▂▂▂▂▂▄▂▂▂▂▁▃▃▂▂▂▃▃▃▄▂▂▃▄▁▃▂▂█▃▂▃▄▄█
Epsilon,███▇▇▇▇▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁
Reward,▇▆▇▇▃▇▅▅▇▇█▅█▇▇█▇█▆██▅▆▅▇█▇▅▅▇▅▆▄▁▅▆▄▃▂▆

0,1
Avg-Reward-100e,-195.90388
Episode,58.0
Episode Length,374.0
Epsilon,0.41354
Reward,-168.60342


[34m[1mwandb[0m: Agent Starting Run: c00wblda with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.9643597338760698
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.004423673999704721
[34m[1mwandb[0m: 	update_target_net_interval: 20




[000] Reward: -526.227 | Avg Reward: -526.227 | e: 0.964
[001] Reward: -298.748 | Avg Reward: -412.488 | e: 0.930
[002] Reward:  -64.699 | Avg Reward: -296.558 | e: 0.897
[003] Reward: -126.676 | Avg Reward: -254.088 | e: 0.865
[004] Reward:  -65.723 | Avg Reward: -216.415 | e: 0.834
[005] Reward: -154.009 | Avg Reward: -206.014 | e: 0.804
[006] Reward: -255.654 | Avg Reward: -213.105 | e: 0.776
[007] Reward:  -95.049 | Avg Reward: -198.348 | e: 0.748
[008] Reward: -448.265 | Avg Reward: -226.117 | e: 0.721
[009] Reward: -110.222 | Avg Reward: -214.527 | e: 0.696
[010] Reward: -216.195 | Avg Reward: -214.679 | e: 0.671
[011] Reward:  -81.855 | Avg Reward: -203.610 | e: 0.647
[012] Reward: -114.894 | Avg Reward: -196.786 | e: 0.624
[013] Reward:  -98.204 | Avg Reward: -189.744 | e: 0.602
[014] Reward: -366.960 | Avg Reward: -201.559 | e: 0.580
[015] Reward: -355.749 | Avg Reward: -211.196 | e: 0.560
[016] Reward: -408.024 | Avg Reward: -222.774 | e: 0.540
[017] Reward: -125.040 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,▁▃▆▇███▇█████▇█▇▇▇▇▇▇▇▇▇████▇███▇▇▇███▇▇
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
Episode Length,▃▃▃▂▄▃▁▅▃▂▁▄▃▄▁▃▂▃▂▃▅▃▄▁▃▃▄▂▆█▃█▅▂▂▄▄▅▅▃
Epsilon,██▇▇▇▆▆▆▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
Reward,▁▄▇▆▆▅▇▂▅▇▆▃▃▃▆▃▃▅▆▃▃▆▆▇▇█▃▄▃▄▆▄▅▅▆█▅▄▃▅

0,1
Avg-Reward-100e,-226.3993
Episode,50.0
Episode Length,95.0
Epsilon,0.15711
Reward,-217.8437


[34m[1mwandb[0m: Agent Starting Run: yboe5qnv with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.9805941957816232
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.0026970544820563392
[34m[1mwandb[0m: 	update_target_net_interval: 5




[000] Reward: -283.605 | Avg Reward: -283.605 | e: 0.981
[001] Reward: -172.698 | Avg Reward: -228.152 | e: 0.962
[002] Reward:  -85.006 | Avg Reward: -180.437 | e: 0.943
[003] Reward: -209.873 | Avg Reward: -187.796 | e: 0.925
[004] Reward: -131.436 | Avg Reward: -176.524 | e: 0.907
[005] Reward:  -82.893 | Avg Reward: -160.919 | e: 0.889
[006] Reward: -108.141 | Avg Reward: -153.379 | e: 0.872
[007] Reward: -449.514 | Avg Reward: -190.396 | e: 0.855
[008] Reward: -110.509 | Avg Reward: -181.520 | e: 0.838
[009] Reward: -100.148 | Avg Reward: -173.382 | e: 0.822
[010] Reward:  -98.680 | Avg Reward: -166.591 | e: 0.806
[011] Reward: -347.901 | Avg Reward: -181.700 | e: 0.790
[012] Reward: -364.809 | Avg Reward: -195.786 | e: 0.775
[013] Reward: -538.943 | Avg Reward: -220.297 | e: 0.760
[014] Reward: -227.338 | Avg Reward: -220.766 | e: 0.745
[015] Reward: -112.835 | Avg Reward: -214.021 | e: 0.731
[016] Reward: -603.348 | Avg Reward: -236.922 | e: 0.717
[017] Reward: -368.124 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,▂▅▇▆██▆▇▇▇▆▅▅▄▄▃▃▃▃▂▃▂▂▂▂▂▂▂▂▁▁▁▁▁▂▂▁▁▁▁
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
Episode Length,▂▅▂▃▃▄▃▄▁▃▂▁▂▄▃▅▅▆▄▃▁▇▄▃▆█▆▃▆▃█▆▃▁▇▃▄▂▅▇
Epsilon,███▇▇▇▇▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁
Reward,▅▇█▆█▇▃▇▇▄▄▆▇▁▄▄▄▅▆▃▆▄▅▅▅▃▃▅▄▆▅▅▅▅█▆▄▃▇▁

0,1
Avg-Reward-100e,-306.38091
Episode,50.0
Episode Length,165.0
Epsilon,0.36809
Reward,-564.67637


[34m[1mwandb[0m: Agent Starting Run: 37ds7hgm with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.951968473056692
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.006049206698017609
[34m[1mwandb[0m: 	update_target_net_interval: 50




[000] Reward: -335.891 | Avg Reward: -335.891 | e: 0.952
[001] Reward:  -57.960 | Avg Reward: -196.926 | e: 0.906
[002] Reward: -197.994 | Avg Reward: -197.282 | e: 0.863
[003] Reward: -297.786 | Avg Reward: -222.408 | e: 0.821
[004] Reward: -188.868 | Avg Reward: -215.700 | e: 0.782
[005] Reward: -227.715 | Avg Reward: -217.702 | e: 0.744
[006] Reward:  -94.127 | Avg Reward: -200.049 | e: 0.709
[007] Reward: -214.281 | Avg Reward: -201.828 | e: 0.674
[008] Reward: -138.439 | Avg Reward: -194.785 | e: 0.642
[009] Reward: -193.036 | Avg Reward: -194.610 | e: 0.611
[010] Reward: -109.910 | Avg Reward: -186.910 | e: 0.582
[011] Reward:  -87.918 | Avg Reward: -178.661 | e: 0.554
[012] Reward:  -91.341 | Avg Reward: -171.944 | e: 0.527
[013] Reward: -109.542 | Avg Reward: -167.486 | e: 0.502
[014] Reward:  -52.909 | Avg Reward: -159.848 | e: 0.478
[015] Reward: -152.677 | Avg Reward: -159.400 | e: 0.455
[016] Reward:  -74.685 | Avg Reward: -154.416 | e: 0.433
[017] Reward:  -27.767 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,▁▆▆▅▅▆▆▆▆▇▇▇▇▇██████▇▇███████████▇▇▇▆▆▆▅
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
Episode Length,▂▂▂▄▄▃▂▂▃▃▄▄▃▆▃▃▂▁▄▂▃▃▁▁▃▃▂▁▂▂▁▁▂█▂▁▃▂▂▂
Epsilon,██▇▇▆▆▆▆▅▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
Reward,▆█▇▆▇█▇▇▇███▇███▇▇▆▇▇▇▇▇▇█▇▇▇█▇▇▅▁▇▇▅▅▆▆

0,1
Avg-Reward-100e,-210.84973
Episode,50.0
Episode Length,69.0
Epsilon,0.08124
Reward,-311.27501


[34m[1mwandb[0m: Agent Starting Run: usz7k5oi with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.957467581985955
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.009589738205663173
[34m[1mwandb[0m: 	update_target_net_interval: 5




[000] Reward: -253.361 | Avg Reward: -253.361 | e: 0.957
[001] Reward: -156.936 | Avg Reward: -205.149 | e: 0.917
[002] Reward:  -90.858 | Avg Reward: -167.052 | e: 0.878
[003] Reward: -239.506 | Avg Reward: -185.165 | e: 0.840
[004] Reward: -318.488 | Avg Reward: -211.830 | e: 0.805
[005] Reward: -253.585 | Avg Reward: -218.789 | e: 0.770
[006] Reward: -195.054 | Avg Reward: -215.398 | e: 0.738
[007] Reward: -227.904 | Avg Reward: -216.962 | e: 0.706
[008] Reward: -214.784 | Avg Reward: -216.720 | e: 0.676
[009] Reward:  -53.318 | Avg Reward: -200.379 | e: 0.648
[010] Reward:  -61.370 | Avg Reward: -187.742 | e: 0.620
[011] Reward:  -93.603 | Avg Reward: -179.897 | e: 0.594
[012] Reward:  -85.465 | Avg Reward: -172.633 | e: 0.568
[013] Reward:  -75.709 | Avg Reward: -165.710 | e: 0.544
[014] Reward: -169.066 | Avg Reward: -165.934 | e: 0.521
[015] Reward:    7.576 | Avg Reward: -155.089 | e: 0.499
[016] Reward:   -7.785 | Avg Reward: -146.424 | e: 0.478
[017] Reward:  -86.745 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,▁▃▄▃▃▄▅▅▆▆▇▇▇▇███▇▇▇▇▆▆▆▆▅▅▄▄▄▄▄▄▄▄▄▄▄▄▄
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
Episode Length,▁▁▁▁▁▁▁▂▂▂▂▂▂▄▁▂▁▂▁▂▁▁▁▁▁▁▃▂▂▁▂▄▃▅▄▃▁▁▁█
Epsilon,██▇▇▆▆▅▅▅▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
Reward,▄▅▄▄▅▆▆▆▇▆▆▆▇▆▇▆▆▃▇▅▅▄▅▃▃▃▂▁▂▆▅█▇▃▅▆▄▁▄▆

0,1
Avg-Reward-100e,-196.16076
Episode,75.0
Episode Length,999.0
Epsilon,0.03676
Reward,-49.37819


[34m[1mwandb[0m: Agent Starting Run: lwym52l9 with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.977675656780409
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.00570870852324875
[34m[1mwandb[0m: 	update_target_net_interval: 10




[000] Reward: -113.095 | Avg Reward: -113.095 | e: 0.978
[001] Reward: -110.615 | Avg Reward: -111.855 | e: 0.956
[002] Reward:  -12.536 | Avg Reward:  -78.749 | e: 0.935
[003] Reward: -117.737 | Avg Reward:  -88.496 | e: 0.914
[004] Reward: -268.794 | Avg Reward: -124.555 | e: 0.893
[005] Reward: -161.058 | Avg Reward: -130.639 | e: 0.873
[006] Reward: -142.172 | Avg Reward: -132.287 | e: 0.854
[007] Reward: -290.263 | Avg Reward: -152.034 | e: 0.835
[008] Reward: -120.989 | Avg Reward: -148.584 | e: 0.816
[009] Reward: -314.230 | Avg Reward: -165.149 | e: 0.798
[010] Reward:  -35.637 | Avg Reward: -153.375 | e: 0.780
[011] Reward: -252.449 | Avg Reward: -161.631 | e: 0.763
[012] Reward:   -9.373 | Avg Reward: -149.919 | e: 0.746
[013] Reward:  -37.956 | Avg Reward: -141.922 | e: 0.729
[014] Reward:  -82.485 | Avg Reward: -137.959 | e: 0.713
[015] Reward: -130.688 | Avg Reward: -137.505 | e: 0.697
[016] Reward: -450.929 | Avg Reward: -155.941 | e: 0.681
[017] Reward: -156.740 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,▆▇██▆▆▅▅▅▄▅▅▅▅▅▄▄▄▄▄▅▄▄▄▄▄▄▄▄▄▃▃▃▂▂▂▂▁▁▁
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
Episode Length,▂▁▂▁▁▂▁▁▁▁▂▁▁▂▂▂▁▂▃▃▂▃▅▂▄▁▄█▁▁▂▁▁▁▁▂▁▁▁▂
Epsilon,███▇▇▇▆▆▆▆▆▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁
Reward,▇▇█▇▆▆▅▇█▅█▇▇▃▆▄▇▆▆▆▇▅▄▆▆▇▄▄▆▃▁▆▃▃▆▃▁▃▅▂

0,1
Avg-Reward-100e,-236.50445
Episode,50.0
Episode Length,142.0
Epsilon,0.31618
Reward,-479.25932


[34m[1mwandb[0m: Agent Starting Run: b0tqqdhg with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.9595083622003894
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.007361246875651332
[34m[1mwandb[0m: 	update_target_net_interval: 50




[000] Reward: -312.259 | Avg Reward: -312.259 | e: 0.960
[001] Reward: -249.632 | Avg Reward: -280.946 | e: 0.921
[002] Reward: -105.817 | Avg Reward: -222.569 | e: 0.883
[003] Reward: -105.237 | Avg Reward: -193.236 | e: 0.848
[004] Reward: -201.481 | Avg Reward: -194.885 | e: 0.813
[005] Reward: -183.064 | Avg Reward: -192.915 | e: 0.780
[006] Reward: -117.595 | Avg Reward: -182.155 | e: 0.749
[007] Reward: -115.534 | Avg Reward: -173.827 | e: 0.718
[008] Reward:  -55.279 | Avg Reward: -160.655 | e: 0.689
[009] Reward: -114.043 | Avg Reward: -155.994 | e: 0.661
[010] Reward:  -64.081 | Avg Reward: -147.638 | e: 0.635
[011] Reward: -363.230 | Avg Reward: -165.604 | e: 0.609
[012] Reward: -327.957 | Avg Reward: -178.093 | e: 0.584
[013] Reward: -163.283 | Avg Reward: -177.035 | e: 0.561
[014] Reward: -367.028 | Avg Reward: -189.701 | e: 0.538
[015] Reward: -618.563 | Avg Reward: -216.505 | e: 0.516
[016] Reward: -302.653 | Avg Reward: -221.573 | e: 0.495
[017] Reward: -172.850 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,▁▂▅▆▆▇▇█▇▇▆▅▅▄▄▃▄▄▄▄▅▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
Episode Length,▂▃▂▂▃▄▃▂▂▂▂█▂▃▂▂▃▁▂▂▂▂▂▁▁▁▁▂▂▂▂▂▂▂▂▃▂█▆▇
Epsilon,██▇▇▇▆▆▅▅▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
Reward,▅▅▇▆▆▇█▇▄▄▄▁▆▂▂▃█▆▆▆▇▆▄▆▇▇▇▇▂▇███▇▇█▇▅▅▂

0,1
Avg-Reward-100e,-199.54207
Episode,56.0
Episode Length,245.0
Epsilon,0.09479
Reward,-497.07385


[34m[1mwandb[0m: Agent Starting Run: 5n57msb7 with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.9765239750907048
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.00828467018804905
[34m[1mwandb[0m: 	update_target_net_interval: 50




[000] Reward: -271.922 | Avg Reward: -271.922 | e: 0.977
[001] Reward:  -56.774 | Avg Reward: -164.348 | e: 0.954
[002] Reward: -129.351 | Avg Reward: -152.682 | e: 0.931
[003] Reward:  -78.639 | Avg Reward: -134.171 | e: 0.909
[004] Reward: -129.809 | Avg Reward: -133.299 | e: 0.888
[005] Reward: -100.680 | Avg Reward: -127.862 | e: 0.867
[006] Reward: -109.056 | Avg Reward: -125.176 | e: 0.847
[007] Reward: -141.310 | Avg Reward: -127.192 | e: 0.827
[008] Reward: -125.168 | Avg Reward: -126.967 | e: 0.808
[009] Reward: -370.718 | Avg Reward: -151.343 | e: 0.789
[010] Reward: -165.060 | Avg Reward: -152.590 | e: 0.770
[011] Reward: -214.176 | Avg Reward: -157.722 | e: 0.752
[012] Reward: -102.024 | Avg Reward: -153.437 | e: 0.734
[013] Reward: -236.307 | Avg Reward: -159.357 | e: 0.717
[014] Reward:  -95.778 | Avg Reward: -155.118 | e: 0.700
[015] Reward: -475.555 | Avg Reward: -175.145 | e: 0.684
[016] Reward: -290.585 | Avg Reward: -181.936 | e: 0.668
[017] Reward: -228.087 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,▂▆▇█████▇▇▇▇▆▆▆▅▄▄▄▄▃▄▃▃▃▃▃▂▃▂▂▂▂▁▁▁▁▁▁▁
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
Episode Length,▅▂▅▂▂▅▃▄▅█▅▂▆▅▃▄▂▄▂▅▆▁▄▅▄▂▅▃▂▁▁▃▄▁▂▃▃▃▄▁
Epsilon,███▇▇▇▆▆▆▆▆▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁
Reward,▆█▇██▇▇▇▇▆██▃▅▆▂▅▃▇▆▄▇▄▅▂▅▃▃█▇▅▃▁▅▄▃▆▅▄▅

0,1
Avg-Reward-100e,-303.12217
Episode,50.0
Episode Length,50.0
Epsilon,0.29773
Reward,-307.42027


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: aayso0dm with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.9541697498734372
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.009679894459300488
[34m[1mwandb[0m: 	update_target_net_interval: 20




[000] Reward: -280.108 | Avg Reward: -280.108 | e: 0.954
[001] Reward:  -24.499 | Avg Reward: -152.304 | e: 0.910
[002] Reward:  -80.171 | Avg Reward: -128.260 | e: 0.869
[003] Reward: -281.212 | Avg Reward: -166.498 | e: 0.829
[004] Reward:   46.071 | Avg Reward: -123.984 | e: 0.791
[005] Reward:  -94.030 | Avg Reward: -118.992 | e: 0.755
[006] Reward: -267.111 | Avg Reward: -140.151 | e: 0.720
[007] Reward: -132.004 | Avg Reward: -139.133 | e: 0.687
[008] Reward: -565.340 | Avg Reward: -186.489 | e: 0.656
[009] Reward: -328.883 | Avg Reward: -200.729 | e: 0.626
[010] Reward: -478.271 | Avg Reward: -225.960 | e: 0.597
[011] Reward: -405.805 | Avg Reward: -240.947 | e: 0.570
[012] Reward: -334.148 | Avg Reward: -248.116 | e: 0.543
[013] Reward: -384.837 | Avg Reward: -257.882 | e: 0.519
[014] Reward: -321.004 | Avg Reward: -262.090 | e: 0.495
[015] Reward: -242.932 | Avg Reward: -260.893 | e: 0.472
[016] Reward: -546.854 | Avg Reward: -277.714 | e: 0.450
[017] Reward: -459.253 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,▃▇█▇█▇▇▆▅▄▄▄▄▃▃▃▃▃▂▃▂▃▃▃▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
Episode Length,▄▃▆▃▆▃▄▃▃▂▂▂▁▃▃▃▃▃▃▂▄▅▂▅▄▄▂▃▂▃▂▅▄▆▅█▄▂▅▆
Epsilon,██▇▇▆▆▆▆▅▅▅▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
Reward,▅█▇▅▇▅▇▂▃▄▅▅▆▂▃▅▅▄▆▇▃▇▄▃▃▄▄▁▃▂▃▅▄▅▅▅▃▃▄▅

0,1
Avg-Reward-100e,-344.19469
Episode,50.0
Episode Length,111.0
Epsilon,0.09139
Reward,-278.69335


[34m[1mwandb[0m: Agent Starting Run: 2p5rn5ry with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.9766859932313726
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.005718350698325413
[34m[1mwandb[0m: 	update_target_net_interval: 1




[000] Reward: -134.804 | Avg Reward: -134.804 | e: 0.977
[001] Reward: -150.131 | Avg Reward: -142.467 | e: 0.954
[002] Reward: -185.616 | Avg Reward: -156.850 | e: 0.932
[003] Reward: -125.568 | Avg Reward: -149.029 | e: 0.910
[004] Reward:  -35.430 | Avg Reward: -126.309 | e: 0.889
[005] Reward: -331.888 | Avg Reward: -160.572 | e: 0.868
[006] Reward: -311.861 | Avg Reward: -182.185 | e: 0.848
[007] Reward: -293.474 | Avg Reward: -196.096 | e: 0.828
[008] Reward:  -80.935 | Avg Reward: -183.301 | e: 0.809
[009] Reward:  -61.140 | Avg Reward: -171.084 | e: 0.790
[010] Reward:  -97.803 | Avg Reward: -164.422 | e: 0.771
[011] Reward: -103.152 | Avg Reward: -159.317 | e: 0.753
[012] Reward: -383.849 | Avg Reward: -176.588 | e: 0.736
[013] Reward: -168.357 | Avg Reward: -176.000 | e: 0.719
[014] Reward: -147.400 | Avg Reward: -174.094 | e: 0.702
[015] Reward:  -81.928 | Avg Reward: -168.333 | e: 0.686
[016] Reward: -181.706 | Avg Reward: -169.120 | e: 0.670
[017] Reward: -366.602 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,██▇▇▇▆▆▆▇▇▆▆▇▇▆▆▆▆▅▅▅▅▄▄▄▄▄▄▃▃▃▃▂▂▂▁▁▁▁▁
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
Episode Length,▁▂▁▃▂▃▂▂▁▂▂▂▁▂▃▃▄▂▂▁▂▂▂▂▁▆▅▃▅▄▄▂▇▆▁█▅▂▃▂
Epsilon,███▇▇▇▆▆▆▆▆▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁
Reward,▇▇▇▇▅▆▆█▇▇▅▇█▇▅▆▅▇█▆▆▆▄▅▇▅▅▅▃▆▆▄▃▁▆▃▅▆▅▇

0,1
Avg-Reward-100e,-311.17629
Episode,50.0
Episode Length,71.0
Epsilon,0.30026
Reward,-175.31664


[34m[1mwandb[0m: Agent Starting Run: twxbmqlu with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.9526007758397048
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.001164105840628992
[34m[1mwandb[0m: 	update_target_net_interval: 50




[000] Reward: -104.733 | Avg Reward: -104.733 | e: 0.953
[001] Reward: -154.950 | Avg Reward: -129.841 | e: 0.907
[002] Reward:  -20.008 | Avg Reward:  -93.230 | e: 0.864
[003] Reward: -348.849 | Avg Reward: -157.135 | e: 0.823
[004] Reward: -354.359 | Avg Reward: -196.580 | e: 0.784
[005] Reward: -195.476 | Avg Reward: -196.396 | e: 0.747
[006] Reward: -180.958 | Avg Reward: -194.191 | e: 0.712
[007] Reward: -236.558 | Avg Reward: -199.486 | e: 0.678
[008] Reward:  -84.898 | Avg Reward: -186.754 | e: 0.646
[009] Reward: -176.327 | Avg Reward: -185.712 | e: 0.615
[010] Reward:  -20.355 | Avg Reward: -170.679 | e: 0.586
[011] Reward: -102.986 | Avg Reward: -165.038 | e: 0.558
[012] Reward: -325.301 | Avg Reward: -177.366 | e: 0.532
[013] Reward: -106.267 | Avg Reward: -172.287 | e: 0.507
[014] Reward: -324.838 | Avg Reward: -182.458 | e: 0.483
[015] Reward:  -40.210 | Avg Reward: -173.567 | e: 0.460
[016] Reward: -177.287 | Avg Reward: -173.786 | e: 0.438
[017] Reward: -203.602 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,▇▆█▅▃▃▃▃▄▄▄▄▄▄▄▄▄▄▄▃▄▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
Episode Length,▄▃▄▅▂▆▅▂▅▂▆▅▆▄▂█▅▃▃▅▃▁▄▅▄▂▂▅▄▂▁▂▂▁▂▃▃▄▄▃
Epsilon,██▇▇▆▆▆▆▅▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
Reward,▇▆█▄▆▆▅▇█▇▄▄█▆▆▅▇▇▅▂▆▇▄▁▄▅▂▅█▇▄▆█▅▃▄▃▄▃▂

0,1
Avg-Reward-100e,-234.31427
Episode,50.0
Episode Length,85.0
Epsilon,0.08403
Reward,-463.45031


[34m[1mwandb[0m: Agent Starting Run: q85g5xoz with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.9570179922510657
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.00903906159061347
[34m[1mwandb[0m: 	update_target_net_interval: 10




[000] Reward:  -71.093 | Avg Reward:  -71.093 | e: 0.957
[001] Reward:  -87.348 | Avg Reward:  -79.221 | e: 0.916
[002] Reward: -420.631 | Avg Reward: -193.024 | e: 0.877
[003] Reward:  -70.783 | Avg Reward: -162.464 | e: 0.839
[004] Reward: -189.678 | Avg Reward: -167.907 | e: 0.803
[005] Reward: -323.976 | Avg Reward: -193.918 | e: 0.768
[006] Reward: -212.792 | Avg Reward: -196.614 | e: 0.735
[007] Reward: -176.593 | Avg Reward: -194.112 | e: 0.704
[008] Reward: -366.118 | Avg Reward: -213.224 | e: 0.673
[009] Reward: -525.313 | Avg Reward: -244.433 | e: 0.644
[010] Reward: -151.268 | Avg Reward: -235.963 | e: 0.617
[011] Reward: -385.308 | Avg Reward: -248.409 | e: 0.590
[012] Reward: -652.420 | Avg Reward: -279.486 | e: 0.565
[013] Reward: -473.429 | Avg Reward: -293.339 | e: 0.541
[014] Reward: -461.553 | Avg Reward: -304.554 | e: 0.517
[015] Reward: -319.240 | Avg Reward: -305.471 | e: 0.495
[016] Reward: -221.847 | Avg Reward: -300.552 | e: 0.474
[017] Reward:  -53.470 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,██▅▆▅▅▅▄▄▄▃▂▂▂▃▃▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▁▁
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
Episode Length,▂▁▂▁▁▂▂▂▁▂▂▂▂▂▂▄▂▁▂▁▂▂▁▁▂▂▁▂▂▂▃▃▂▁▁▁▄▁█▅
Epsilon,██▇▇▆▆▆▆▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
Reward,██▇█▇▇█▇█▇▆▇▇▇█▇▇█▇█▇▇▇▇▇▇██▇▇▆▆▆▇▇█▆▇▁▇

0,1
Avg-Reward-100e,-352.07712
Episode,50.0
Episode Length,277.0
Epsilon,0.1064
Reward,-323.2851


[34m[1mwandb[0m: Agent Starting Run: 9mkswodq with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.9703493514254146
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.0026709320814524
[34m[1mwandb[0m: 	update_target_net_interval: 10




[000] Reward: -136.939 | Avg Reward: -136.939 | e: 0.970
[001] Reward:  -56.395 | Avg Reward:  -96.667 | e: 0.942
[002] Reward: -301.901 | Avg Reward: -165.079 | e: 0.914
[003] Reward: -343.960 | Avg Reward: -209.799 | e: 0.887
[004] Reward: -146.135 | Avg Reward: -197.066 | e: 0.860
[005] Reward: -171.767 | Avg Reward: -192.850 | e: 0.835
[006] Reward: -311.993 | Avg Reward: -209.870 | e: 0.810
[007] Reward: -374.389 | Avg Reward: -230.435 | e: 0.786
[008] Reward: -338.667 | Avg Reward: -242.461 | e: 0.763
[009] Reward: -107.167 | Avg Reward: -228.931 | e: 0.740
[010] Reward: -288.513 | Avg Reward: -234.348 | e: 0.718
[011] Reward:  -66.373 | Avg Reward: -220.350 | e: 0.697
[012] Reward: -190.177 | Avg Reward: -218.029 | e: 0.676
[013] Reward: -254.109 | Avg Reward: -220.606 | e: 0.656
[014] Reward: -144.066 | Avg Reward: -215.504 | e: 0.637
[015] Reward: -175.340 | Avg Reward: -212.993 | e: 0.618
[016] Reward:  -55.571 | Avg Reward: -203.733 | e: 0.599
[017] Reward: -126.221 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,▆█▅▃▃▃▂▁▁▂▂▂▂▃▃▂▃▃▂▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
Episode Length,▁▁▁▂▁▁▁▁▂▂▂▁▂▁▁▃▃▃▃▂▂▂▂▂▄▂▂▃▂▃▂▁█▅▇▂▄▄▄▄
Epsilon,██▇▇▇▇▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁
Reward,▆▇▄▃▆▄▃▄▄▇▆▆▆▇▆▁▇▆▄▄█▄▅▆▂▆▅▅▄▅▆▇▆▅▆▄██▇▆

0,1
Avg-Reward-100e,-203.75147
Episode,50.0
Episode Length,318.0
Epsilon,0.21544
Reward,-168.87353


[34m[1mwandb[0m: Agent Starting Run: h5scrbuz with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.9654372859802328
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.006539632288446007
[34m[1mwandb[0m: 	update_target_net_interval: 100




[000] Reward:  -63.181 | Avg Reward:  -63.181 | e: 0.965
[001] Reward:  -57.279 | Avg Reward:  -60.230 | e: 0.932
[002] Reward: -298.351 | Avg Reward: -139.604 | e: 0.900
[003] Reward: -170.447 | Avg Reward: -147.314 | e: 0.869
[004] Reward:   -1.554 | Avg Reward: -118.162 | e: 0.839
[005] Reward: -183.177 | Avg Reward: -128.998 | e: 0.810
[006] Reward: -126.694 | Avg Reward: -128.669 | e: 0.782
[007] Reward: -221.353 | Avg Reward: -140.254 | e: 0.755
[008] Reward: -389.435 | Avg Reward: -167.941 | e: 0.729
[009] Reward: -110.693 | Avg Reward: -162.216 | e: 0.703
[010] Reward: -257.233 | Avg Reward: -170.854 | e: 0.679
[011] Reward:  -82.560 | Avg Reward: -163.496 | e: 0.656
[012] Reward:  -92.023 | Avg Reward: -157.998 | e: 0.633
[013] Reward:  -88.158 | Avg Reward: -153.010 | e: 0.611
[014] Reward: -627.263 | Avg Reward: -184.627 | e: 0.590
[015] Reward: -243.655 | Avg Reward: -188.316 | e: 0.570
[016] Reward: -265.976 | Avg Reward: -192.884 | e: 0.550
[017] Reward: -194.110 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,██▄▅▅▅▃▃▃▃▄▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂
Episode,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
Episode Length,▁▁▂▂▂▄▂▃▃▃▃▃▃▃▆▂▄▃▄▃█▅▇▂▃▃▄▃▅▄▄▄▅▃▄▁▇▁█▂
Epsilon,██▇▇▇▆▆▆▆▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
Reward,▇▇▅█▆▇▅▇▆▇▇▃▆▆▇▇▅▆▇▇▃▇▇▃▄▇▇▆█▇▇▇▆▇▇▇▆▇▁█

0,1
Avg-Reward-100e,-194.7186
Episode,53.0
Episode Length,94.0
Epsilon,0.14966
Reward,11.94396


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jtdf07m1 with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.9730224073414075
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.00845159167456381
[34m[1mwandb[0m: 	update_target_net_interval: 20




[000] Reward: -106.818 | Avg Reward: -106.818 | e: 0.973
[001] Reward:  -99.511 | Avg Reward: -103.165 | e: 0.947
[002] Reward:  -57.611 | Avg Reward:  -87.980 | e: 0.921
[003] Reward:  -92.239 | Avg Reward:  -89.045 | e: 0.896
[004] Reward: -272.551 | Avg Reward: -125.746 | e: 0.872
[005] Reward: -247.933 | Avg Reward: -146.111 | e: 0.849
[006] Reward: -340.174 | Avg Reward: -173.834 | e: 0.826
[007] Reward: -298.504 | Avg Reward: -189.418 | e: 0.803
[008] Reward: -102.907 | Avg Reward: -179.805 | e: 0.782
[009] Reward: -112.881 | Avg Reward: -173.113 | e: 0.761
[010] Reward: -152.003 | Avg Reward: -171.194 | e: 0.740
[011] Reward: -362.384 | Avg Reward: -187.126 | e: 0.720
[012] Reward: -415.153 | Avg Reward: -204.667 | e: 0.701
[013] Reward: -358.792 | Avg Reward: -215.676 | e: 0.682
[014] Reward: -388.821 | Avg Reward: -227.219 | e: 0.664
[015] Reward: -331.682 | Avg Reward: -233.748 | e: 0.646
[016] Reward: -157.983 | Avg Reward: -229.291 | e: 0.628
[017] Reward:   35.392 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,▇▇██▆▄▄▄▅▄▃▂▂▂▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
Episode Length,▂▂▁▁▂▂▁▁▁▂▂▂▁▁▁▁▁▂▂▂▂▁▅▂▄▃▂▂▄▄▂▂▂▂▂▂▃▅▁█
Epsilon,███▇▇▇▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁
Reward,▇▇▇▇▆▅▅▇▆▅▄▄▅▆█▇▇▅▄▄▆▅▄▅▅▆▆▅▇▆▆▄▆▄▆▅▇▄▆▁

0,1
Avg-Reward-100e,-258.13826
Episode,50.0
Episode Length,355.0
Epsilon,0.24789
Reward,-795.40728


[34m[1mwandb[0m: Agent Starting Run: pmjacxue with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.9638062263747436
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.009236769770449576
[34m[1mwandb[0m: 	update_target_net_interval: 50




[000] Reward: -140.382 | Avg Reward: -140.382 | e: 0.964
[001] Reward: -372.333 | Avg Reward: -256.357 | e: 0.929
[002] Reward:  -56.436 | Avg Reward: -189.717 | e: 0.895
[003] Reward:  -56.501 | Avg Reward: -156.413 | e: 0.863
[004] Reward: -147.083 | Avg Reward: -154.547 | e: 0.832
[005] Reward: -142.908 | Avg Reward: -152.607 | e: 0.802
[006] Reward: -286.158 | Avg Reward: -171.686 | e: 0.773
[007] Reward: -341.729 | Avg Reward: -192.941 | e: 0.745
[008] Reward:  -57.320 | Avg Reward: -177.872 | e: 0.718
[009] Reward: -415.734 | Avg Reward: -201.658 | e: 0.692
[010] Reward: -332.802 | Avg Reward: -213.581 | e: 0.667
[011] Reward: -383.431 | Avg Reward: -227.735 | e: 0.643
[012] Reward:  -85.084 | Avg Reward: -216.762 | e: 0.619
[013] Reward: -261.074 | Avg Reward: -219.927 | e: 0.597
[014] Reward: -129.225 | Avg Reward: -213.880 | e: 0.575
[015] Reward:  -84.449 | Avg Reward: -205.791 | e: 0.554
[016] Reward: -140.935 | Avg Reward: -201.976 | e: 0.534
[017] Reward: -134.319 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,█▃▆▇▇▇▆▆▅▄▄▅▅▅▅▅▅▅▄▄▄▄▃▃▃▃▃▃▃▂▂▂▁▁▁▁▁▁▁▁
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
Episode Length,█▅▃▆▆▄▇▃█▄▂▂▂▃▄▅▄▆▆▅█▂▃▂▁▂▃▅▃▆▁▅▃▂▃▃▂▂▅▃
Epsilon,██▇▇▇▆▆▆▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
Reward,▇▄██▇▅▅█▅▄█▇█▇▇▄█▆▂▄▆▇▃▅▆▅▃▄▅▃▅▁▅▇▃▇▆▅▄▃

0,1
Avg-Reward-100e,-288.03544
Episode,50.0
Episode Length,72.0
Epsilon,0.15257
Reward,-533.43908


[34m[1mwandb[0m: Agent Starting Run: r3h3uqzo with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.963228287825774
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.009523592381047347
[34m[1mwandb[0m: 	update_target_net_interval: 20




[000] Reward:  -67.960 | Avg Reward:  -67.960 | e: 0.963
[001] Reward: -278.647 | Avg Reward: -173.303 | e: 0.928
[002] Reward: -138.792 | Avg Reward: -161.800 | e: 0.894
[003] Reward: -283.534 | Avg Reward: -192.233 | e: 0.861
[004] Reward: -155.181 | Avg Reward: -184.823 | e: 0.829
[005] Reward: -240.607 | Avg Reward: -194.120 | e: 0.799
[006] Reward:  -54.627 | Avg Reward: -174.193 | e: 0.769
[007] Reward: -187.898 | Avg Reward: -175.906 | e: 0.741
[008] Reward: -178.858 | Avg Reward: -176.234 | e: 0.714
[009] Reward: -378.859 | Avg Reward: -196.496 | e: 0.688
[010] Reward: -202.777 | Avg Reward: -197.067 | e: 0.662
[011] Reward: -458.304 | Avg Reward: -218.837 | e: 0.638
[012] Reward: -237.636 | Avg Reward: -220.283 | e: 0.614
[013] Reward: -198.036 | Avg Reward: -218.694 | e: 0.592
[014] Reward: -325.462 | Avg Reward: -225.812 | e: 0.570
[015] Reward: -118.853 | Avg Reward: -219.127 | e: 0.549
[016] Reward: -485.946 | Avg Reward: -234.822 | e: 0.529
[017] Reward:  -32.896 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,█▄▄▃▃▄▄▄▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
Episode Length,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▁▁▃▂▂▃▅█▂▃▄▂█▃▄█▆
Epsilon,██▇▇▇▆▆▆▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
Reward,▇▄▆▄▅█▆▆▅▁▅▃▇▁█▇▆▃▂▅▅▄▅▃▇▂▄▄▅▃▆▇▄▅▇▃▆▁▃▆

0,1
Avg-Reward-100e,-246.90032
Episode,50.0
Episode Length,667.0
Epsilon,0.14798
Reward,-136.48607


[34m[1mwandb[0m: Agent Starting Run: 7aw1fz0r with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.9512537693004894
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.0016279018091735714
[34m[1mwandb[0m: 	update_target_net_interval: 20




[000] Reward:  -86.131 | Avg Reward:  -86.131 | e: 0.951
[001] Reward: -122.446 | Avg Reward: -104.288 | e: 0.905
[002] Reward:  -43.378 | Avg Reward:  -83.985 | e: 0.861
[003] Reward: -300.512 | Avg Reward: -138.117 | e: 0.819
[004] Reward: -227.288 | Avg Reward: -155.951 | e: 0.779
[005] Reward: -122.276 | Avg Reward: -150.338 | e: 0.741
[006] Reward: -140.934 | Avg Reward: -148.995 | e: 0.705
[007] Reward:  -93.187 | Avg Reward: -142.019 | e: 0.670
[008] Reward: -263.769 | Avg Reward: -155.547 | e: 0.638
[009] Reward: -276.130 | Avg Reward: -167.605 | e: 0.607
[010] Reward: -532.260 | Avg Reward: -200.756 | e: 0.577
[011] Reward: -334.349 | Avg Reward: -211.888 | e: 0.549
[012] Reward: -319.593 | Avg Reward: -220.173 | e: 0.522
[013] Reward: -587.078 | Avg Reward: -246.381 | e: 0.497
[014] Reward: -345.057 | Avg Reward: -252.959 | e: 0.473
[015] Reward: -211.781 | Avg Reward: -250.386 | e: 0.450
[016] Reward: -248.631 | Avg Reward: -250.282 | e: 0.428
[017] Reward: -225.295 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,█▇█▆▆▆▆▆▅▄▄▃▃▃▃▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
Episode Length,▁▁▁▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▁▁▁▂▂▁▃▂▁▂▂▁▂▁█▅█
Epsilon,██▇▇▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁
Reward,█▇█▆▇▇█▆▄▆▆▆▇▆▇▆▆▄▆▆▆▄▁▆▆▅▆▄▆▅▅▆▆▅▆▆▆▄▅▆

0,1
Avg-Reward-100e,-334.82635
Episode,50.0
Episode Length,999.0
Epsilon,0.07818
Reward,-307.10982


[34m[1mwandb[0m: Agent Starting Run: 8b9zgg6x with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.9577922776589493
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.001651274918938225
[34m[1mwandb[0m: 	update_target_net_interval: 100




[000] Reward: -125.230 | Avg Reward: -125.230 | e: 0.958
[001] Reward:  -85.974 | Avg Reward: -105.602 | e: 0.917
[002] Reward:  -49.252 | Avg Reward:  -86.818 | e: 0.879
[003] Reward: -194.198 | Avg Reward: -113.663 | e: 0.842
[004] Reward: -243.238 | Avg Reward: -139.578 | e: 0.806
[005] Reward:  -40.112 | Avg Reward: -123.001 | e: 0.772
[006] Reward:   -0.964 | Avg Reward: -105.567 | e: 0.739
[007] Reward: -108.160 | Avg Reward: -105.891 | e: 0.708
[008] Reward: -140.441 | Avg Reward: -109.730 | e: 0.678
[009] Reward: -224.392 | Avg Reward: -121.196 | e: 0.650
[010] Reward: -142.129 | Avg Reward: -123.099 | e: 0.622
[011] Reward: -130.499 | Avg Reward: -123.716 | e: 0.596
[012] Reward: -179.514 | Avg Reward: -128.008 | e: 0.571
[013] Reward: -221.874 | Avg Reward: -134.713 | e: 0.547
[014] Reward: -132.456 | Avg Reward: -134.562 | e: 0.524
[015] Reward:  -37.397 | Avg Reward: -128.489 | e: 0.502
[016] Reward: -357.519 | Avg Reward: -141.962 | e: 0.480
[017] Reward:  -16.595 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,▇█▇▇██▇▆▆▅▆▅▆▇▇▆▆▇▇▆▆▄▄▅▄▄▄▄▄▄▃▄▃▃▂▂▂▂▁▁
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
Episode Length,▂▁▂▂▂▃▂▂▂▃▂▂▂▁▂▂▂▃▃█▅▃▁▂▁▂▁▃▂▃▂▃▂▂▂▁▁▁▂▁
Epsilon,██▇▇▆▆▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
Reward,▇█▇█▇▇▇▇▇▆█▇██▇▆▇█▇▆▇▁██▇█▆█▆▇▆█▆▆▅█▆▆▅▅

0,1
Avg-Reward-100e,-199.49246
Episode,70.0
Episode Length,62.0
Epsilon,0.0468
Reward,-425.52752


[34m[1mwandb[0m: Agent Starting Run: c2ceynud with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.9670469287354048
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.0037506156576766177
[34m[1mwandb[0m: 	update_target_net_interval: 1




[000] Reward: -108.567 | Avg Reward: -108.567 | e: 0.967
[001] Reward: -192.847 | Avg Reward: -150.707 | e: 0.935
[002] Reward:  -66.860 | Avg Reward: -122.758 | e: 0.904
[003] Reward:   19.537 | Avg Reward:  -87.184 | e: 0.875
[004] Reward:  -86.907 | Avg Reward:  -87.129 | e: 0.846
[005] Reward: -186.236 | Avg Reward: -103.646 | e: 0.818
[006] Reward: -235.281 | Avg Reward: -122.451 | e: 0.791
[007] Reward: -154.470 | Avg Reward: -126.454 | e: 0.765
[008] Reward:  -61.526 | Avg Reward: -119.240 | e: 0.740
[009] Reward: -272.823 | Avg Reward: -134.598 | e: 0.715
[010] Reward: -313.404 | Avg Reward: -150.853 | e: 0.692
[011] Reward: -389.599 | Avg Reward: -170.749 | e: 0.669
[012] Reward: -402.825 | Avg Reward: -188.601 | e: 0.647
[013] Reward: -363.875 | Avg Reward: -201.120 | e: 0.626
[014] Reward: -338.169 | Avg Reward: -210.257 | e: 0.605
[015] Reward: -287.518 | Avg Reward: -215.086 | e: 0.585
[016] Reward: -246.323 | Avg Reward: -216.923 | e: 0.566
[017] Reward: -368.643 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,▇▅▆█▇▆▆▇▅▄▄▃▂▂▂▁▁▁▂▁▁▂▂▂▁▁▁▁▁▁▁▁▁▁▁▂▂▁▂▂
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
Episode Length,▂▁▂▁▂▂▁█▁▁▁▁▁▁▁▁▁▁▂▂▁▁▁▂▁▁▂▂▂▁▁▁▁▂▂▂▂▁▂▁
Epsilon,██▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
Reward,▆▅▇█▅▅▆▇▄▃▂▃▄▄▃▄▅▅▄▂▅▇▁▅▂▂▂▅▇▅▅▃▅▆▆▆▂▃▆▆

0,1
Avg-Reward-100e,-232.29921
Episode,50.0
Episode Length,87.0
Epsilon,0.18106
Reward,-103.30589


[34m[1mwandb[0m: Agent Starting Run: 2fo4e1qo with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.963118441067021
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.005696342913301236
[34m[1mwandb[0m: 	update_target_net_interval: 100




[000] Reward: -393.807 | Avg Reward: -393.807 | e: 0.963
[001] Reward: -143.526 | Avg Reward: -268.667 | e: 0.928
[002] Reward: -132.983 | Avg Reward: -223.439 | e: 0.893
[003] Reward: -226.107 | Avg Reward: -224.106 | e: 0.860
[004] Reward:  -58.493 | Avg Reward: -190.983 | e: 0.829
[005] Reward: -258.096 | Avg Reward: -202.169 | e: 0.798
[006] Reward: -130.146 | Avg Reward: -191.880 | e: 0.769
[007] Reward:  -88.790 | Avg Reward: -178.994 | e: 0.740
[008] Reward: -260.612 | Avg Reward: -188.062 | e: 0.713
[009] Reward: -167.512 | Avg Reward: -186.007 | e: 0.687
[010] Reward: -189.449 | Avg Reward: -186.320 | e: 0.661
[011] Reward: -125.783 | Avg Reward: -181.275 | e: 0.637
[012] Reward: -116.616 | Avg Reward: -176.302 | e: 0.614
[013] Reward:  -82.069 | Avg Reward: -169.571 | e: 0.591
[014] Reward:  -45.181 | Avg Reward: -161.278 | e: 0.569
[015] Reward:  -71.819 | Avg Reward: -155.687 | e: 0.548
[016] Reward:  -53.977 | Avg Reward: -149.704 | e: 0.528
[017] Reward:  -41.289 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,▁▅▆▆▆▆▇▇▇▇▇███████████▇▇▇▇▇▇▇▇▇▇▆▆▆▆▆▆▆▅
Episode,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
Episode Length,▂▂▂▂▄▃▃▅▃▆█▄▂▃▄▁▂▅▃▂▂▄▄▂▃▁▁▄▂▄▂▃▃▁▁▁▂▂▃▁
Epsilon,█▇▇▆▅▅▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Reward,▂▄▆▅▆▇▆▇█▆▆█▇▇█▇▆▄▇█▆▆▃▇▂▅▅▅▄▄▄▃▁▃▄▃▂▃▂▄

0,1
Avg-Reward-100e,-197.57147
Episode,121.0
Episode Length,67.0
Epsilon,0.01021
Reward,-269.40437


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ktths326 with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.959334336801774
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.003178252571117092
[34m[1mwandb[0m: 	update_target_net_interval: 10




[000] Reward: -337.965 | Avg Reward: -337.965 | e: 0.959
[001] Reward: -112.936 | Avg Reward: -225.450 | e: 0.920
[002] Reward: -188.034 | Avg Reward: -212.978 | e: 0.883
[003] Reward: -162.864 | Avg Reward: -200.450 | e: 0.847
[004] Reward: -222.427 | Avg Reward: -204.845 | e: 0.813
[005] Reward:  -90.132 | Avg Reward: -185.726 | e: 0.780
[006] Reward:  -95.038 | Avg Reward: -172.771 | e: 0.748
[007] Reward:  -94.403 | Avg Reward: -162.975 | e: 0.717
[008] Reward: -198.016 | Avg Reward: -166.868 | e: 0.688
[009] Reward:  -88.842 | Avg Reward: -159.066 | e: 0.660
[010] Reward: -131.415 | Avg Reward: -156.552 | e: 0.633
[011] Reward: -114.960 | Avg Reward: -153.086 | e: 0.608
[012] Reward: -127.454 | Avg Reward: -151.114 | e: 0.583
[013] Reward: -107.187 | Avg Reward: -147.977 | e: 0.559
[014] Reward: -168.214 | Avg Reward: -149.326 | e: 0.536
[015] Reward: -109.690 | Avg Reward: -146.849 | e: 0.515
[016] Reward:  -99.703 | Avg Reward: -144.075 | e: 0.494
[017] Reward:  -72.937 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,▁▅▅▆▆▇▇▇▇▇▇▇█████▇▆▆▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
Episode Length,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▁▂▃▂▁▂▂▂▃██▅▂██▂▃
Epsilon,██▇▇▆▆▆▆▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
Reward,▄▇▆▇███▆▇▇▇▇▇███▄▂▃▄▁▆█▆▁▅▅▅▆▆▃▆▅▆▇█▆▆▅█

0,1
Avg-Reward-100e,-225.11952
Episode,50.0
Episode Length,293.0
Epsilon,0.12036
Reward,-85.67436


[34m[1mwandb[0m: Agent Starting Run: 9hntt44a with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.9665971726319688
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.00029231696625000024
[34m[1mwandb[0m: 	update_target_net_interval: 10




[000] Reward: -111.495 | Avg Reward: -111.495 | e: 0.967
[001] Reward: -295.932 | Avg Reward: -203.713 | e: 0.934
[002] Reward:  -72.859 | Avg Reward: -160.095 | e: 0.903
[003] Reward: -189.833 | Avg Reward: -167.530 | e: 0.873
[004] Reward: -202.727 | Avg Reward: -174.569 | e: 0.844
[005] Reward: -308.713 | Avg Reward: -196.926 | e: 0.816
[006] Reward:  -62.102 | Avg Reward: -177.666 | e: 0.788
[007] Reward: -135.585 | Avg Reward: -172.406 | e: 0.762
[008] Reward:  -90.041 | Avg Reward: -163.254 | e: 0.737
[009] Reward: -144.546 | Avg Reward: -161.383 | e: 0.712
[010] Reward: -101.582 | Avg Reward: -155.947 | e: 0.688
[011] Reward: -129.236 | Avg Reward: -153.721 | e: 0.665
[012] Reward:  -15.781 | Avg Reward: -143.110 | e: 0.643
[013] Reward:  -92.171 | Avg Reward: -139.472 | e: 0.621
[014] Reward:  -83.794 | Avg Reward: -135.760 | e: 0.601
[015] Reward: -124.161 | Avg Reward: -135.035 | e: 0.581
[016] Reward: -114.003 | Avg Reward: -133.798 | e: 0.561
[017] Reward:  -58.725 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,▁▂▂▂▂▃▃▄▄▅▅▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇████████
Episode,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
Episode Length,▁▁▁▂▂▇▄▂▄▆▃█▃▄▅▄▂▃▂▃▄▆▃█▃▂▂▂▂▃▂▂▂▄▃▃▃▃▄▂
Epsilon,█▆▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Reward,▁▂▂▄▃▆▇▅▇▇▇▆▇▇▇▇▇▇▇▇▇▇█▄▇▇▃▄█▇██▇▇█▇▇▇▆▇

0,1
Avg-Reward-100e,239.7941
Episode,499.0
Episode Length,298.0
Epsilon,0.00985
Reward,270.98086


[34m[1mwandb[0m: Agent Starting Run: 9pzgz1w9 with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.9787430365217468
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.00857121379461832
[34m[1mwandb[0m: 	update_target_net_interval: 5




[000] Reward: -284.826 | Avg Reward: -284.826 | e: 0.979
[001] Reward: -319.366 | Avg Reward: -302.096 | e: 0.958
[002] Reward: -281.848 | Avg Reward: -295.346 | e: 0.938
[003] Reward:  -66.351 | Avg Reward: -238.098 | e: 0.918
[004] Reward: -310.443 | Avg Reward: -252.567 | e: 0.898
[005] Reward: -135.670 | Avg Reward: -233.084 | e: 0.879
[006] Reward: -375.677 | Avg Reward: -253.454 | e: 0.860
[007] Reward: -271.935 | Avg Reward: -255.764 | e: 0.842
[008] Reward: -245.356 | Avg Reward: -254.608 | e: 0.824
[009] Reward: -225.167 | Avg Reward: -251.664 | e: 0.807
[010] Reward:  -91.045 | Avg Reward: -237.062 | e: 0.790
[011] Reward: -118.260 | Avg Reward: -227.162 | e: 0.773
[012] Reward: -338.497 | Avg Reward: -235.726 | e: 0.756
[013] Reward: -394.692 | Avg Reward: -247.081 | e: 0.740
[014] Reward:  -83.148 | Avg Reward: -236.152 | e: 0.724
[015] Reward: -436.656 | Avg Reward: -248.683 | e: 0.709
[016] Reward: -322.180 | Avg Reward: -253.007 | e: 0.694
[017] Reward:  -43.704 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,▃▂▃▇█▆▆▆▇█▇▇▆▆▇▅▄▄▅▄▅▄▄▃▂▂▁▂▂▂▂▂▂▂▂▂▂▂▂▁
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
Episode Length,▂▁▂▁▁▂▂▂▁▁▁▁▂▁▁▂▁▁█▂▂▁▁▁▂▂▂▂▂▁▂▁▂▂▁▁▁▂▁▁
Epsilon,███▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁
Reward,▅▅▅█▇▄▅▆▇▇▅▇▄▅█▄▃▆█▃▇▃▃▃▁▃▃▇▄▇▅▄▂▄▆▆▄▅▅▆

0,1
Avg-Reward-100e,-316.51135
Episode,50.0
Episode Length,94.0
Epsilon,0.33427
Reward,-260.88386


[34m[1mwandb[0m: Agent Starting Run: werqc8vn with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.983625883641753
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.005596658823893329
[34m[1mwandb[0m: 	update_target_net_interval: 5




[000] Reward: -361.099 | Avg Reward: -361.099 | e: 0.984
[001] Reward:  -91.462 | Avg Reward: -226.280 | e: 0.968
[002] Reward: -125.901 | Avg Reward: -192.821 | e: 0.952
[003] Reward:   28.549 | Avg Reward: -137.478 | e: 0.936
[004] Reward: -399.064 | Avg Reward: -189.795 | e: 0.921
[005] Reward:  -87.525 | Avg Reward: -172.750 | e: 0.906
[006] Reward: -203.771 | Avg Reward: -177.182 | e: 0.891
[007] Reward: -105.719 | Avg Reward: -168.249 | e: 0.876
[008] Reward: -259.818 | Avg Reward: -178.423 | e: 0.862
[009] Reward: -349.275 | Avg Reward: -195.509 | e: 0.848
[010] Reward: -119.095 | Avg Reward: -188.562 | e: 0.834
[011] Reward: -333.963 | Avg Reward: -200.679 | e: 0.820
[012] Reward: -105.912 | Avg Reward: -193.389 | e: 0.807
[013] Reward: -108.733 | Avg Reward: -187.342 | e: 0.794
[014] Reward: -380.547 | Avg Reward: -200.222 | e: 0.781
[015] Reward:  -54.405 | Avg Reward: -191.109 | e: 0.768
[016] Reward: -412.156 | Avg Reward: -204.112 | e: 0.755
[017] Reward: -219.577 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,▁▅▆█▇▇▇▇▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▅▅▅▅▄▄▄▅▅▅▅
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
Episode Length,▂▁▂▂▁▂▁▂▂▂▁▃▃▄▄▂▁▂▂▃▄▄▁▇▂▃▂▃▃▆▅▄▄▅▅█▅▆▆▇
Epsilon,███▇▇▇▇▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁
Reward,▄▇▆█▇▆▇▅▇▄▇▄▇▄▆▆▆▄▇▆▆▇▇▅▅▃▄▂▆▁▃▃▄▃▅▅█▆▃▄

0,1
Avg-Reward-100e,-248.18047
Episode,50.0
Episode Length,185.0
Epsilon,0.43085
Reward,-395.10275


[34m[1mwandb[0m: Agent Starting Run: 4c1wsxz1 with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.9622543607385292
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.0071816997018404405
[34m[1mwandb[0m: 	update_target_net_interval: 50




[000] Reward: -118.249 | Avg Reward: -118.249 | e: 0.962
[001] Reward:  -52.223 | Avg Reward:  -85.236 | e: 0.926
[002] Reward: -370.105 | Avg Reward: -180.192 | e: 0.891
[003] Reward: -212.842 | Avg Reward: -188.355 | e: 0.857
[004] Reward: -117.374 | Avg Reward: -174.159 | e: 0.825
[005] Reward:  -87.053 | Avg Reward: -159.641 | e: 0.794
[006] Reward: -210.159 | Avg Reward: -166.858 | e: 0.764
[007] Reward:  -92.185 | Avg Reward: -157.524 | e: 0.735
[008] Reward:  -84.928 | Avg Reward: -149.458 | e: 0.707
[009] Reward:  -16.415 | Avg Reward: -136.153 | e: 0.681
[010] Reward: -114.005 | Avg Reward: -134.140 | e: 0.655
[011] Reward:  -76.500 | Avg Reward: -129.337 | e: 0.630
[012] Reward:  -76.781 | Avg Reward: -125.294 | e: 0.606
[013] Reward: -130.865 | Avg Reward: -125.692 | e: 0.584
[014] Reward: -111.635 | Avg Reward: -124.755 | e: 0.561
[015] Reward: -126.923 | Avg Reward: -124.890 | e: 0.540
[016] Reward: -221.757 | Avg Reward: -130.588 | e: 0.520
[017] Reward: -106.333 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,▇▂▃▃▅▆▆▆▆▆▇▇▇█▇███▇▇▇▇▆▅▅▄▄▃▃▃▂▂▂▂▂▂▁▁▁▁
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
Episode Length,▂▂▂▂▂▁▃▂▁▄▁▁▂▂▃▂▄▂▂▃▂▂▄▂▄▁▂▂▂▂▃▂▂▄▂▂▂█▄█
Epsilon,█▇▇▆▆▆▅▅▅▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
Reward,▇▃▇▅▇▇▇▇▇▆▇▇▇█▆█▇▇▄▅▅▆▄▃▁▃▄▃▄▃▄▄▃▅▅▆▅▂▄▄

0,1
Avg-Reward-100e,-199.15069
Episode,84.0
Episode Length,332.0
Epsilon,0.03799
Reward,-275.13922


[34m[1mwandb[0m: Agent Starting Run: 7xq8tbz7 with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.9840107113750036
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.007979575449058618
[34m[1mwandb[0m: 	update_target_net_interval: 5




[000] Reward: -215.198 | Avg Reward: -215.198 | e: 0.984
[001] Reward: -344.256 | Avg Reward: -279.727 | e: 0.968
[002] Reward: -438.988 | Avg Reward: -332.814 | e: 0.953
[003] Reward: -127.609 | Avg Reward: -281.513 | e: 0.938
[004] Reward: -136.194 | Avg Reward: -252.449 | e: 0.923
[005] Reward: -127.461 | Avg Reward: -231.618 | e: 0.908
[006] Reward: -177.974 | Avg Reward: -223.954 | e: 0.893
[007] Reward: -117.222 | Avg Reward: -210.613 | e: 0.879
[008] Reward: -346.198 | Avg Reward: -225.678 | e: 0.865
[009] Reward: -113.739 | Avg Reward: -214.484 | e: 0.851
[010] Reward: -206.952 | Avg Reward: -213.799 | e: 0.838
[011] Reward: -139.388 | Avg Reward: -207.598 | e: 0.824
[012] Reward: -213.035 | Avg Reward: -208.016 | e: 0.811
[013] Reward: -195.180 | Avg Reward: -207.100 | e: 0.798
[014] Reward: -241.942 | Avg Reward: -209.422 | e: 0.785
[015] Reward:  -44.952 | Avg Reward: -199.143 | e: 0.773
[016] Reward: -110.847 | Avg Reward: -193.949 | e: 0.760
[017] Reward: -210.993 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,▅▁▄▄▅▅▆▆▆▇▇▇▇▇████▇█▇▇▇▇▇▇▇▇▇▆▆▆▆▆▆▆▆▆▅▅
Episode,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
Episode Length,▂▂▁▂▁▁▁▁▁▁▂▁▁▁▁▂▂▂▃▁▄▂▂▂▄▂▂▂▂▃▂▄▂▄▂▄▄▅▂█
Epsilon,██▇▇▇▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
Reward,▆▄▇▅▇▆▇▇█▇▇█▇██▇▇█▆█▅█▆▄▅▅▆▆▅▄█▄▇▅▇▃▆▁▆▅

0,1
Avg-Reward-100e,-199.18762
Episode,109.0
Episode Length,742.0
Epsilon,0.16982
Reward,-357.16375


[34m[1mwandb[0m: Agent Starting Run: 7mdgkodb with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.9896016980339412
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.005162108062851291
[34m[1mwandb[0m: 	update_target_net_interval: 5




[000] Reward: -434.118 | Avg Reward: -434.118 | e: 0.990
[001] Reward: -173.099 | Avg Reward: -303.608 | e: 0.979
[002] Reward: -265.609 | Avg Reward: -290.942 | e: 0.969
[003] Reward: -239.701 | Avg Reward: -278.131 | e: 0.959
[004] Reward: -153.486 | Avg Reward: -253.202 | e: 0.949
[005] Reward: -213.247 | Avg Reward: -246.543 | e: 0.939
[006] Reward: -328.862 | Avg Reward: -258.303 | e: 0.929
[007] Reward: -144.237 | Avg Reward: -244.045 | e: 0.920
[008] Reward: -387.377 | Avg Reward: -259.970 | e: 0.910
[009] Reward: -112.593 | Avg Reward: -245.233 | e: 0.901
[010] Reward: -118.152 | Avg Reward: -233.680 | e: 0.891
[011] Reward: -194.270 | Avg Reward: -230.396 | e: 0.882
[012] Reward: -181.402 | Avg Reward: -226.627 | e: 0.873
[013] Reward:  -86.354 | Avg Reward: -216.607 | e: 0.864
[014] Reward: -123.394 | Avg Reward: -210.393 | e: 0.855
[015] Reward: -166.479 | Avg Reward: -207.649 | e: 0.846
[016] Reward: -381.268 | Avg Reward: -217.862 | e: 0.837
[017] Reward:  -23.898 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,▁▅▅▆▆▆▇▆▇▇▇██▇████████████▇█▇▇▇▇▇▇▇▇▇▇▇▇
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
Episode Length,▄▅▃▃▃▄▅▄▂▂▁▄▄▃▄▃▆▄▄▂▂▇▅▁▂▁▃▂▅█▃▁█▃▃▄▆▆▅▃
Epsilon,███▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁
Reward,▂▆▅▅▅▄▆▃▇▆▆▆▆▃█▇▅▆▄▇▄▄▃▅▅▇▁█▁▁▃▆▇▂▇▄▆▆▅▆

0,1
Avg-Reward-100e,-228.87355
Episode,50.0
Episode Length,98.0
Epsilon,0.58679
Reward,-145.00423


[34m[1mwandb[0m: Agent Starting Run: jzw6jdzi with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.9811296544258616
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.0009408269893628528
[34m[1mwandb[0m: 	update_target_net_interval: 20




[000] Reward: -131.186 | Avg Reward: -131.186 | e: 0.981
[001] Reward: -300.631 | Avg Reward: -215.908 | e: 0.963
[002] Reward:  -70.335 | Avg Reward: -167.384 | e: 0.944
[003] Reward: -176.260 | Avg Reward: -169.603 | e: 0.927
[004] Reward: -199.747 | Avg Reward: -175.632 | e: 0.909
[005] Reward: -158.789 | Avg Reward: -172.825 | e: 0.892
[006] Reward: -105.113 | Avg Reward: -163.151 | e: 0.875
[007] Reward: -106.140 | Avg Reward: -156.025 | e: 0.859
[008] Reward: -111.319 | Avg Reward: -151.058 | e: 0.842
[009] Reward: -294.993 | Avg Reward: -165.451 | e: 0.827
[010] Reward:  -78.889 | Avg Reward: -157.582 | e: 0.811
[011] Reward: -110.622 | Avg Reward: -153.669 | e: 0.796
[012] Reward:  -65.647 | Avg Reward: -146.898 | e: 0.781
[013] Reward:  -95.565 | Avg Reward: -143.231 | e: 0.766
[014] Reward:  -52.622 | Avg Reward: -137.190 | e: 0.751
[015] Reward:   -1.501 | Avg Reward: -128.710 | e: 0.737
[016] Reward:  -52.769 | Avg Reward: -124.243 | e: 0.723
[017] Reward: -118.171 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,▁▂▂▂▂▂▂▂▂▂▃▃▄▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇█████████
Episode,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
Episode Length,▁▁▁▁▂▂▃▄▂▇█▃▃▂▄▃▄▄▆▄▇▁▅▄▂▅▂▂▃▃▃▂▂▂▂▃▂▃▃▃
Epsilon,█▇▅▄▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Reward,▄▅▅▅▅▃▅▇▄▆▆██▄█▇▄▇▆▃▇▁▃▇▅▇██▇███████████

0,1
Avg-Reward-100e,242.47118
Episode,499.0
Episode Length,216.0
Epsilon,0.00995
Reward,251.67447


[34m[1mwandb[0m: Agent Starting Run: 4vf7gpb3 with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.9809094575353198
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.0072046037817169685
[34m[1mwandb[0m: 	update_target_net_interval: 1




[000] Reward: -405.373 | Avg Reward: -405.373 | e: 0.981
[001] Reward: -240.503 | Avg Reward: -322.938 | e: 0.962
[002] Reward: -129.203 | Avg Reward: -258.359 | e: 0.944
[003] Reward: -114.903 | Avg Reward: -222.495 | e: 0.926
[004] Reward: -117.972 | Avg Reward: -201.591 | e: 0.908
[005] Reward: -346.335 | Avg Reward: -225.715 | e: 0.891
[006] Reward: -243.180 | Avg Reward: -228.210 | e: 0.874
[007] Reward:  -99.803 | Avg Reward: -212.159 | e: 0.857
[008] Reward: -168.781 | Avg Reward: -207.339 | e: 0.841
[009] Reward:  -90.520 | Avg Reward: -195.657 | e: 0.825
[010] Reward: -117.692 | Avg Reward: -188.569 | e: 0.809
[011] Reward: -162.002 | Avg Reward: -186.355 | e: 0.794
[012] Reward: -105.497 | Avg Reward: -180.136 | e: 0.778
[013] Reward: -159.435 | Avg Reward: -178.657 | e: 0.763
[014] Reward:  -55.740 | Avg Reward: -170.462 | e: 0.749
[015] Reward:  -36.385 | Avg Reward: -162.083 | e: 0.735
[016] Reward:  -74.519 | Avg Reward: -156.932 | e: 0.721
[017] Reward:  -63.809 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,▁▃▅▆▅▅▆▆▆▆▆▇▇▇▇▇▇▇███████████▇▇▇▇▇▆▆▆▆▆▆
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
Episode Length,▁▂▂▁▂▁▂▁▂▁▁▁▂▁▂▂▂▂▁▁▁▂▁▂▃▂█▁▁▂▂▁▂▂▂▂▂▃▁▁
Epsilon,██▇▇▇▇▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁
Reward,▃▅▆▆▅▇▇▆▆▇▇▇▆▇▇▆▇▇█▇▇██▇▇▇▅▁▆▄▃▄▄▃▃▂▄▄▄▃

0,1
Avg-Reward-100e,-196.49676
Episode,62.0
Episode Length,62.0
Epsilon,0.29691
Reward,-370.03453


[34m[1mwandb[0m: Agent Starting Run: 0q8y2lkm with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.9807808108519736
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.008197386941529322
[34m[1mwandb[0m: 	update_target_net_interval: 10




[000] Reward: -241.345 | Avg Reward: -241.345 | e: 0.981
[001] Reward: -104.906 | Avg Reward: -173.125 | e: 0.962
[002] Reward: -108.828 | Avg Reward: -151.693 | e: 0.943
[003] Reward: -101.038 | Avg Reward: -139.029 | e: 0.925
[004] Reward: -131.614 | Avg Reward: -137.546 | e: 0.908
[005] Reward: -117.628 | Avg Reward: -134.226 | e: 0.890
[006] Reward: -462.678 | Avg Reward: -181.148 | e: 0.873
[007] Reward:  -93.710 | Avg Reward: -170.218 | e: 0.856
[008] Reward: -224.219 | Avg Reward: -176.218 | e: 0.840
[009] Reward: -106.811 | Avg Reward: -169.278 | e: 0.824
[010] Reward: -385.024 | Avg Reward: -188.891 | e: 0.808
[011] Reward:  -55.511 | Avg Reward: -177.776 | e: 0.792
[012] Reward: -152.060 | Avg Reward: -175.798 | e: 0.777
[013] Reward: -397.495 | Avg Reward: -191.633 | e: 0.762
[014] Reward:  -94.715 | Avg Reward: -185.172 | e: 0.747
[015] Reward: -195.492 | Avg Reward: -185.817 | e: 0.733
[016] Reward: -291.255 | Avg Reward: -192.019 | e: 0.719
[017] Reward: -468.527 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,▃▆▇██▆▆▆▅▆▆▅▅▅▄▄▄▃▄▄▃▃▃▄▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
Episode Length,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▂▁▁▁▁▁▁▁▁▁▁▃▂█▂▂
Epsilon,███▇▇▇▇▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁
Reward,▆▇▇▇▇▃▇▆▄█▇▇▆▅▃▅▄▄▅▆▄▄██▄▇▅▁▂▄▆▅▅▆▆▅▁▅▆▇

0,1
Avg-Reward-100e,-268.24014
Episode,50.0
Episode Length,210.0
Epsilon,0.37168
Reward,-82.73466


[34m[1mwandb[0m: Agent Starting Run: zyvcova2 with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.9681680420301172
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.009646972411517326
[34m[1mwandb[0m: 	update_target_net_interval: 1




[000] Reward: -468.746 | Avg Reward: -468.746 | e: 0.968
[001] Reward: -245.441 | Avg Reward: -357.093 | e: 0.937
[002] Reward: -145.440 | Avg Reward: -286.542 | e: 0.908
[003] Reward: -128.812 | Avg Reward: -247.110 | e: 0.879
[004] Reward: -229.571 | Avg Reward: -243.602 | e: 0.851
[005] Reward:  -70.138 | Avg Reward: -214.691 | e: 0.824
[006] Reward:  -89.676 | Avg Reward: -196.832 | e: 0.797
[007] Reward: -130.261 | Avg Reward: -188.511 | e: 0.772
[008] Reward: -139.565 | Avg Reward: -183.072 | e: 0.747
[009] Reward: -289.310 | Avg Reward: -193.696 | e: 0.724
[010] Reward: -385.589 | Avg Reward: -211.141 | e: 0.701
[011] Reward:  -70.837 | Avg Reward: -199.449 | e: 0.678
[012] Reward: -120.550 | Avg Reward: -193.380 | e: 0.657
[013] Reward:  -96.094 | Avg Reward: -186.431 | e: 0.636
[014] Reward:  -40.999 | Avg Reward: -176.735 | e: 0.616
[015] Reward: -117.231 | Avg Reward: -173.016 | e: 0.596
[016] Reward:  -88.157 | Avg Reward: -168.025 | e: 0.577
[017] Reward:  -53.035 | Avg Re

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Avg-Reward-100e,▁▄▅▆▆▆▆▇▇▇▇▇▇▇█████▇▇▇▇▇▆▆▆▇▇▇▇▇▇▇▇▇▆▆▆▆
Episode,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇████
Episode Length,▁▁▁▁▁▁▁▂▂▁▄▂▂▂█▂▂█▃▁▂▃▁▃█▂▂▂▂▂▄▄█▃▂▁▂█▂▁
Epsilon,██▇▇▆▆▅▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
Reward,▃▆▅▇▅▇▇▇▇▇▆██▇█▇█▇▁▂▅▄▃▆▂▅▄▆▅▄▆▇▇▅▆▅▂▇▆▂

0,1
Avg-Reward-100e,-191.22385
Episode,90.0
Episode Length,57.0
Epsilon,0.05266
Reward,-430.13467


[34m[1mwandb[0m: Agent Starting Run: u4sir0ff with config:
[34m[1mwandb[0m: 	episodes: 500
[34m[1mwandb[0m: 	epsilon: 1
[34m[1mwandb[0m: 	epsilon_decay: 0.970367953291368
[34m[1mwandb[0m: 	gamma: 0.99
[34m[1mwandb[0m: 	lr: 0.001322556504600355
[34m[1mwandb[0m: 	update_target_net_interval: 5




[000] Reward:  -65.234 | Avg Reward:  -65.234 | e: 0.970
[001] Reward: -303.445 | Avg Reward: -184.339 | e: 0.942
[002] Reward: -170.138 | Avg Reward: -179.606 | e: 0.914
[003] Reward: -185.403 | Avg Reward: -181.055 | e: 0.887
[004] Reward: -258.859 | Avg Reward: -196.616 | e: 0.860
[005] Reward:  -80.735 | Avg Reward: -177.302 | e: 0.835
[006] Reward: -511.560 | Avg Reward: -225.053 | e: 0.810
[007] Reward:  -75.399 | Avg Reward: -206.346 | e: 0.786
[008] Reward:  -10.577 | Avg Reward: -184.594 | e: 0.763
[009] Reward: -408.444 | Avg Reward: -206.979 | e: 0.740
[010] Reward:  -36.037 | Avg Reward: -191.439 | e: 0.718
[011] Reward: -181.598 | Avg Reward: -190.619 | e: 0.697
[012] Reward: -330.354 | Avg Reward: -201.368 | e: 0.676
[013] Reward: -177.043 | Avg Reward: -199.630 | e: 0.656
[014] Reward: -209.914 | Avg Reward: -200.316 | e: 0.637
[015] Reward: -102.744 | Avg Reward: -194.218 | e: 0.618
[016] Reward: -139.987 | Avg Reward: -191.028 | e: 0.600
[017] Reward: -462.332 | Avg Re