# Playing Differentiated Demand Environment

In [8]:
#Imports

from marketsai.markets.diff_demand import DiffDemand

#import ray

from ray import tune, shutdown, init
from ray.tune.registry import register_env
from ray.rllib.agents.a3c.a2c import A2CTrainer
from ray.rllib.agents.dqn.dqn import DQNTrainer
from ray.tune.integration.mlflow import MLflowLoggerCallback
from ray.rllib.utils.exploration.epsilon_greedy import EpsilonGreedy
from ray.rllib.utils.schedules.exponential_schedule import ExponentialSchedule

import random
import math
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import logging

In [9]:
# STEP 0: Initialize Ray
# Shut down any Ray runtime already attached to this kernel, then start a
# fresh one limited to NUM_CPUS CPUs and logging only errors.
NUM_CPUS = 9
shutdown()
init(logging_level=logging.ERROR, num_cpus=NUM_CPUS)

{'node_ip_address': '192.168.1.202',
 'raylet_ip_address': '192.168.1.202',
 'redis_address': '192.168.1.202:6379',
 'object_store_address': '/tmp/ray/session_2021-04-12_13-05-05_604932_19427/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2021-04-12_13-05-05_604932_19427/sockets/raylet',
 'webui_url': '127.0.0.1:8266',
 'session_dir': '/tmp/ray/session_2021-04-12_13-05-05_604932_19427',
 'metrics_export_port': 59320,
 'node_id': '07cf9e3eb4b811bf9b080e0de71da3c053e3e86d76dd16367ae9ff62'}

In [10]:
# STEP 1: Register the environment
# Make the environment available to RLlib under the name "diffdemand".
register_env("diffdemand", DiffDemand)

# A default-configured instance, used below to read the number of agents and
# the per-agent observation/action spaces.
env = DiffDemand()

# One policy id per agent: "policy_0", "policy_1", ...
policy_ids = ["policy_{}".format(i) for i in range(env.n_agents)]

In [11]:
# STEP 2: Experiment configuration

# Training budget. TIMESTEPS_PER_ITERATION is passed to RLlib as
# `timesteps_per_iteration`, and the stopping criterion at the bottom of this
# cell is derived from the same constant so the two cannot drift apart.
MAX_STEPS = 3000 * 1000
TIMESTEPS_PER_ITERATION = 1000

# Price bounds handed to the environment: [1.47, 1.93] widened by
# PRICE_BAND_WIDE on each side.
# NOTE(review): 1.47 and 1.93 are presumably benchmark prices for this market
# parameterisation -- confirm and name them.
PRICE_BAND_WIDE = 0.1
LOWER_PRICE = 1.47 - PRICE_BAND_WIDE
HIGHER_PRICE = 1.93 + PRICE_BAND_WIDE

# Per-timestep decay factors for epsilon. With schedule_timesteps=1 and
# initial_p=1.0 below, the schedule evaluates to DEC_RATE ** t = exp(-4e-6 * t).
DEC_RATE = math.exp(-4e-6)
# Four times faster decay; not referenced elsewhere in this cell.
DEC_RATE_HIGH = math.exp(-4e-6 * 4)

# Environment configuration, one entry per agent for the list-valued settings.
env_config = {
    "mkt_config": {
        "lower_price": [LOWER_PRICE] * env.n_agents,
        "higher_price": [HIGHER_PRICE] * env.n_agents,
        # NOTE(review): the key is spelled "parameteres" in the original and is
        # kept as-is; it is presumably the spelling DiffDemand reads -- confirm
        # against the environment before renaming.
        "parameteres": {
            "cost": [1] * env.n_agents,
            "values": [2] * env.n_agents,
            "ext_demand": 0,
            "substitution": 0.25,
        },
        "space_type": "MultiDiscrete",
        "gridpoints": 16,
    }
}

# Epsilon-greedy exploration with an exponentially decaying epsilon.
# The framework identifier is lowercase "torch" so that it matches
# training_config["framework"] and RLlib's framework names.
exploration_config = {
    "type": "EpsilonGreedy",
    "epsilon_schedule": ExponentialSchedule(
        schedule_timesteps=1,
        framework="torch",
        initial_p=1.0,
        decay_rate=DEC_RATE,
    ),
}

training_config = {
    "gamma": 0.95,
    "lr": 0.15,
    "env": "diffdemand",
    "exploration_config": exploration_config,
    "env_config": env_config,
    "horizon": 100,
    "soft_horizon": True,
    "no_done_at_end": True,
    "multiagent": {
        # One independent policy per agent, each built from that agent's own
        # observation and action space; None selects the trainer's default
        # policy class.
        "policies": {
            policy_ids[i]: (
                None,
                env.observation_space["agent_{}".format(i)],
                env.action_space["agent_{}".format(i)],
                {},
            )
            for i in range(env.n_agents)
        },
        # Agent ids look like "agent_<k>"; agent k is mapped to policy_ids[k].
        "policy_mapping_fn": (lambda agent_id: policy_ids[int(agent_id.split("_")[1])]),
    },
    "framework": "torch",
    # Leave one of the CPUs given to Ray for the trainer process itself.
    "num_workers": NUM_CPUS - 1,
    "num_gpus": 0,
    "timesteps_per_iteration": TIMESTEPS_PER_ITERATION,
}

# Stop after the number of iterations that corresponds to MAX_STEPS timesteps
# (integer division keeps the iteration count an int).
stop = {"training_iteration": MAX_STEPS // TIMESTEPS_PER_ITERATION}

In [12]:
# STEP 3: Run the experiment

exp_name = "DQN_test_April9"

# Log the run to MLflow under the same experiment name, artifacts included.
mlflow_logger = MLflowLoggerCallback(experiment_name=exp_name, save_artifact=True)

results = tune.run(
    "DQN",
    name=exp_name,
    config=training_config,
    stop=stop,
    metric="episode_reward_mean",
    mode="max",
    # Only a final checkpoint is written; periodic checkpointing
    # (checkpoint_freq) is left disabled.
    checkpoint_at_end=True,
    callbacks=[mlflow_logger],
    verbose=3,
)

# Best checkpoint according to the metric/mode given to tune.run above.
best_checkpoint_DQN = results.best_checkpoint
print("Best checkpont:", best_checkpoint_DQN)

Trial name,status,loc
DQN_diffdemand_3b8c4_00000,RUNNING,


 been deprecated. Use `num_framestacks (int)` instead. This will raise an error in the future!
[2m[36m(pid=21200)[0m {'agent_0': array([1.6, 1.6]), 'agent_1': array([1.6, 1.6])} {'agent_0': 0.27249236968976237, 'agent_1': 0.27249236968976237} {'__all__': False} {'agent_0': 1.6, 'agent_1': 1.6}
[2m[36m(pid=21200)[0m {'agent_0': array([1.6, 1.6]), 'agent_1': array([1.6, 1.6])} {'agent_0': 0.27249236968976237, 'agent_1': 0.27249236968976237} {'__all__': False} {'agent_0': 1.6, 'agent_1': 1.6}
[2m[36m(pid=21201)[0m {'agent_0': array([1.6, 1.6]), 'agent_1': array([1.6, 1.6])} {'agent_0': 0.27249236968976237, 'agent_1': 0.27249236968976237} {'__all__': False} {'agent_0': 1.6, 'agent_1': 1.6}
[2m[36m(pid=21201)[0m {'agent_0': array([1.6, 1.6]), 'agent_1': array([1.6, 1.6])} {'agent_0': 0.27249236968976237, 'agent_1': 0.27249236968976237} {'__all__': False} {'agent_0': 1.6, 'agent_1': 1.6}
[2m[36m(pid=21197)[0m {'agent_0': array([1.6, 1.6]), 'agent_1': array([1.6, 1.6])} {'agent

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1,0.92447,1024,54.0899,55.2714,53.11,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-05-20
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.17082453405669
  episode_reward_mean: 53.943379492880304
  episode_reward_min: 51.752128146270366
  episodes_this_iter: 8
  episodes_total: 48
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 5120
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 16.35887336730957
        max_q: 41.77446746826172
        mean_q: 40.79328155517578
        mean_td_error: 1.3590792417526245
        min_q: 39.8621711730957
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 14.293238639831543
        max_q: 53.321693420410156
        mean_q: 52.71045684814453
        mean_td_error: 1.1758366823196411
        min_q: 51.89693832397461
    num_steps_sampled: 5120
    num_steps_trained: 4128
    num_ta

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,5,7.17838,5120,53.9434,56.1708,51.7521,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-05-27
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.17082453405669
  episode_reward_mean: 53.992152705023166
  episode_reward_min: 51.52952069043702
  episodes_this_iter: 8
  episodes_total: 88
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 9216
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 13.688346862792969
        max_q: 34.445037841796875
        mean_q: 34.00699234008789
        mean_td_error: -1.2947319746017456
        min_q: 33.341636657714844
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 14.080678939819336
        max_q: 43.24806594848633
        mean_q: 42.11859893798828
        mean_td_error: 1.7403194904327393
        min_q: 39.99506759643555
    num_steps_sampled: 9216
    num_steps_trained: 8224
    num

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,9,13.4075,9216,53.9922,56.1708,51.5295,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-05-33
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.17082453405669
  episode_reward_mean: 53.97798606546476
  episode_reward_min: 51.52952069043702
  episodes_this_iter: 8
  episodes_total: 128
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 13312
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 1.2917709350585938
        max_q: 30.514339447021484
        mean_q: 30.235212326049805
        mean_td_error: -0.10063982009887695
        min_q: 29.918378829956055
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 11.18481159210205
        max_q: 41.224220275878906
        mean_q: 40.507938385009766
        mean_td_error: 1.109839677810669
        min_q: 39.25877380371094
    num_steps_sampled: 13312
    num_steps_trained: 12320
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,13,19.368,13312,53.978,56.1708,51.5295,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-05-39
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.29587240220278
  episode_reward_mean: 54.04637064506474
  episode_reward_min: 51.55336103433865
  episodes_this_iter: 8
  episodes_total: 168
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 17408
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.14043332636356354
        max_q: 23.766019821166992
        mean_q: 23.489858627319336
        mean_td_error: 0.040197789669036865
        min_q: 23.3374080657959
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 9.448989868164062
        max_q: 32.16331100463867
        mean_q: 31.75145721435547
        mean_td_error: -0.9414322972297668
        min_q: 30.825101852416992
    num_steps_sampled: 17408
    num_steps_trained: 16416
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,17,25.4142,17408,54.0464,56.2959,51.5534,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-05-45
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.45342926439843
  episode_reward_mean: 54.13484710701797
  episode_reward_min: 51.41956301828532
  episodes_this_iter: 8
  episodes_total: 208
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 21504
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 1.2425652742385864
        max_q: 19.799942016601562
        mean_q: 19.65974235534668
        mean_td_error: 0.4077872037887573
        min_q: 19.513961791992188
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 4.681492328643799
        max_q: 24.49956512451172
        mean_q: 24.3413143157959
        mean_td_error: 0.7458182573318481
        min_q: 23.49053192138672
    num_steps_sampled: 21504
    num_steps_trained: 20512
    num

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,21,31.5394,21504,54.1348,56.4534,51.4196,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-05-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.45342926439843
  episode_reward_mean: 53.84480153486315
  episode_reward_min: 51.41956301828532
  episodes_this_iter: 16
  episodes_total: 256
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 25600
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.8432988524436951
        max_q: 17.089534759521484
        mean_q: 16.99409294128418
        mean_td_error: 0.2818530797958374
        min_q: 16.85810661315918
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.5438141822814941
        max_q: 19.361886978149414
        mean_q: 19.121952056884766
        mean_td_error: -0.09449136257171631
        min_q: 18.938085556030273
    num_steps_sampled: 25600
    num_steps_trained: 24608


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,25,37.7139,25600,53.8448,56.4534,51.4196,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-05-57
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.45342926439843
  episode_reward_mean: 53.630602091604324
  episode_reward_min: 51.41956301828532
  episodes_this_iter: 8
  episodes_total: 280
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 28672
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.37252190709114075
        max_q: 15.43765926361084
        mean_q: 15.323945999145508
        mean_td_error: 0.1256583333015442
        min_q: 15.195587158203125
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.4333929419517517
        max_q: 16.274641036987305
        mean_q: 15.969839096069336
        mean_td_error: -0.08360239863395691
        min_q: 15.847777366638184
    num_steps_sampled: 28672
    num_steps_trained: 2768

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,28,42.9147,28672,53.6306,56.4534,51.4196,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-06-03
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.72462053445552
  episode_reward_mean: 53.66825894205651
  episode_reward_min: 51.659510992900785
  episodes_this_iter: 8
  episodes_total: 312
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 31744
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.321338027715683
        max_q: 13.786115646362305
        mean_q: 13.707209587097168
        mean_td_error: 0.12203878164291382
        min_q: 13.589879989624023
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.3940703868865967
        max_q: 13.113321304321289
        mean_q: 12.957204818725586
        mean_td_error: -0.08615538477897644
        min_q: 12.870125770568848
    num_steps_sampled: 31744
    num_steps_trained: 3075

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,31,48.0328,31744,53.6683,55.7246,51.6595,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-06-08
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.72462053445552
  episode_reward_mean: 53.64216883662899
  episode_reward_min: 51.26480191760411
  episodes_this_iter: 8
  episodes_total: 344
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 34816
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009432288818061352
        max_q: 12.530706405639648
        mean_q: 12.374056816101074
        mean_td_error: -0.002933293581008911
        min_q: 12.295797348022461
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.36985543370246887
        max_q: 11.898957252502441
        mean_q: 11.824714660644531
        mean_td_error: 0.12120455503463745
        min_q: 11.750652313232422
    num_steps_sampled: 34816
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,34,53.0527,34816,53.6422,55.7246,51.2648,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-06-13
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.90585717765383
  episode_reward_mean: 53.794800501006286
  episode_reward_min: 51.26480191760411
  episodes_this_iter: 8
  episodes_total: 376
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 37888
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.12503455579280853
        max_q: 11.074015617370605
        mean_q: 11.013469696044922
        mean_td_error: -0.049564093351364136
        min_q: 10.87415599822998
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.09110284596681595
        max_q: 10.816385269165039
        mean_q: 10.727029800415039
        mean_td_error: 0.0365699827671051
        min_q: 10.628334999084473
    num_steps_sampled: 37888
    num_steps_trained: 36

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,37,57.9586,37888,53.7948,55.9059,51.2648,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-06-18
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.171025229885906
  episode_reward_mean: 53.73886503991291
  episode_reward_min: 51.26480191760411
  episodes_this_iter: 16
  episodes_total: 408
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 40960
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.04996102303266525
        max_q: 10.016161918640137
        mean_q: 9.945367813110352
        mean_td_error: -0.02545100450515747
        min_q: 9.863104820251465
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.11147905141115189
        max_q: 9.964752197265625
        mean_q: 9.862874031066895
        mean_td_error: 0.0389462411403656
        min_q: 9.680384635925293
    num_steps_sampled: 40960
    num_steps_trained: 39968


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,40,62.8843,40960,53.7389,57.171,51.2648,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-06-23
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.171025229885906
  episode_reward_mean: 53.647314987523835
  episode_reward_min: 51.883887546902656
  episodes_this_iter: 16
  episodes_total: 440
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 44032
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02734227664768696
        max_q: 8.998805046081543
        mean_q: 8.919136047363281
        mean_td_error: -0.00873515009880066
        min_q: 8.833931922912598
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.15351252257823944
        max_q: 9.359397888183594
        mean_q: 9.1906099319458
        mean_td_error: 0.06039398908615112
        min_q: 9.09914493560791
    num_steps_sampled: 44032
    num_steps_trained: 43040
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,43,67.7799,44032,53.6473,57.171,51.8839,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-06-28
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.171025229885906
  episode_reward_mean: 53.62955941632101
  episode_reward_min: 51.883887546902656
  episodes_this_iter: 8
  episodes_total: 464
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 47104
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.08627524971961975
        max_q: 8.29007625579834
        mean_q: 8.229666709899902
        mean_td_error: 0.027993589639663696
        min_q: 8.146410942077637
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0774850845336914
        max_q: 8.567244529724121
        mean_q: 8.50825023651123
        mean_td_error: 0.03795298933982849
        min_q: 8.401065826416016
    num_steps_sampled: 47104
    num_steps_trained: 46112
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,46,72.5977,47104,53.6296,57.171,51.8839,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-06-35
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.94636100620418
  episode_reward_mean: 53.57277791149172
  episode_reward_min: 51.670445872116694
  episodes_this_iter: 16
  episodes_total: 512
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 51200
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007847449742257595
        max_q: 7.410966873168945
        mean_q: 7.326381683349609
        mean_td_error: -0.007955089211463928
        min_q: 7.228646755218506
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.1519225388765335
        max_q: 7.519762992858887
        mean_q: 7.473031520843506
        mean_td_error: -0.07719722390174866
        min_q: 7.347830295562744
    num_steps_sampled: 51200
    num_steps_trained: 5020

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,50,78.9532,51200,53.5728,55.9464,51.6704,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-06-40
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.94636100620418
  episode_reward_mean: 53.70636272472342
  episode_reward_min: 51.670445872116694
  episodes_this_iter: 8
  episodes_total: 536
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 54272
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.023097947239875793
        max_q: 6.909536361694336
        mean_q: 6.854881286621094
        mean_td_error: 0.010481268167495728
        min_q: 6.756248950958252
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.04832763969898224
        max_q: 7.054996013641357
        mean_q: 6.916999340057373
        mean_td_error: -0.020675376057624817
        min_q: 6.8145670890808105
    num_steps_sampled: 54272
    num_steps_trained: 532

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,53,83.8781,54272,53.7064,55.9464,51.6704,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-06-45
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.94636100620418
  episode_reward_mean: 53.814582251243635
  episode_reward_min: 51.670445872116694
  episodes_this_iter: 8
  episodes_total: 568
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 57344
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.12782955169677734
        max_q: 6.657486438751221
        mean_q: 6.593052387237549
        mean_td_error: 0.0719553679227829
        min_q: 6.508826732635498
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004171894863247871
        max_q: 6.6038947105407715
        mean_q: 6.531659126281738
        mean_td_error: -0.000243455171585083
        min_q: 6.452733516693115
    num_steps_sampled: 57344
    num_steps_trained: 5635

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,56,88.7805,57344,53.8146,55.9464,51.6704,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-06-50
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.521466272348476
  episode_reward_mean: 53.86983271088346
  episode_reward_min: 51.670445872116694
  episodes_this_iter: 8
  episodes_total: 600
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 60416
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00943014770746231
        max_q: 6.473996162414551
        mean_q: 6.416278839111328
        mean_td_error: 0.010167300701141357
        min_q: 6.284014701843262
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0009726248099468648
        max_q: 6.566409587860107
        mean_q: 6.506452560424805
        mean_td_error: 0.0028312355279922485
        min_q: 6.445710182189941
    num_steps_sampled: 60416
    num_steps_trained: 59

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,59,93.8393,60416,53.8698,55.5215,51.6704,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-06-56
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.29943344596102
  episode_reward_mean: 53.74103145423897
  episode_reward_min: 51.577509796205646
  episodes_this_iter: 8
  episodes_total: 632
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 63488
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02884235791862011
        max_q: 6.454364776611328
        mean_q: 6.393374443054199
        mean_td_error: 0.01573115587234497
        min_q: 6.305696964263916
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.1499161422252655
        max_q: 6.4756340980529785
        mean_q: 6.3677215576171875
        mean_td_error: 0.09075376391410828
        min_q: 6.206573963165283
    num_steps_sampled: 63488
    num_steps_trained: 62496
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,62,98.961,63488,53.741,55.2994,51.5775,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-07-01
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.67846960278463
  episode_reward_mean: 53.63745783223837
  episode_reward_min: 51.577509796205646
  episodes_this_iter: 16
  episodes_total: 664
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 66560
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.08036625385284424
        max_q: 6.3441009521484375
        mean_q: 6.275112628936768
        mean_td_error: 0.043887197971343994
        min_q: 6.164840221405029
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.09791496396064758
        max_q: 6.237187385559082
        mean_q: 6.160111427307129
        mean_td_error: -0.050319403409957886
        min_q: 6.069663047790527
    num_steps_sampled: 66560
    num_steps_trained: 655

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,65,104.041,66560,53.6375,55.6785,51.5775,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-07-06
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.729685536245086
  episode_reward_mean: 53.69374481304149
  episode_reward_min: 51.577509796205646
  episodes_this_iter: 16
  episodes_total: 696
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 69632
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.06242389604449272
        max_q: 6.118226051330566
        mean_q: 6.039259433746338
        mean_td_error: -0.0293780118227005
        min_q: 5.95199728012085
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004494301043450832
        max_q: 6.259151935577393
        mean_q: 6.1693501472473145
        mean_td_error: 0.0018805265426635742
        min_q: 6.063796043395996
    num_steps_sampled: 69632
    num_steps_trained: 686

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,68,108.984,69632,53.6937,56.7297,51.5775,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-07-11
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.729685536245086
  episode_reward_mean: 53.9353316359936
  episode_reward_min: 52.08275891617812
  episodes_this_iter: 8
  episodes_total: 720
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 72704
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.044709354639053345
        max_q: 5.9809794425964355
        mean_q: 5.9363203048706055
        mean_td_error: -0.03007259964942932
        min_q: 5.842525005340576
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.028508663177490234
        max_q: 6.092418670654297
        mean_q: 6.014638900756836
        mean_td_error: 0.018762528896331787
        min_q: 5.912252426147461
    num_steps_sampled: 72704
    num_steps_trained: 717

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,71,113.964,72704,53.9353,56.7297,52.0828,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-07-16
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.729685536245086
  episode_reward_mean: 54.06722548178616
  episode_reward_min: 51.89804409980905
  episodes_this_iter: 8
  episodes_total: 752
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 75776
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.12247927486896515
        max_q: 5.908071517944336
        mean_q: 5.816936492919922
        mean_td_error: -0.0731738805770874
        min_q: 5.7295122146606445
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0157957561314106
        max_q: 5.85688591003418
        mean_q: 5.800996780395508
        mean_td_error: 0.010053813457489014
        min_q: 5.735685348510742
    num_steps_sampled: 75776
    num_steps_trained: 74784
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,74,118.796,75776,54.0672,56.7297,51.898,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-07-23
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.46899110620292
  episode_reward_mean: 53.74004177527361
  episode_reward_min: 51.25791534037733
  episodes_this_iter: 8
  episodes_total: 792
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 79872
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.017515214160084724
        max_q: 6.001328468322754
        mean_q: 5.930180549621582
        mean_td_error: 0.011018991470336914
        min_q: 5.811270713806152
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.06945134699344635
        max_q: 5.89349365234375
        mean_q: 5.836122989654541
        mean_td_error: 0.03358191251754761
        min_q: 5.749630451202393
    num_steps_sampled: 79872
    num_steps_trained: 78880
  

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,78,125.158,79872,53.74,56.469,51.2579,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-07-30
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.46899110620292
  episode_reward_mean: 53.47803252029731
  episode_reward_min: 51.25791534037733
  episodes_this_iter: 8
  episodes_total: 832
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 83968
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.059712205082178116
        max_q: 5.784461498260498
        mean_q: 5.709134101867676
        mean_td_error: -0.027445867657661438
        min_q: 5.621617317199707
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.08230403810739517
        max_q: 5.89659309387207
        mean_q: 5.789265155792236
        mean_td_error: 0.051951274275779724
        min_q: 5.692749977111816
    num_steps_sampled: 83968
    num_steps_trained: 82976


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,82,131.536,83968,53.478,56.469,51.2579,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-07-36
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.430835082046485
  episode_reward_mean: 53.42630472199189
  episode_reward_min: 51.07230369462029
  episodes_this_iter: 16
  episodes_total: 880
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 88064
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.07193740457296371
        max_q: 5.531988143920898
        mean_q: 5.481285095214844
        mean_td_error: -0.055007994174957275
        min_q: 5.413406848907471
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.11602199077606201
        max_q: 5.7321014404296875
        mean_q: 5.64418888092041
        mean_td_error: 0.08448082208633423
        min_q: 5.42839241027832
    num_steps_sampled: 88064
    num_steps_trained: 87072


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,86,137.824,88064,53.4263,55.4308,51.0723,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-07-43
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.53442561669245
  episode_reward_mean: 53.44408288985378
  episode_reward_min: 51.07230369462029
  episodes_this_iter: 16
  episodes_total: 920
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 92160
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.06832194328308105
        max_q: 5.164453029632568
        mean_q: 5.0944294929504395
        mean_td_error: -0.038311734795570374
        min_q: 5.01930046081543
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.06794431060552597
        max_q: 5.553469181060791
        mean_q: 5.482288360595703
        mean_td_error: -0.04565320909023285
        min_q: 5.380090713500977
    num_steps_sampled: 92160
    num_steps_trained: 91168

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,90,144.145,92160,53.4441,55.5344,51.0723,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-07-48
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.53442561669245
  episode_reward_mean: 53.53924423056651
  episode_reward_min: 51.26045218247468
  episodes_this_iter: 16
  episodes_total: 952
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 95232
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.05183795467019081
        max_q: 5.124342441558838
        mean_q: 5.061676502227783
        mean_td_error: -0.035420700907707214
        min_q: 4.95105504989624
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03490976616740227
        max_q: 5.44909143447876
        mean_q: 5.370397567749023
        mean_td_error: -0.020980075001716614
        min_q: 5.247272491455078
    num_steps_sampled: 95232
    num_steps_trained: 94240


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,93,149.084,95232,53.5392,55.5344,51.2605,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-07-53
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.53442561669245
  episode_reward_mean: 53.29225665960399
  episode_reward_min: 49.55695550996795
  episodes_this_iter: 8
  episodes_total: 976
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 98304
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.12314032018184662
        max_q: 5.080697536468506
        mean_q: 4.994744777679443
        mean_td_error: -0.08749321103096008
        min_q: 4.879149913787842
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.09212078154087067
        max_q: 5.276280403137207
        mean_q: 5.243711948394775
        mean_td_error: 0.06816470623016357
        min_q: 5.17821741104126
    num_steps_sampled: 98304
    num_steps_trained: 97312
   

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,96,154.335,98304,53.2923,55.5344,49.557,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-07-59
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.22734706643018
  episode_reward_mean: 53.26587241463254
  episode_reward_min: 49.55695550996795
  episodes_this_iter: 8
  episodes_total: 1008
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 101376
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.12768138945102692
        max_q: 5.1074538230896
        mean_q: 5.027532577514648
        mean_td_error: 0.08528992533683777
        min_q: 4.857716083526611
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.09563782811164856
        max_q: 5.419435501098633
        mean_q: 5.375482559204102
        mean_td_error: 0.06691570580005646
        min_q: 5.302273750305176
    num_steps_sampled: 101376
    num_steps_trained: 100384
 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,99,159.438,101376,53.2659,55.2273,49.557,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-08-04
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.39508929195583
  episode_reward_mean: 53.42260003340077
  episode_reward_min: 49.55695550996795
  episodes_this_iter: 8
  episodes_total: 1040
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 104448
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.09002765268087387
        max_q: 5.005504608154297
        mean_q: 4.934853553771973
        mean_td_error: -0.06624923646450043
        min_q: 4.8348798751831055
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.06966543197631836
        max_q: 5.127906799316406
        mean_q: 5.06258487701416
        mean_td_error: 0.05454443395137787
        min_q: 4.971279621124268
    num_steps_sampled: 104448
    num_steps_trained: 10345

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,102,164.517,104448,53.4226,55.3951,49.557,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-08-09
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.658285462653225
  episode_reward_mean: 53.72094077226136
  episode_reward_min: 51.75428568854618
  episodes_this_iter: 8
  episodes_total: 1072
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 107520
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.07681762427091599
        max_q: 5.031782150268555
        mean_q: 4.957266807556152
        mean_td_error: 0.05544808506965637
        min_q: 4.819029331207275
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.023083535954356194
        max_q: 5.192158222198486
        mean_q: 5.119873523712158
        mean_td_error: -0.02028028666973114
        min_q: 5.019571781158447
    num_steps_sampled: 107520
    num_steps_trained: 106

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,105,169.479,107520,53.7209,56.6583,51.7543,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-08-14
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.658285462653225
  episode_reward_mean: 53.847122552309045
  episode_reward_min: 51.39087576889426
  episodes_this_iter: 16
  episodes_total: 1104
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 110592
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010981216095387936
        max_q: 4.966431617736816
        mean_q: 4.899971961975098
        mean_td_error: 0.008115112781524658
        min_q: 4.7412872314453125
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.08828263729810715
        max_q: 5.18337869644165
        mean_q: 5.017836093902588
        mean_td_error: 0.07369011640548706
        min_q: 4.91666841506958
    num_steps_sampled: 110592
    num_steps_trained: 10

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,108,174.442,110592,53.8471,56.6583,51.3909,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-08-19
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.658285462653225
  episode_reward_mean: 53.82132625793373
  episode_reward_min: 51.39087576889426
  episodes_this_iter: 16
  episodes_total: 1136
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 113664
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.10689850151538849
        max_q: 4.963134765625
        mean_q: 4.9030680656433105
        mean_td_error: -0.08227944374084473
        min_q: 4.741204738616943
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.17493760585784912
        max_q: 5.209707260131836
        mean_q: 5.126394271850586
        mean_td_error: 0.14522404968738556
        min_q: 5.020622253417969
    num_steps_sampled: 113664
    num_steps_trained: 11267

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,111,179.4,113664,53.8213,56.6583,51.3909,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-08-24
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.658285462653225
  episode_reward_mean: 53.84928886943017
  episode_reward_min: 51.39087576889426
  episodes_this_iter: 8
  episodes_total: 1160
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 116736
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03944537788629532
        max_q: 5.334461212158203
        mean_q: 5.239486217498779
        mean_td_error: 0.030164852738380432
        min_q: 5.155576229095459
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.06756272166967392
        max_q: 5.028286457061768
        mean_q: 4.96165132522583
        mean_td_error: -0.055091843008995056
        min_q: 4.833914279937744
    num_steps_sampled: 116736
    num_steps_trained: 115

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,114,184.353,116736,53.8493,56.6583,51.3909,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-08-30
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.65006540199823
  episode_reward_mean: 53.755589237984786
  episode_reward_min: 50.541939289540075
  episodes_this_iter: 8
  episodes_total: 1192
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 119808
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.2406885176897049
        max_q: 5.719791412353516
        mean_q: 5.593452453613281
        mean_td_error: 0.1675914078950882
        min_q: 5.381656646728516
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.019581681117415428
        max_q: 5.268766403198242
        mean_q: 5.12896203994751
        mean_td_error: 0.013501107692718506
        min_q: 5.019759178161621
    num_steps_sampled: 119808
    num_steps_trained: 11881

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,117,189.3,119808,53.7556,56.6501,50.5419,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-08-35
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.979524250807465
  episode_reward_mean: 53.39420018477992
  episode_reward_min: 50.541939289540075
  episodes_this_iter: 8
  episodes_total: 1224
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 122880
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.019784463569521904
        max_q: 5.89424467086792
        mean_q: 5.8307037353515625
        mean_td_error: -0.00879526138305664
        min_q: 5.753773212432861
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.11192891001701355
        max_q: 5.116379737854004
        mean_q: 5.034848690032959
        mean_td_error: 0.10023042559623718
        min_q: 4.929513931274414
    num_steps_sampled: 122880
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,120,194.264,122880,53.3942,55.9795,50.5419,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-08-40
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.979524250807465
  episode_reward_mean: 53.243963520771906
  episode_reward_min: 50.51470810632292
  episodes_this_iter: 8
  episodes_total: 1256
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 125952
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.05017093941569328
        max_q: 5.879950046539307
        mean_q: 5.789413928985596
        mean_td_error: -0.03602088987827301
        min_q: 5.635735511779785
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0491822212934494
        max_q: 4.814616680145264
        mean_q: 4.746492385864258
        mean_td_error: -0.045087724924087524
        min_q: 4.665688991546631
    num_steps_sampled: 125952
    num_steps_trained: 12

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,123,199.318,125952,53.244,55.9795,50.5147,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-08-46
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.78087505311365
  episode_reward_mean: 53.29640995316823
  episode_reward_min: 50.51470810632292
  episodes_this_iter: 8
  episodes_total: 1288
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 129024
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.049211762845516205
        max_q: 5.7260870933532715
        mean_q: 5.647698402404785
        mean_td_error: -0.029402107000350952
        min_q: 5.493730545043945
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.2954612374305725
        max_q: 5.1936936378479
        mean_q: 4.965067386627197
        mean_td_error: 0.33976203203201294
        min_q: 4.729244709014893
    num_steps_sampled: 129024
    num_steps_trained: 12803

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,126,204.635,129024,53.2964,55.7809,50.5147,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-08-51
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.78087505311365
  episode_reward_mean: 53.66927195900575
  episode_reward_min: 50.51470810632292
  episodes_this_iter: 16
  episodes_total: 1320
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 132096
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.27346333861351013
        max_q: 5.532323360443115
        mean_q: 5.405747890472412
        mean_td_error: -0.18503129482269287
        min_q: 5.162876129150391
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.04919516295194626
        max_q: 5.13107967376709
        mean_q: 4.950494766235352
        mean_td_error: -0.19096091389656067
        min_q: 4.804494857788086
    num_steps_sampled: 132096
    num_steps_trained: 1311

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,129,209.936,132096,53.6693,55.7809,50.5147,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-08-57
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.78087505311365
  episode_reward_mean: 53.786840812863986
  episode_reward_min: 50.42343051907539
  episodes_this_iter: 8
  episodes_total: 1344
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 135168
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.20325839519500732
        max_q: 5.884373664855957
        mean_q: 5.79270601272583
        mean_td_error: 0.12362255156040192
        min_q: 5.698818206787109
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0028224699199199677
        max_q: 5.0206780433654785
        mean_q: 4.893261432647705
        mean_td_error: 0.04417194426059723
        min_q: 4.752951145172119
    num_steps_sampled: 135168
    num_steps_trained: 134

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,132,215.224,135168,53.7868,55.7809,50.4234,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-09-02
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.52481683859559
  episode_reward_mean: 53.825775038123794
  episode_reward_min: 50.42343051907539
  episodes_this_iter: 8
  episodes_total: 1376
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 138240
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.08642251044511795
        max_q: 5.678338050842285
        mean_q: 5.592007637023926
        mean_td_error: 0.060375019907951355
        min_q: 5.453886985778809
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0038834582082927227
        max_q: 5.203695774078369
        mean_q: 5.135080337524414
        mean_td_error: -0.0637420266866684
        min_q: 5.030685901641846
    num_steps_sampled: 138240
    num_steps_trained: 13

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,135,220.473,138240,53.8258,56.5248,50.4234,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-09-07
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.52481683859559
  episode_reward_mean: 53.21722112223606
  episode_reward_min: 49.8209652252249
  episodes_this_iter: 8
  episodes_total: 1408
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 141312
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.31708765029907227
        max_q: 5.690534591674805
        mean_q: 5.604261875152588
        mean_td_error: 0.20380154252052307
        min_q: 5.526325225830078
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0019886803347617388
        max_q: 5.08599853515625
        mean_q: 5.0210065841674805
        mean_td_error: 0.029053106904029846
        min_q: 4.928752899169922
    num_steps_sampled: 141312
    num_steps_trained: 1403

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,138,225.648,141312,53.2172,56.5248,49.821,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-09-13
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.52481683859559
  episode_reward_mean: 53.110109695648646
  episode_reward_min: 49.8209652252249
  episodes_this_iter: 8
  episodes_total: 1440
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 144384
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.1806146651506424
        max_q: 5.862858772277832
        mean_q: 5.733885765075684
        mean_td_error: 0.12505944073200226
        min_q: 5.5542311668396
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0033638416789472103
        max_q: 4.951835632324219
        mean_q: 4.909099578857422
        mean_td_error: -0.05268855392932892
        min_q: 4.85936164855957
    num_steps_sampled: 144384
    num_steps_trained: 143392


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,141,230.621,144384,53.1101,56.5248,49.821,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-09-18
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.03285927663178
  episode_reward_mean: 53.18515356745713
  episode_reward_min: 49.8209652252249
  episodes_this_iter: 8
  episodes_total: 1472
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 147456
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03796345740556717
        max_q: 5.608668804168701
        mean_q: 5.539212226867676
        mean_td_error: 0.027085214853286743
        min_q: 5.438658714294434
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0020313875284045935
        max_q: 4.795871257781982
        mean_q: 4.670633792877197
        mean_td_error: -0.0242595374584198
        min_q: 4.517255783081055
    num_steps_sampled: 147456
    num_steps_trained: 1464

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,144,235.614,147456,53.1852,56.0329,49.821,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-09-23
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.03285927663178
  episode_reward_mean: 53.37365430622738
  episode_reward_min: 50.057369203092875
  episodes_this_iter: 16
  episodes_total: 1504
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 150528
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.018791982904076576
        max_q: 5.819026947021484
        mean_q: 5.584268093109131
        mean_td_error: -0.01243390142917633
        min_q: 5.327282905578613
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003678548615425825
        max_q: 4.644171237945557
        mean_q: 4.517655372619629
        mean_td_error: 0.035304948687553406
        min_q: 4.331700801849365
    num_steps_sampled: 150528
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,147,240.595,150528,53.3737,56.0329,50.0574,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-09-28
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.91779351905338
  episode_reward_mean: 53.74262273539356
  episode_reward_min: 51.45919610388686
  episodes_this_iter: 16
  episodes_total: 1536
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 153600
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010983859188854694
        max_q: 4.595504283905029
        mean_q: 4.4005446434021
        mean_td_error: -0.17634350061416626
        min_q: 4.191352367401123
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006382044404745102
        max_q: 4.34718132019043
        mean_q: 4.262924671173096
        mean_td_error: -0.09202034771442413
        min_q: 4.184136390686035
    num_steps_sampled: 153600
    num_steps_trained: 1526

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,150,245.683,153600,53.7426,55.9178,51.4592,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-09-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.91779351905338
  episode_reward_mean: 53.54218895033847
  episode_reward_min: 50.87943394278584
  episodes_this_iter: 8
  episodes_total: 1560
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 156672
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00456375814974308
        max_q: 3.5877976417541504
        mean_q: 3.3974356651306152
        mean_td_error: 0.07207966595888138
        min_q: 3.216862916946411
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00805311743170023
        max_q: 4.000515937805176
        mean_q: 3.8993678092956543
        mean_td_error: -0.10918733477592468
        min_q: 3.79636287689209
    num_steps_sampled: 156672
    num_steps_trained: 155

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,153,250.853,156672,53.5422,55.9178,50.8794,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-09-39
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.21319915069867
  episode_reward_mean: 53.41241191615267
  episode_reward_min: 49.398219188903745
  episodes_this_iter: 8
  episodes_total: 1592
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 159744
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005581131670624018
        max_q: 3.3940536975860596
        mean_q: 3.271705150604248
        mean_td_error: -0.09671664237976074
        min_q: 3.1501686573028564
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0046571362763643265
        max_q: 3.739838123321533
        mean_q: 3.5055782794952393
        mean_td_error: -0.050575800240039825
        min_q: 3.2985713481903076
    num_steps_sampled: 159744
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,156,255.869,159744,53.4124,56.2132,49.3982,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-09-44
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.2348361503203
  episode_reward_mean: 53.51571356625184
  episode_reward_min: 49.398219188903745
  episodes_this_iter: 8
  episodes_total: 1624
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 162816
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0021023540757596493
        max_q: 3.4459757804870605
        mean_q: 3.3233184814453125
        mean_td_error: -0.007460467517375946
        min_q: 3.0850863456726074
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0029743430204689503
        max_q: 3.5915985107421875
        mean_q: 3.184644937515259
        mean_td_error: 0.017951101064682007
        min_q: 3.015223503112793
    num_steps_sampled: 162816
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,159,260.873,162816,53.5157,57.2348,49.3982,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-09-49
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.2348361503203
  episode_reward_mean: 53.66427151394244
  episode_reward_min: 49.398219188903745
  episodes_this_iter: 8
  episodes_total: 1656
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 165888
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00968891754746437
        max_q: 3.418877601623535
        mean_q: 3.3064749240875244
        mean_td_error: -0.16688384115695953
        min_q: 3.2100112438201904
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004640516825020313
        max_q: 2.7739527225494385
        mean_q: 2.446009397506714
        mean_td_error: -0.043367452919483185
        min_q: 2.182464599609375
    num_steps_sampled: 165888
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,162,265.88,165888,53.6643,57.2348,49.3982,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-09-54
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.2348361503203
  episode_reward_mean: 53.4254455950276
  episode_reward_min: 49.5515350322199
  episodes_this_iter: 16
  episodes_total: 1688
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 168960
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0080267284065485
        max_q: 3.0158369541168213
        mean_q: 2.868128776550293
        mean_td_error: -0.1335592269897461
        min_q: 2.754915952682495
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009422632865607738
        max_q: 2.283221483230591
        mean_q: 1.912330150604248
        mean_td_error: -0.12865610420703888
        min_q: 1.752087950706482
    num_steps_sampled: 168960
    num_steps_trained: 167968

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,165,270.863,168960,53.4254,57.2348,49.5515,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-10-00
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.54173056040455
  episode_reward_mean: 52.53868055270894
  episode_reward_min: 48.71346993014678
  episodes_this_iter: 16
  episodes_total: 1720
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 172032
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0064103384502232075
        max_q: 2.7983574867248535
        mean_q: 2.483210325241089
        mean_td_error: -0.05348554998636246
        min_q: 2.0387916564941406
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009866579435765743
        max_q: 2.326876163482666
        mean_q: 1.9861961603164673
        mean_td_error: -0.1326475441455841
        min_q: 1.5995538234710693
    num_steps_sampled: 172032
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,168,275.858,172032,52.5387,56.5417,48.7135,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-10-05
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.54173056040455
  episode_reward_mean: 52.49208722293446
  episode_reward_min: 48.71346993014678
  episodes_this_iter: 8
  episodes_total: 1744
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 175104
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007697600405663252
        max_q: 2.524266481399536
        mean_q: 2.328423500061035
        mean_td_error: -0.12767118215560913
        min_q: 2.0518314838409424
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0022261349949985743
        max_q: 2.6423325538635254
        mean_q: 2.518364906311035
        mean_td_error: -0.029760979115962982
        min_q: 2.327657699584961
    num_steps_sampled: 175104
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,171,281.161,175104,52.4921,56.5417,48.7135,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-10-11
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.617557447373805
  episode_reward_mean: 52.536682528118924
  episode_reward_min: 48.71346993014678
  episodes_this_iter: 8
  episodes_total: 1776
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 178176
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0021242934744805098
        max_q: 2.5833473205566406
        mean_q: 2.439960241317749
        mean_td_error: 0.014415211975574493
        min_q: 2.381009340286255
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0037642735987901688
        max_q: 3.047297239303589
        mean_q: 2.942138433456421
        mean_td_error: -0.05846835672855377
        min_q: 2.8110311031341553
    num_steps_sampled: 178176
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,174,286.503,178176,52.5367,56.6176,48.7135,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-10-16
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.823526118717865
  episode_reward_mean: 53.21492156579935
  episode_reward_min: 48.71346993014678
  episodes_this_iter: 8
  episodes_total: 1808
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 181248
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01623963564634323
        max_q: 2.730311155319214
        mean_q: 2.4772872924804688
        mean_td_error: -0.18524442613124847
        min_q: 2.3732168674468994
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007223318796604872
        max_q: 3.2167177200317383
        mean_q: 3.0626373291015625
        mean_td_error: -0.10898520797491074
        min_q: 2.97515869140625
    num_steps_sampled: 181248
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,177,291.866,181248,53.2149,56.8235,48.7135,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-10-22
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.823526118717865
  episode_reward_mean: 53.057564296901155
  episode_reward_min: 47.776298106673686
  episodes_this_iter: 8
  episodes_total: 1840
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 184320
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.027368882670998573
        max_q: 1.9440099000930786
        mean_q: 1.5200181007385254
        mean_td_error: -0.3343837261199951
        min_q: 1.2426573038101196
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006868877448141575
        max_q: 3.454874038696289
        mean_q: 3.267960786819458
        mean_td_error: -0.11545266211032867
        min_q: 3.135795831680298
    num_steps_sampled: 184320
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,180,297.413,184320,53.0576,56.8235,47.7763,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-10-28
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.223980340921734
  episode_reward_mean: 53.27763816255951
  episode_reward_min: 47.776298106673686
  episodes_this_iter: 16
  episodes_total: 1872
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 187392
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006004745606333017
        max_q: 2.6053664684295654
        mean_q: 2.525611639022827
        mean_td_error: 0.12653493881225586
        min_q: 2.4036552906036377
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010935138911008835
        max_q: 3.725285768508911
        mean_q: 3.3420567512512207
        mean_td_error: -0.15306386351585388
        min_q: 3.1521496772766113
    num_steps_sampled: 187392
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,183,302.964,187392,53.2776,57.224,47.7763,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-10-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.223980340921734
  episode_reward_mean: 53.177117505556225
  episode_reward_min: 47.776298106673686
  episodes_this_iter: 16
  episodes_total: 1904
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 190464
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0014442355604842305
        max_q: 3.4234089851379395
        mean_q: 3.3541359901428223
        mean_td_error: -0.02323233336210251
        min_q: 3.2392830848693848
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0011722039198502898
        max_q: 3.7926831245422363
        mean_q: 3.6894986629486084
        mean_td_error: 0.005438446998596191
        min_q: 3.5504064559936523
    num_steps_sampled: 190464
    num_steps

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,186,308.462,190464,53.1771,57.224,47.7763,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-10-39
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.223980340921734
  episode_reward_mean: 53.09898550808385
  episode_reward_min: 47.776298106673686
  episodes_this_iter: 8
  episodes_total: 1928
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 193536
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0010510804131627083
        max_q: 4.0013041496276855
        mean_q: 3.9290614128112793
        mean_td_error: 0.019551008939743042
        min_q: 3.792417049407959
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0016617472283542156
        max_q: 3.6457715034484863
        mean_q: 3.517597198486328
        mean_td_error: 0.019035376608371735
        min_q: 3.4264931678771973
    num_steps_sampled: 193536
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,189,313.808,193536,53.099,57.224,47.7763,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-10-45
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.66665684758427
  episode_reward_mean: 53.20625899882157
  episode_reward_min: 49.50034906131154
  episodes_this_iter: 8
  episodes_total: 1960
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 196608
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0018304289551451802
        max_q: 4.24706506729126
        mean_q: 4.183379650115967
        mean_td_error: 0.0437544584274292
        min_q: 4.082939147949219
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004272205289453268
        max_q: 4.014129638671875
        mean_q: 3.884904384613037
        mean_td_error: -0.06528230756521225
        min_q: 3.696162462234497
    num_steps_sampled: 196608
    num_steps_trained: 1956

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,192,319.16,196608,53.2063,56.6667,49.5003,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-10-50
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.38104955176724
  episode_reward_mean: 52.655826985201074
  episode_reward_min: 48.815033773740744
  episodes_this_iter: 8
  episodes_total: 1992
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 199680
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002559602726250887
        max_q: 4.658210277557373
        mean_q: 4.583592414855957
        mean_td_error: 0.05179455876350403
        min_q: 4.502536296844482
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0061301663517951965
        max_q: 3.762359380722046
        mean_q: 3.640493869781494
        mean_td_error: 0.08710378408432007
        min_q: 3.4587831497192383
    num_steps_sampled: 199680
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,195,324.518,199680,52.6558,56.381,48.815,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-10-56
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.2072743405292
  episode_reward_mean: 52.868172299743094
  episode_reward_min: 48.815033773740744
  episodes_this_iter: 8
  episodes_total: 2024
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 202752
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0024379033129662275
        max_q: 4.767638683319092
        mean_q: 4.659404277801514
        mean_td_error: -0.04451867938041687
        min_q: 4.551303863525391
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0025737492833286524
        max_q: 3.7607083320617676
        mean_q: 3.591716766357422
        mean_td_error: -0.03370746225118637
        min_q: 3.517073392868042
    num_steps_sampled: 202752
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,198,329.77,202752,52.8682,57.2073,48.815,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-11-01
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.2072743405292
  episode_reward_mean: 53.002252673238274
  episode_reward_min: 48.01741450960927
  episodes_this_iter: 8
  episodes_total: 2056
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 205824
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00380394933745265
        max_q: 4.66739559173584
        mean_q: 4.502423286437988
        mean_td_error: 0.08005844056606293
        min_q: 4.298792839050293
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002026790054515004
        max_q: 3.6317825317382812
        mean_q: 3.380767822265625
        mean_td_error: 0.008431628346443176
        min_q: 3.1391515731811523
    num_steps_sampled: 205824
    num_steps_trained: 204

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,201,334.986,205824,53.0023,57.2073,48.0174,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-11-06
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.40482385885305
  episode_reward_mean: 53.87465010441476
  episode_reward_min: 48.01741450960927
  episodes_this_iter: 16
  episodes_total: 2088
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 208896
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001166761852800846
        max_q: 4.326265335083008
        mean_q: 4.226694107055664
        mean_td_error: -0.018467232584953308
        min_q: 4.102489471435547
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004077746067196131
        max_q: 3.1794164180755615
        mean_q: 3.030182123184204
        mean_td_error: -0.04323966056108475
        min_q: 2.946460723876953
    num_steps_sampled: 208896
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,204,340.009,208896,53.8747,59.4048,48.0174,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-11-12
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.40482385885305
  episode_reward_mean: 53.93696691956704
  episode_reward_min: 48.01741450960927
  episodes_this_iter: 8
  episodes_total: 2112
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 211968
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0025173856411129236
        max_q: 4.516604423522949
        mean_q: 4.399552345275879
        mean_td_error: 0.05681459605693817
        min_q: 4.29592752456665
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.028952205553650856
        max_q: 2.529475688934326
        mean_q: 2.329867362976074
        mean_td_error: -0.38887274265289307
        min_q: 2.1793570518493652
    num_steps_sampled: 211968
    num_steps_trained: 21

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,207,344.991,211968,53.937,59.4048,48.0174,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-11-17
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.40482385885305
  episode_reward_mean: 53.71460637874435
  episode_reward_min: 48.01741450960927
  episodes_this_iter: 8
  episodes_total: 2144
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 215040
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0017931207548826933
        max_q: 4.457284450531006
        mean_q: 4.384799957275391
        mean_td_error: -0.03622886538505554
        min_q: 4.302298069000244
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0035952634643763304
        max_q: 2.3202333450317383
        mean_q: 2.1409716606140137
        mean_td_error: -0.043007150292396545
        min_q: 2.064570665359497
    num_steps_sampled: 215040
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,210,349.974,215040,53.7146,59.4048,48.0174,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-11-22
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.333299860037485
  episode_reward_mean: 53.827559342286534
  episode_reward_min: 50.303944688740955
  episodes_this_iter: 8
  episodes_total: 2176
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 218112
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002082363236695528
        max_q: 4.702447414398193
        mean_q: 4.640430927276611
        mean_td_error: 0.03536467254161835
        min_q: 4.5612101554870605
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003776951925829053
        max_q: 2.6116340160369873
        mean_q: 2.386362314224243
        mean_td_error: -0.04359688609838486
        min_q: 2.153268337249756
    num_steps_sampled: 218112
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,213,355.259,218112,53.8276,58.3333,50.3039,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-11-28
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.07885034368218
  episode_reward_mean: 53.99887179105647
  episode_reward_min: 50.91706401438262
  episodes_this_iter: 8
  episodes_total: 2208
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 221184
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005153759382665157
        max_q: 4.563726425170898
        mean_q: 4.488357067108154
        mean_td_error: -0.09341879189014435
        min_q: 4.426153659820557
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.023158414289355278
        max_q: 2.0700252056121826
        mean_q: 1.5613319873809814
        mean_td_error: -0.31871870160102844
        min_q: 1.3392376899719238
    num_steps_sampled: 221184
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,216,360.781,221184,53.9989,59.0789,50.9171,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-11-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.07885034368218
  episode_reward_mean: 53.74173984609561
  episode_reward_min: 50.91706401438262
  episodes_this_iter: 8
  episodes_total: 2240
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 224256
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0020229199435561895
        max_q: 4.301354885101318
        mean_q: 4.152780055999756
        mean_td_error: -0.022997498512268066
        min_q: 4.013130187988281
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.020339764654636383
        max_q: 1.6589241027832031
        mean_q: 1.404466986656189
        mean_td_error: -0.3051554560661316
        min_q: 1.0308135747909546
    num_steps_sampled: 224256
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,219,366.391,224256,53.7417,59.0789,50.9171,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-11-39
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.07885034368218
  episode_reward_mean: 54.47231989269685
  episode_reward_min: 51.048981035731636
  episodes_this_iter: 16
  episodes_total: 2272
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 227328
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0036729949060827494
        max_q: 4.25595760345459
        mean_q: 3.9272353649139404
        mean_td_error: -0.0604267418384552
        min_q: 3.717283248901367
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02151624672114849
        max_q: 1.6277798414230347
        mean_q: 1.4668725728988647
        mean_td_error: -0.2935924828052521
        min_q: 1.3616234064102173
    num_steps_sampled: 227328
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,222,371.808,227328,54.4723,59.0789,51.049,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-11-45
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.29611701296397
  episode_reward_mean: 53.91414475909443
  episode_reward_min: 50.78563246369076
  episodes_this_iter: 16
  episodes_total: 2304
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 230400
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002961607649922371
        max_q: 3.3606579303741455
        mean_q: 3.270395040512085
        mean_td_error: -0.04093620926141739
        min_q: 3.1166436672210693
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002579423366114497
        max_q: 2.1375977993011475
        mean_q: 1.9561039209365845
        mean_td_error: 0.0034081190824508667
        min_q: 1.78193199634552
    num_steps_sampled: 230400
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,225,376.859,230400,53.9141,57.2961,50.7856,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-11-50
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.29611701296397
  episode_reward_mean: 53.403981267456665
  episode_reward_min: 49.68618175010392
  episodes_this_iter: 8
  episodes_total: 2328
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 233472
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0054289307445287704
        max_q: 3.5476741790771484
        mean_q: 3.409191608428955
        mean_td_error: -0.07870541512966156
        min_q: 3.256639242172241
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0065541453659534454
        max_q: 2.0280368328094482
        mean_q: 1.646596074104309
        mean_td_error: 0.0007147789001464844
        min_q: 1.2646360397338867
    num_steps_sampled: 233472
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,228,381.898,233472,53.404,57.2961,49.6862,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-11-55
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.267778661394296
  episode_reward_mean: 52.73304166478251
  episode_reward_min: 48.263234815971714
  episodes_this_iter: 8
  episodes_total: 2360
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 236544
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011890267953276634
        max_q: 1.9774101972579956
        mean_q: 1.6964325904846191
        mean_td_error: -0.1609485149383545
        min_q: 1.209761142730713
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004827980417758226
        max_q: 1.744335651397705
        mean_q: 1.566328525543213
        mean_td_error: 0.035694461315870285
        min_q: 1.4488213062286377
    num_steps_sampled: 236544
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,231,387.065,236544,52.733,56.2678,48.2632,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-12-01
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.51264546295457
  episode_reward_mean: 52.807131465221836
  episode_reward_min: 48.263234815971714
  episodes_this_iter: 8
  episodes_total: 2392
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 239616
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007427648175507784
        max_q: 0.846409261226654
        mean_q: 0.6184167861938477
        mean_td_error: -0.1232452243566513
        min_q: 0.224229633808136
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007393502164632082
        max_q: 1.9161791801452637
        mean_q: 1.405411720275879
        mean_td_error: -0.022472098469734192
        min_q: 1.0551350116729736
    num_steps_sampled: 239616
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,234,392.252,239616,52.8071,59.5126,48.2632,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-12-06
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.51264546295457
  episode_reward_mean: 53.19352238489692
  episode_reward_min: 48.263234815971714
  episodes_this_iter: 8
  episodes_total: 2424
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 242688
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00274670566432178
        max_q: 1.3574082851409912
        mean_q: 1.161742925643921
        mean_td_error: 0.011186186224222183
        min_q: 0.8147614002227783
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012651295401155949
        max_q: 1.8134710788726807
        mean_q: 1.572311282157898
        mean_td_error: 0.11602021753787994
        min_q: 1.451170563697815
    num_steps_sampled: 242688
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,237,397.443,242688,53.1935,59.5126,48.2632,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-12-11
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.51264546295457
  episode_reward_mean: 53.87148880300668
  episode_reward_min: 50.102697893401285
  episodes_this_iter: 16
  episodes_total: 2456
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 245760
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0030835126526653767
        max_q: 1.8107335567474365
        mean_q: 1.6413280963897705
        mean_td_error: -0.03903732821345329
        min_q: 1.4420157670974731
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.029009560123085976
        max_q: 1.247300148010254
        mean_q: 0.9924182891845703
        mean_td_error: -0.2742151618003845
        min_q: 0.4340255856513977
    num_steps_sampled: 245760
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,240,402.642,245760,53.8715,59.5126,50.1027,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-12-17
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.22729217257366
  episode_reward_mean: 53.60609428722144
  episode_reward_min: 49.24750438887925
  episodes_this_iter: 16
  episodes_total: 2488
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 248832
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0034842793829739094
        max_q: 2.2380192279815674
        mean_q: 2.0224316120147705
        mean_td_error: -0.057083990424871445
        min_q: 1.8272045850753784
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.025869302451610565
        max_q: 1.089646816253662
        mean_q: 0.7847143411636353
        mean_td_error: -0.20303797721862793
        min_q: 0.4955163598060608
    num_steps_sampled: 248832
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,243,407.855,248832,53.6061,57.2273,49.2475,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-12-22
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.22729217257366
  episode_reward_mean: 53.63118941855043
  episode_reward_min: 49.24750438887925
  episodes_this_iter: 8
  episodes_total: 2512
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 251904
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0024655223824083805
        max_q: 2.5125324726104736
        mean_q: 2.3958401679992676
        mean_td_error: -0.0018473193049430847
        min_q: 2.2460319995880127
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0052293515764176846
        max_q: 1.3708477020263672
        mean_q: 1.2488726377487183
        mean_td_error: 0.029694847762584686
        min_q: 1.1579831838607788
    num_steps_sampled: 251904
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,246,413.202,251904,53.6312,57.2273,49.2475,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-12-28
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.81946162247011
  episode_reward_mean: 54.13830941632168
  episode_reward_min: 49.24750438887925
  episodes_this_iter: 8
  episodes_total: 2544
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 254976
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00440716790035367
        max_q: 2.7228851318359375
        mean_q: 2.5245909690856934
        mean_td_error: -0.06729180365800858
        min_q: 2.3467118740081787
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003557778662070632
        max_q: 2.681422710418701
        mean_q: 2.4510977268218994
        mean_td_error: 0.011696599423885345
        min_q: 2.3140745162963867
    num_steps_sampled: 254976
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,249,418.575,254976,54.1383,57.8195,49.2475,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-12-33
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.81946162247011
  episode_reward_mean: 53.679255958938164
  episode_reward_min: 49.24750438887925
  episodes_this_iter: 8
  episodes_total: 2576
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 258048
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0018500688020139933
        max_q: 2.7624711990356445
        mean_q: 2.5719244480133057
        mean_td_error: 0.0004271790385246277
        min_q: 2.4608912467956543
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.024713190272450447
        max_q: 2.9880897998809814
        mean_q: 2.901508331298828
        mean_td_error: 0.20551005005836487
        min_q: 2.7486560344696045
    num_steps_sampled: 258048
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,252,423.731,258048,53.6793,57.8195,49.2475,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-12-39
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.26366308442357
  episode_reward_mean: 54.02474079338389
  episode_reward_min: 48.98340457706507
  episodes_this_iter: 8
  episodes_total: 2608
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 261120
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011510951444506645
        max_q: 2.17785382270813
        mean_q: 2.1111810207366943
        mean_td_error: -0.14935055375099182
        min_q: 2.002558708190918
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011797547340393066
        max_q: 3.287290096282959
        mean_q: 3.109644889831543
        mean_td_error: -0.10873392224311829
        min_q: 2.9871675968170166
    num_steps_sampled: 261120
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,255,428.776,261120,54.0247,59.2637,48.9834,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-12-44
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.26366308442357
  episode_reward_mean: 53.2819694093909
  episode_reward_min: 48.98340457706507
  episodes_this_iter: 16
  episodes_total: 2640
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 264192
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004840562120079994
        max_q: 2.362874984741211
        mean_q: 2.0227086544036865
        mean_td_error: 0.03802795708179474
        min_q: 1.7126466035842896
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007496600039303303
        max_q: 3.110374927520752
        mean_q: 2.905035972595215
        mean_td_error: 0.06840644776821136
        min_q: 2.8196144104003906
    num_steps_sampled: 264192
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,258,434.142,264192,53.282,59.2637,48.9834,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-12-50
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.26366308442357
  episode_reward_mean: 53.02578943206022
  episode_reward_min: 48.98340457706507
  episodes_this_iter: 16
  episodes_total: 2672
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 267264
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03190155327320099
        max_q: 2.2154157161712646
        mean_q: 2.0233616828918457
        mean_td_error: -0.4093775749206543
        min_q: 1.8089015483856201
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005516134202480316
        max_q: 3.09920072555542
        mean_q: 2.8666067123413086
        mean_td_error: 0.00488647073507309
        min_q: 2.6079187393188477
    num_steps_sampled: 267264
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,261,439.698,267264,53.0258,59.2637,48.9834,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-12-56
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.26366308442357
  episode_reward_mean: 53.5410264026672
  episode_reward_min: 49.39534369466075
  episodes_this_iter: 8
  episodes_total: 2696
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 270336
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0033371951431035995
        max_q: 2.265012502670288
        mean_q: 2.0421640872955322
        mean_td_error: 0.03124319389462471
        min_q: 1.8874030113220215
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003266518469899893
        max_q: 3.164430618286133
        mean_q: 2.917391538619995
        mean_td_error: -0.022043250501155853
        min_q: 2.7618205547332764
    num_steps_sampled: 270336
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,264,445.472,270336,53.541,59.2637,49.3953,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-13-02
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.4903606777373
  episode_reward_mean: 53.35158903917337
  episode_reward_min: 49.39534369466075
  episodes_this_iter: 8
  episodes_total: 2728
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 273408
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.017612885683774948
        max_q: 2.2486684322357178
        mean_q: 2.107419967651367
        mean_td_error: -0.22023342549800873
        min_q: 1.9614580869674683
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0028573358431458473
        max_q: 3.3874704837799072
        mean_q: 3.2945950031280518
        mean_td_error: 0.032454319298267365
        min_q: 3.211552858352661
    num_steps_sampled: 273408
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,267,450.842,273408,53.3516,56.4904,49.3953,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-13-07
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.4903606777373
  episode_reward_mean: 53.644971004853616
  episode_reward_min: 49.379182143522904
  episodes_this_iter: 8
  episodes_total: 2760
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 276480
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006596392020583153
        max_q: 2.0657851696014404
        mean_q: 1.7940466403961182
        mean_td_error: -0.08284565806388855
        min_q: 1.5497918128967285
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0028763480950146914
        max_q: 3.796621799468994
        mean_q: 3.6438815593719482
        mean_td_error: -0.014987930655479431
        min_q: 3.5246450901031494
    num_steps_sampled: 276480
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,270,455.886,276480,53.645,56.4904,49.3792,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-13-12
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.096587197005
  episode_reward_mean: 52.93162528735836
  episode_reward_min: 49.379182143522904
  episodes_this_iter: 8
  episodes_total: 2792
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 279552
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01846800185739994
        max_q: 2.41348934173584
        mean_q: 2.085922956466675
        mean_td_error: 0.1782229244709015
        min_q: 1.931877851486206
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0030480874702334404
        max_q: 4.115473747253418
        mean_q: 4.0055317878723145
        mean_td_error: -0.031303636729717255
        min_q: 3.8094682693481445
    num_steps_sampled: 279552
    num_steps_trained: 278

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,273,460.943,279552,52.9316,56.0966,49.3792,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-13-17
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.096587197005
  episode_reward_mean: 52.09873515103639
  episode_reward_min: 48.38326541913157
  episodes_this_iter: 8
  episodes_total: 2824
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 282624
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008863402530550957
        max_q: 1.667304277420044
        mean_q: 1.376662254333496
        mean_td_error: -0.09659308195114136
        min_q: 1.1836384534835815
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008252833969891071
        max_q: 4.355093479156494
        mean_q: 4.29069709777832
        mean_td_error: 0.09786979854106903
        min_q: 4.21245002746582
    num_steps_sampled: 282624
    num_steps_trained: 281632

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,276,466.057,282624,52.0987,56.0966,48.3833,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-13-23
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.69266978115861
  episode_reward_mean: 51.85046188760025
  episode_reward_min: 48.38326541913157
  episodes_this_iter: 16
  episodes_total: 2856
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 285696
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.017065338790416718
        max_q: 1.7634317874908447
        mean_q: 1.6521650552749634
        mean_td_error: -0.2064736783504486
        min_q: 1.5060871839523315
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0015096340794116259
        max_q: 4.509033203125
        mean_q: 4.421278953552246
        mean_td_error: -0.004778385162353516
        min_q: 4.236677169799805
    num_steps_sampled: 285696
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,279,471.372,285696,51.8505,56.6927,48.3833,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-13-28
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.69266978115861
  episode_reward_mean: 51.96950991517756
  episode_reward_min: 48.38326541913157
  episodes_this_iter: 8
  episodes_total: 2880
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 288768
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005962381139397621
        max_q: 2.1288399696350098
        mean_q: 1.9315215349197388
        mean_td_error: -0.051000647246837616
        min_q: 1.7761043310165405
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0018493208335712552
        max_q: 4.393467903137207
        mean_q: 4.273702621459961
        mean_td_error: 0.026957914233207703
        min_q: 4.2163920402526855
    num_steps_sampled: 288768
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,282,476.598,288768,51.9695,56.6927,48.3833,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-13-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.69266978115861
  episode_reward_mean: 52.159436690254296
  episode_reward_min: 48.849707004606415
  episodes_this_iter: 8
  episodes_total: 2912
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 291840
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.021746773272752762
        max_q: 1.9623943567276
        mean_q: 1.712530255317688
        mean_td_error: -0.2235613316297531
        min_q: 1.560652494430542
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0023169894702732563
        max_q: 4.481716156005859
        mean_q: 4.365266799926758
        mean_td_error: 0.0134267657995224
        min_q: 4.265002250671387
    num_steps_sampled: 291840
    num_steps_trained: 2908

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,285,481.88,291840,52.1594,56.6927,48.8497,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-13-39
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.69266978115861
  episode_reward_mean: 53.092166045155324
  episode_reward_min: 48.849707004606415
  episodes_this_iter: 8
  episodes_total: 2944
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 294912
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006288554519414902
        max_q: 2.3930859565734863
        mean_q: 2.006258249282837
        mean_td_error: 0.0010048821568489075
        min_q: 1.5634678602218628
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004984579514712095
        max_q: 4.434904098510742
        mean_q: 4.361779689788818
        mean_td_error: -0.0663265734910965
        min_q: 4.2439165115356445
    num_steps_sampled: 294912
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,288,487.108,294912,53.0922,56.6927,48.8497,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-13-45
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.619519101793976
  episode_reward_mean: 53.19297374720385
  episode_reward_min: 48.849707004606415
  episodes_this_iter: 8
  episodes_total: 2976
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 297984
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013796866871416569
        max_q: 1.8747479915618896
        mean_q: 1.6160603761672974
        mean_td_error: 0.12644615769386292
        min_q: 1.4204089641571045
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004964102990925312
        max_q: 4.411076068878174
        mean_q: 4.29582405090332
        mean_td_error: -0.042006537318229675
        min_q: 4.191436767578125
    num_steps_sampled: 297984
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,291,492.333,297984,53.193,56.6195,48.8497,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-13-50
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.619519101793976
  episode_reward_mean: 53.66574685163056
  episode_reward_min: 50.280890102965266
  episodes_this_iter: 8
  episodes_total: 3008
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 301056
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009995843283832073
        max_q: 2.318530797958374
        mean_q: 1.960253119468689
        mean_td_error: 0.09536442160606384
        min_q: 1.6496663093566895
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0034235992934554815
        max_q: 4.604927062988281
        mean_q: 4.536921501159668
        mean_td_error: -0.07663619518280029
        min_q: 4.4878315925598145
    num_steps_sampled: 301056
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,294,497.558,301056,53.6657,56.6195,50.2809,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-13-56
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.619519101793976
  episode_reward_mean: 53.255913168517445
  episode_reward_min: 49.990933504030416
  episodes_this_iter: 16
  episodes_total: 3040
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 304128
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01222812756896019
        max_q: 2.4095802307128906
        mean_q: 2.2451536655426025
        mean_td_error: 0.15685158967971802
        min_q: 2.143620729446411
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0014489823952317238
        max_q: 4.896602630615234
        mean_q: 4.865914344787598
        mean_td_error: 0.023177385330200195
        min_q: 4.787949085235596
    num_steps_sampled: 304128
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,297,502.757,304128,53.2559,56.6195,49.9909,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-14-01
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.508984248918686
  episode_reward_mean: 52.79735847049828
  episode_reward_min: 48.69633326543884
  episodes_this_iter: 16
  episodes_total: 3072
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 307200
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003931901883333921
        max_q: 3.110618829727173
        mean_q: 3.0215280055999756
        mean_td_error: 0.056128449738025665
        min_q: 2.875274419784546
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0019769559148699045
        max_q: 4.9874467849731445
        mean_q: 4.929028034210205
        mean_td_error: -0.04002659022808075
        min_q: 4.857537746429443
    num_steps_sampled: 307200
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,300,507.898,307200,52.7974,56.509,48.6963,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-14-06
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.508984248918686
  episode_reward_mean: 52.75214104370898
  episode_reward_min: 48.69633326543884
  episodes_this_iter: 8
  episodes_total: 3096
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 310272
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004739395808428526
        max_q: 3.797626256942749
        mean_q: 3.726083993911743
        mean_td_error: 0.06752273440361023
        min_q: 3.619492769241333
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0012491742381826043
        max_q: 5.086197853088379
        mean_q: 4.963203430175781
        mean_td_error: 0.009836524724960327
        min_q: 4.831561088562012
    num_steps_sampled: 310272
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,303,512.997,310272,52.7521,56.509,48.6963,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-14-12
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.508984248918686
  episode_reward_mean: 52.52631159099181
  episode_reward_min: 48.69633326543884
  episodes_this_iter: 8
  episodes_total: 3128
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 313344
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005652349442243576
        max_q: 4.219477653503418
        mean_q: 4.1082539558410645
        mean_td_error: -0.07011082768440247
        min_q: 3.9696695804595947
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0005830693989992142
        max_q: 4.962369441986084
        mean_q: 4.91591215133667
        mean_td_error: 0.005141720175743103
        min_q: 4.81241512298584
    num_steps_sampled: 313344
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,306,518.293,313344,52.5263,56.509,48.6963,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-14-17
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.832995214756224
  episode_reward_mean: 51.9143948935024
  episode_reward_min: 48.69633326543884
  episodes_this_iter: 8
  episodes_total: 3160
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 316416
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009794152341783047
        max_q: 4.353390216827393
        mean_q: 4.2795186042785645
        mean_td_error: 0.11984008550643921
        min_q: 4.163280010223389
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0038382920902222395
        max_q: 4.9613494873046875
        mean_q: 4.904534816741943
        mean_td_error: -0.0764922946691513
        min_q: 4.847908020019531
    num_steps_sampled: 316416
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,309,523.518,316416,51.9144,55.833,48.6963,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-14-23
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.54802824195264
  episode_reward_mean: 51.57798395101804
  episode_reward_min: 48.69571262452226
  episodes_this_iter: 8
  episodes_total: 3192
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 319488
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0019384983461350203
        max_q: 4.326625347137451
        mean_q: 4.26393985748291
        mean_td_error: 0.016150757670402527
        min_q: 4.214254379272461
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006585925817489624
        max_q: 4.760136604309082
        mean_q: 4.7010817527771
        mean_td_error: -0.10823030769824982
        min_q: 4.631219863891602
    num_steps_sampled: 319488
    num_steps_trained: 3184

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,312,528.848,319488,51.578,55.548,48.6957,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-14-28
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.54802824195264
  episode_reward_mean: 51.44194522292351
  episode_reward_min: 48.69571262452226
  episodes_this_iter: 16
  episodes_total: 3224
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 322560
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0005334372981451452
        max_q: 4.4123640060424805
        mean_q: 4.336884498596191
        mean_td_error: -0.0029013752937316895
        min_q: 4.216433048248291
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0019899443723261356
        max_q: 4.782326698303223
        mean_q: 4.719668865203857
        mean_td_error: 0.04372487962245941
        min_q: 4.650442600250244
    num_steps_sampled: 322560
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,315,534.303,322560,51.4419,55.548,48.6957,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-14-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.54802824195264
  episode_reward_mean: 51.9251693088442
  episode_reward_min: 48.69571262452226
  episodes_this_iter: 16
  episodes_total: 3256
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 325632
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0013259007828310132
        max_q: 4.751156330108643
        mean_q: 4.698548793792725
        mean_td_error: 0.018783986568450928
        min_q: 4.610588073730469
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0012256060726940632
        max_q: 4.902311325073242
        mean_q: 4.819215297698975
        mean_td_error: -0.009441182017326355
        min_q: 4.672946929931641
    num_steps_sampled: 325632
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,318,539.711,325632,51.9252,55.548,48.6957,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-14-40
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.140803008354425
  episode_reward_mean: 51.99103223634322
  episode_reward_min: 48.69571262452226
  episodes_this_iter: 8
  episodes_total: 3280
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 328704
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0014898076187819242
        max_q: 5.023671627044678
        mean_q: 4.976297855377197
        mean_td_error: 0.028357580304145813
        min_q: 4.810696601867676
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005543853156268597
        max_q: 4.654257297515869
        mean_q: 4.543511390686035
        mean_td_error: -0.10640980303287506
        min_q: 4.425674915313721
    num_steps_sampled: 328704
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,321,545.998,328704,51.991,55.1408,48.6957,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-14-47
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.140803008354425
  episode_reward_mean: 52.249680331871424
  episode_reward_min: 47.277344807377034
  episodes_this_iter: 8
  episodes_total: 3312
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 331776
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005617074202746153
        max_q: 4.991720199584961
        mean_q: 4.921914100646973
        mean_td_error: -0.12152419984340668
        min_q: 4.848726272583008
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003451951779425144
        max_q: 4.353774547576904
        mean_q: 4.227499008178711
        mean_td_error: -0.06055176258087158
        min_q: 4.09807014465332
    num_steps_sampled: 331776
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,324,552.306,331776,52.2497,55.1408,47.2773,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-14-53
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.438584079904
  episode_reward_mean: 52.53133475307234
  episode_reward_min: 47.277344807377034
  episodes_this_iter: 8
  episodes_total: 3344
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 334848
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0006236422341316938
        max_q: 5.074291706085205
        mean_q: 4.989936828613281
        mean_td_error: -0.0015173256397247314
        min_q: 4.877674102783203
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003603528253734112
        max_q: 4.267670631408691
        mean_q: 4.067826271057129
        mean_td_error: -0.04654305428266525
        min_q: 3.9367265701293945
    num_steps_sampled: 334848
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,327,558.385,334848,52.5313,56.4386,47.2773,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-14-59
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.059816507936546
  episode_reward_mean: 52.52108237507293
  episode_reward_min: 47.277344807377034
  episodes_this_iter: 8
  episodes_total: 3376
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 337920
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0012569966493174434
        max_q: 4.862705230712891
        mean_q: 4.8183183670043945
        mean_td_error: 0.026665925979614258
        min_q: 4.657508373260498
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0028185993432998657
        max_q: 4.002507209777832
        mean_q: 3.8974294662475586
        mean_td_error: 0.05350179225206375
        min_q: 3.7065212726593018
    num_steps_sampled: 337920
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,330,564.333,337920,52.5211,57.0598,47.2773,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-15-06
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.059816507936546
  episode_reward_mean: 52.4586592692169
  episode_reward_min: 49.27021302133247
  episodes_this_iter: 16
  episodes_total: 3408
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 340992
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003644687822088599
        max_q: 4.859584808349609
        mean_q: 4.80106782913208
        mean_td_error: -0.07693237066268921
        min_q: 4.65803861618042
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010438053868710995
        max_q: 3.7547202110290527
        mean_q: 3.6175742149353027
        mean_td_error: -0.19674478471279144
        min_q: 3.3461742401123047
    num_steps_sampled: 340992
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,333,570.186,340992,52.4587,57.0598,49.2702,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-15-12
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.059816507936546
  episode_reward_mean: 52.102810799446786
  episode_reward_min: 49.27021302133247
  episodes_this_iter: 16
  episodes_total: 3440
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 344064
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007318435702472925
        max_q: 4.683481693267822
        mean_q: 4.612416744232178
        mean_td_error: -0.1384185403585434
        min_q: 4.526522636413574
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0036070835776627064
        max_q: 3.390711545944214
        mean_q: 3.2842116355895996
        mean_td_error: -0.06795033812522888
        min_q: 3.209210157394409
    num_steps_sampled: 344064
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,336,576.031,344064,52.1028,57.0598,49.2702,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-15-18
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.7253600787539
  episode_reward_mean: 52.148566635507265
  episode_reward_min: 47.07157737221172
  episodes_this_iter: 8
  episodes_total: 3464
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 347136
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0013848243979737163
        max_q: 4.837599754333496
        mean_q: 4.713435173034668
        mean_td_error: 0.013909921050071716
        min_q: 4.598897457122803
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0043170335702598095
        max_q: 3.1528851985931396
        mean_q: 2.975893020629883
        mean_td_error: -0.07315849512815475
        min_q: 2.802276849746704
    num_steps_sampled: 347136
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,339,581.912,347136,52.1486,56.7254,47.0716,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-15-24
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.21156431365331
  episode_reward_mean: 51.93652159815747
  episode_reward_min: 47.07157737221172
  episodes_this_iter: 8
  episodes_total: 3496
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 350208
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011710768565535545
        max_q: 4.426029682159424
        mean_q: 4.305430889129639
        mean_td_error: -0.25534552335739136
        min_q: 4.087728023529053
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00452866917476058
        max_q: 3.244965076446533
        mean_q: 3.1655051708221436
        mean_td_error: -0.09220554679632187
        min_q: 3.0918707847595215
    num_steps_sampled: 350208
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,342,587.646,350208,51.9365,56.2116,47.0716,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-15-30
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.21156431365331
  episode_reward_mean: 51.861245407898004
  episode_reward_min: 46.141183478866715
  episodes_this_iter: 8
  episodes_total: 3528
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 353280
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0011564347660169005
        max_q: 4.299592971801758
        mean_q: 4.216858386993408
        mean_td_error: -0.005252078175544739
        min_q: 4.1535515785217285
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007276390213519335
        max_q: 3.145900011062622
        mean_q: 2.9335479736328125
        mean_td_error: -0.13764023780822754
        min_q: 2.6314985752105713
    num_steps_sampled: 353280
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,345,593.44,353280,51.8612,56.2116,46.1412,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-15-36
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.58267000936565
  episode_reward_mean: 52.166514137590305
  episode_reward_min: 46.141183478866715
  episodes_this_iter: 8
  episodes_total: 3560
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 356352
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00425320491194725
        max_q: 4.536072254180908
        mean_q: 4.34907341003418
        mean_td_error: -0.06858223676681519
        min_q: 4.254655361175537
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003952968865633011
        max_q: 3.175426721572876
        mean_q: 2.9956583976745605
        mean_td_error: -0.07849954068660736
        min_q: 2.887777328491211
    num_steps_sampled: 356352
    num_steps_trained: 3

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,348,599.096,356352,52.1665,55.5827,46.1412,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-15-42
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.58267000936565
  episode_reward_mean: 52.09254453662199
  episode_reward_min: 46.141183478866715
  episodes_this_iter: 8
  episodes_total: 3592
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 359424
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007586078252643347
        max_q: 4.352968215942383
        mean_q: 3.9818429946899414
        mean_td_error: -0.1130269467830658
        min_q: 3.7560434341430664
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005405528005212545
        max_q: 2.2159507274627686
        mean_q: 1.9741933345794678
        mean_td_error: 0.09285511821508408
        min_q: 1.81670343875885
    num_steps_sampled: 359424
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,351,604.476,359424,52.0925,55.5827,46.1412,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-15-47
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.6254267534478
  episode_reward_mean: 52.75236977131736
  episode_reward_min: 46.141183478866715
  episodes_this_iter: 16
  episodes_total: 3624
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 362496
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003305840538814664
        max_q: 4.006107330322266
        mean_q: 3.9190151691436768
        mean_td_error: -0.05878432095050812
        min_q: 3.843189239501953
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015626706182956696
        max_q: 2.055757999420166
        mean_q: 1.8297843933105469
        mean_td_error: -0.34682193398475647
        min_q: 1.4049123525619507
    num_steps_sampled: 362496
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,354,609.581,362496,52.7524,55.6254,46.1412,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-15-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.6254267534478
  episode_reward_mean: 52.043727557122544
  episode_reward_min: 46.996734470716184
  episodes_this_iter: 8
  episodes_total: 3648
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 365568
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002511264057829976
        max_q: 3.9126904010772705
        mean_q: 3.8395421504974365
        mean_td_error: -0.04126212000846863
        min_q: 3.782958507537842
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013111723586916924
        max_q: 1.899580478668213
        mean_q: 1.7700393199920654
        mean_td_error: -0.261158287525177
        min_q: 1.6176574230194092
    num_steps_sampled: 365568
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,357,614.674,365568,52.0437,55.6254,46.9967,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-15-58
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.6254267534478
  episode_reward_mean: 51.65332917644227
  episode_reward_min: 46.996734470716184
  episodes_this_iter: 8
  episodes_total: 3680
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 368640
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007716652005910873
        max_q: 3.8600826263427734
        mean_q: 3.7600924968719482
        mean_td_error: -0.14888131618499756
        min_q: 3.667999744415283
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0025341780856251717
        max_q: 2.3278884887695312
        mean_q: 2.1747195720672607
        mean_td_error: -0.040328361093997955
        min_q: 2.083550453186035
    num_steps_sampled: 368640
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,360,619.907,368640,51.6533,55.6254,46.9967,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-16-03
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.6254267534478
  episode_reward_mean: 50.97115851025961
  episode_reward_min: 46.996734470716184
  episodes_this_iter: 8
  episodes_total: 3712
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 371712
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003017574781551957
        max_q: 4.060898780822754
        mean_q: 3.9106457233428955
        mean_td_error: -0.04811294376850128
        min_q: 3.7462968826293945
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01914692297577858
        max_q: 2.4992194175720215
        mean_q: 2.327157974243164
        mean_td_error: -0.38834279775619507
        min_q: 2.1047775745391846
    num_steps_sampled: 371712
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,363,625.321,371712,50.9712,55.6254,46.9967,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-16-09
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.47362082201533
  episode_reward_mean: 51.487252219683626
  episode_reward_min: 48.55995191174942
  episodes_this_iter: 8
  episodes_total: 3744
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 374784
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0027306671254336834
        max_q: 4.172222137451172
        mean_q: 4.028757095336914
        mean_td_error: -0.04138944298028946
        min_q: 3.9351935386657715
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009075726382434368
        max_q: 2.4629693031311035
        mean_q: 2.312386989593506
        mean_td_error: -0.190399631857872
        min_q: 2.107227087020874
    num_steps_sampled: 374784
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,366,630.809,374784,51.4873,56.4736,48.56,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-16-15
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.47362082201533
  episode_reward_mean: 52.255246232354594
  episode_reward_min: 48.742040716705375
  episodes_this_iter: 8
  episodes_total: 3776
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 377856
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0021030218340456486
        max_q: 4.129469394683838
        mean_q: 3.8923959732055664
        mean_td_error: -0.011292174458503723
        min_q: 3.7793633937835693
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0023447817657142878
        max_q: 2.647434711456299
        mean_q: 2.574903964996338
        mean_td_error: -0.04321185499429703
        min_q: 2.446462869644165
    num_steps_sampled: 377856
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,369,636.256,377856,52.2552,56.4736,48.742,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-16-20
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.47362082201533
  episode_reward_mean: 51.666425727278764
  episode_reward_min: 46.85206325016951
  episodes_this_iter: 16
  episodes_total: 3808
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 380928
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008065666072070599
        max_q: 4.188297271728516
        mean_q: 3.995612382888794
        mean_td_error: 0.04922250658273697
        min_q: 3.707796335220337
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009809551760554314
        max_q: 2.9589591026306152
        mean_q: 2.7846262454986572
        mean_td_error: -0.20101647078990936
        min_q: 2.4912548065185547
    num_steps_sampled: 380928
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,372,641.706,380928,51.6664,56.4736,46.8521,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-16-26
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.63950455037265
  episode_reward_mean: 51.255310535280486
  episode_reward_min: 46.85206325016951
  episodes_this_iter: 16
  episodes_total: 3840
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 384000
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.025927606970071793
        max_q: 2.614703893661499
        mean_q: 1.4599214792251587
        mean_td_error: -0.19518345594406128
        min_q: 0.7293031215667725
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011117541231215
        max_q: 2.545259714126587
        mean_q: 2.428014039993286
        mean_td_error: -0.22973957657814026
        min_q: 2.2200772762298584
    num_steps_sampled: 384000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,375,647.138,384000,51.2553,55.6395,46.8521,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-16-32
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.63950455037265
  episode_reward_mean: 51.50519088768504
  episode_reward_min: 46.85206325016951
  episodes_this_iter: 8
  episodes_total: 3864
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 387072
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007596920244395733
        max_q: 1.6048710346221924
        mean_q: 1.340849757194519
        mean_td_error: 0.0667685866355896
        min_q: 0.9450525045394897
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002399467397481203
        max_q: 2.5408775806427
        mean_q: 2.320230484008789
        mean_td_error: -0.03695482015609741
        min_q: 2.0588674545288086
    num_steps_sampled: 387072
    num_steps_trained: 386

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,378,652.702,387072,51.5052,55.6395,46.8521,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-16-37
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.66653352087848
  episode_reward_mean: 51.50447944240761
  episode_reward_min: 46.85206325016951
  episodes_this_iter: 8
  episodes_total: 3896
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 390144
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002410536166280508
        max_q: 2.6853256225585938
        mean_q: 2.525423288345337
        mean_td_error: 0.00944054126739502
        min_q: 2.328944683074951
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0017866722773760557
        max_q: 2.6525535583496094
        mean_q: 2.5247037410736084
        mean_td_error: -0.0444662943482399
        min_q: 2.3489058017730713
    num_steps_sampled: 390144
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,381,657.986,390144,51.5045,57.6665,46.8521,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-16-43
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.66653352087848
  episode_reward_mean: 52.39122751271274
  episode_reward_min: 48.020440399434904
  episodes_this_iter: 8
  episodes_total: 3928
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 393216
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004649747628718615
        max_q: 3.395357131958008
        mean_q: 3.325244665145874
        mean_td_error: 0.06012903153896332
        min_q: 3.173799753189087
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0012418298283591866
        max_q: 2.793679714202881
        mean_q: 2.5648510456085205
        mean_td_error: -0.007365569472312927
        min_q: 2.4168519973754883
    num_steps_sampled: 393216
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,384,663.248,393216,52.3912,57.6665,48.0204,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-16-48
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.66653352087848
  episode_reward_mean: 52.08734302587943
  episode_reward_min: 48.020440399434904
  episodes_this_iter: 8
  episodes_total: 3960
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 396288
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006282289978116751
        max_q: 4.11032772064209
        mean_q: 4.000622272491455
        mean_td_error: 0.09743020683526993
        min_q: 3.929391622543335
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0014941280242055655
        max_q: 3.160533905029297
        mean_q: 3.07174015045166
        mean_td_error: 0.03160622715950012
        min_q: 2.984544277191162
    num_steps_sampled: 396288
    num_steps_trained: 3952

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,387,668.674,396288,52.0873,57.6665,48.0204,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-16-54
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.42015952623115
  episode_reward_mean: 53.104943715660085
  episode_reward_min: 48.72645770547077
  episodes_this_iter: 16
  episodes_total: 3992
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 399360
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003353849984705448
        max_q: 4.385920524597168
        mean_q: 4.3354644775390625
        mean_td_error: 0.05001528561115265
        min_q: 4.186573505401611
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010476537048816681
        max_q: 3.2105765342712402
        mean_q: 3.110659122467041
        mean_td_error: -0.2729431092739105
        min_q: 2.965867280960083
    num_steps_sampled: 399360
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,390,674.169,399360,53.1049,59.4202,48.7265,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-17-00
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.42015952623115
  episode_reward_mean: 52.49443782795089
  episode_reward_min: 47.67949003475865
  episodes_this_iter: 16
  episodes_total: 4024
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 402432
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0014434641925618052
        max_q: 4.698269367218018
        mean_q: 4.656981945037842
        mean_td_error: 0.019711986184120178
        min_q: 4.527777194976807
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00328696402721107
        max_q: 3.2985680103302
        mean_q: 3.0113348960876465
        mean_td_error: -0.06390179693698883
        min_q: 2.7121500968933105
    num_steps_sampled: 402432
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,393,679.539,402432,52.4944,59.4202,47.6795,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-17-05
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.42015952623115
  episode_reward_mean: 52.308984300628275
  episode_reward_min: 47.67949003475865
  episodes_this_iter: 8
  episodes_total: 4048
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 405504
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0034914612770080566
        max_q: 4.85251522064209
        mean_q: 4.761557102203369
        mean_td_error: 0.05468320846557617
        min_q: 4.666767120361328
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0021602448541671038
        max_q: 3.380666732788086
        mean_q: 3.2820801734924316
        mean_td_error: 0.05347346514463425
        min_q: 3.1938278675079346
    num_steps_sampled: 405504
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,396,684.697,405504,52.309,59.4202,47.6795,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-17-10
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.42015952623115
  episode_reward_mean: 52.33670553500856
  episode_reward_min: 47.67949003475865
  episodes_this_iter: 8
  episodes_total: 4080
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 408576
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0016159088118001819
        max_q: 4.640154838562012
        mean_q: 4.572814464569092
        mean_td_error: 0.017369285225868225
        min_q: 4.466518878936768
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005817800294607878
        max_q: 3.5946545600891113
        mean_q: 3.499077320098877
        mean_td_error: -0.1337568759918213
        min_q: 3.368222713470459
    num_steps_sampled: 408576
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,399,689.937,408576,52.3367,59.4202,47.6795,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-17-16
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.987359201051596
  episode_reward_mean: 52.0503430062965
  episode_reward_min: 48.54392160263856
  episodes_this_iter: 8
  episodes_total: 4112
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 411648
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002085602842271328
        max_q: 4.66052770614624
        mean_q: 4.493873119354248
        mean_td_error: 0.002832964062690735
        min_q: 4.377992630004883
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007917392067611217
        max_q: 3.5965311527252197
        mean_q: 3.307570457458496
        mean_td_error: -0.1653132289648056
        min_q: 3.155515432357788
    num_steps_sampled: 411648
    num_steps_trained: 410

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,402,695.401,411648,52.0503,56.9874,48.5439,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-17-22
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.987359201051596
  episode_reward_mean: 52.97600203469591
  episode_reward_min: 48.917983883283185
  episodes_this_iter: 8
  episodes_total: 4144
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 414720
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0042960005812346935
        max_q: 4.422242164611816
        mean_q: 4.324957370758057
        mean_td_error: 0.07636359333992004
        min_q: 4.227871894836426
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00965869426727295
        max_q: 3.4262778759002686
        mean_q: 3.338373899459839
        mean_td_error: -0.21663808822631836
        min_q: 3.249009847640991
    num_steps_sampled: 414720
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,405,701.026,414720,52.976,56.9874,48.918,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-17-28
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.226193508583044
  episode_reward_mean: 52.278398968100426
  episode_reward_min: 48.917983883283185
  episodes_this_iter: 16
  episodes_total: 4176
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 417792
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002418024465441704
        max_q: 4.422804832458496
        mean_q: 4.375117301940918
        mean_td_error: -0.030533432960510254
        min_q: 4.28796911239624
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0013349518412724137
        max_q: 3.481647253036499
        mean_q: 3.3795347213745117
        mean_td_error: -0.028141774237155914
        min_q: 3.281128168106079
    num_steps_sampled: 417792
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,408,706.968,417792,52.2784,56.2262,48.918,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-17-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.226193508583044
  episode_reward_mean: 52.59600901409747
  episode_reward_min: 47.85191302879442
  episodes_this_iter: 16
  episodes_total: 4208
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 420864
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0014589319471269846
        max_q: 3.9482858180999756
        mean_q: 3.8872878551483154
        mean_td_error: 0.0057172104716300964
        min_q: 3.835703134536743
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005109489429742098
        max_q: 3.264754056930542
        mean_q: 3.1247849464416504
        mean_td_error: -0.11807277053594589
        min_q: 3.0080959796905518
    num_steps_sampled: 420864
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,411,712.911,420864,52.596,56.2262,47.8519,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-17-40
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.02510511215776
  episode_reward_mean: 52.07168012455888
  episode_reward_min: 47.85191302879442
  episodes_this_iter: 8
  episodes_total: 4232
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 423936
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004685534629970789
        max_q: 4.2627973556518555
        mean_q: 4.061838626861572
        mean_td_error: 0.055816084146499634
        min_q: 3.969416379928589
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008305346593260765
        max_q: 3.199651002883911
        mean_q: 3.0384464263916016
        mean_td_error: -0.2087409496307373
        min_q: 2.896742343902588
    num_steps_sampled: 423936
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,414,718.677,423936,52.0717,56.0251,47.8519,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-17-46
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.02510511215776
  episode_reward_mean: 51.85198466076339
  episode_reward_min: 47.85191302879442
  episodes_this_iter: 8
  episodes_total: 4264
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 427008
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0035520910751074553
        max_q: 3.993687629699707
        mean_q: 3.936302900314331
        mean_td_error: -0.038207538425922394
        min_q: 3.7821950912475586
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004648701287806034
        max_q: 3.2622833251953125
        mean_q: 2.9944863319396973
        mean_td_error: -0.09497006237506866
        min_q: 2.788883924484253
    num_steps_sampled: 427008
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,417,724.334,427008,51.852,56.0251,47.8519,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-17-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.69710489680922
  episode_reward_mean: 52.83934741785389
  episode_reward_min: 48.945911494712604
  episodes_this_iter: 8
  episodes_total: 4296
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 430080
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014540626667439938
        max_q: 3.862388849258423
        mean_q: 3.6737027168273926
        mean_td_error: -0.1382429599761963
        min_q: 3.4674015045166016
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008473417721688747
        max_q: 2.87589955329895
        mean_q: 2.275413990020752
        mean_td_error: -0.16695241630077362
        min_q: 2.019606113433838
    num_steps_sampled: 430080
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,420,729.998,430080,52.8393,57.6971,48.9459,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-17-58
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.69710489680922
  episode_reward_mean: 52.97325246301886
  episode_reward_min: 49.61980802898702
  episodes_this_iter: 8
  episodes_total: 4328
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 433152
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.026263386011123657
        max_q: 3.6117911338806152
        mean_q: 3.444749116897583
        mean_td_error: -0.3005240559577942
        min_q: 3.283128023147583
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010479812510311604
        max_q: 2.1961557865142822
        mean_q: 2.0264954566955566
        mean_td_error: -0.24491262435913086
        min_q: 1.7051074504852295
    num_steps_sampled: 433152
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,423,735.476,433152,52.9733,57.6971,49.6198,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-18-03
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.69710489680922
  episode_reward_mean: 53.162956629729
  episode_reward_min: 49.726052054309015
  episodes_this_iter: 8
  episodes_total: 4360
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 436224
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009385431185364723
        max_q: 3.876836061477661
        mean_q: 3.4622154235839844
        mean_td_error: -0.10870422422885895
        min_q: 3.298327684402466
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0031913109123706818
        max_q: 2.138387680053711
        mean_q: 2.0526747703552246
        mean_td_error: -0.06170038506388664
        min_q: 1.9426076412200928
    num_steps_sampled: 436224
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,426,740.94,436224,53.163,57.6971,49.7261,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-18-09
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.69710489680922
  episode_reward_mean: 52.74250964979771
  episode_reward_min: 48.083773087083976
  episodes_this_iter: 16
  episodes_total: 4392
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 439296
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00828678347170353
        max_q: 3.87155818939209
        mean_q: 3.7422990798950195
        mean_td_error: -0.08242971450090408
        min_q: 3.6804986000061035
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.055144499987363815
        max_q: 1.707155466079712
        mean_q: 1.4075344800949097
        mean_td_error: -0.7055653929710388
        min_q: 1.1291875839233398
    num_steps_sampled: 439296
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,429,746.263,439296,52.7425,57.6971,48.0838,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-18-14
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.19675306289702
  episode_reward_mean: 52.59593346647146
  episode_reward_min: 48.083773087083976
  episodes_this_iter: 8
  episodes_total: 4416
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 442368
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.016601037234067917
        max_q: 3.762948989868164
        mean_q: 3.5491695404052734
        mean_td_error: -0.20547309517860413
        min_q: 3.2195303440093994
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0042089372873306274
        max_q: 1.1285852193832397
        mean_q: 0.9759519696235657
        mean_td_error: -0.02146058715879917
        min_q: 0.8180564641952515
    num_steps_sampled: 442368
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,432,751.526,442368,52.5959,58.1968,48.0838,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-18-20
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.19675306289702
  episode_reward_mean: 53.06444925326935
  episode_reward_min: 48.083773087083976
  episodes_this_iter: 8
  episodes_total: 4448
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 445440
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006762078497558832
        max_q: 3.1966183185577393
        mean_q: 2.95278263092041
        mean_td_error: -0.06690795719623566
        min_q: 2.7920846939086914
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03588787466287613
        max_q: 1.2208952903747559
        mean_q: 0.5568167567253113
        mean_td_error: -0.48010334372520447
        min_q: 0.26937174797058105
    num_steps_sampled: 445440
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,435,756.671,445440,53.0644,58.1968,48.0838,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-18-25
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.09162427104156
  episode_reward_mean: 53.990759446869816
  episode_reward_min: 48.083773087083976
  episodes_this_iter: 8
  episodes_total: 4480
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 448512
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001988732721656561
        max_q: 3.2513725757598877
        mean_q: 3.1321516036987305
        mean_td_error: 0.011794351041316986
        min_q: 3.0389912128448486
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02101232297718525
        max_q: 0.6218060255050659
        mean_q: 0.46211832761764526
        mean_td_error: -0.2752763628959656
        min_q: 0.234288290143013
    num_steps_sampled: 448512
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,438,762.111,448512,53.9908,60.0916,48.0838,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-18-31
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.497578081971014
  episode_reward_mean: 54.06276667681804
  episode_reward_min: 48.45013813561301
  episodes_this_iter: 8
  episodes_total: 4512
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 451584
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011895001865923405
        max_q: 2.9559860229492188
        mean_q: 2.801602840423584
        mean_td_error: -0.16367554664611816
        min_q: 2.7033181190490723
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03155077248811722
        max_q: 0.9959000945091248
        mean_q: 0.767611563205719
        mean_td_error: -0.37239885330200195
        min_q: 0.5039443969726562
    num_steps_sampled: 451584
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,441,767.581,451584,54.0628,60.4976,48.4501,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-18-37
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.497578081971014
  episode_reward_mean: 53.548025428303646
  episode_reward_min: 48.45013813561301
  episodes_this_iter: 8
  episodes_total: 4544
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 454656
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010280972346663475
        max_q: 2.9935481548309326
        mean_q: 2.8182597160339355
        mean_td_error: -0.13156670331954956
        min_q: 2.641010046005249
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.05444345250725746
        max_q: 0.13928881287574768
        mean_q: -0.18332719802856445
        mean_td_error: -0.5910689830780029
        min_q: -0.3202974200248718
    num_steps_sampled: 454656
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,444,773.092,454656,53.548,60.4976,48.4501,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-18-43
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.497578081971014
  episode_reward_mean: 52.58085403774278
  episode_reward_min: 48.45013813561301
  episodes_this_iter: 16
  episodes_total: 4576
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 457728
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0071107735857367516
        max_q: 3.1058216094970703
        mean_q: 2.9812726974487305
        mean_td_error: -0.09029600024223328
        min_q: 2.8099589347839355
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.037705305963754654
        max_q: 0.49228018522262573
        mean_q: 0.24765697121620178
        mean_td_error: -0.4490695595741272
        min_q: -0.09965881705284119
    num_steps_sampled: 457728
    num_steps

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,447,778.762,457728,52.5809,60.4976,48.4501,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-18-48
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.19037679340985
  episode_reward_mean: 52.20400761874827
  episode_reward_min: 47.430658776617136
  episodes_this_iter: 16
  episodes_total: 4608
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 460800
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.021286839619278908
        max_q: 2.9957406520843506
        mean_q: 2.787663459777832
        mean_td_error: -0.25629013776779175
        min_q: 2.605145215988159
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.029621822759509087
        max_q: 0.4166952967643738
        mean_q: 0.14549453556537628
        mean_td_error: -0.3314710557460785
        min_q: -0.11652319133281708
    num_steps_sampled: 460800
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,450,784.456,460800,52.204,60.1904,47.4307,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-18-54
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.98471843271005
  episode_reward_mean: 52.38955321698756
  episode_reward_min: 47.430658776617136
  episodes_this_iter: 8
  episodes_total: 4632
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 463872
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0077004642225801945
        max_q: 3.2311205863952637
        mean_q: 3.125513792037964
        mean_td_error: -0.09120413661003113
        min_q: 3.0648794174194336
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.018514668568968773
        max_q: 1.4459978342056274
        mean_q: 0.990249752998352
        mean_td_error: -0.16385872662067413
        min_q: 0.723791241645813
    num_steps_sampled: 463872
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,453,790.137,463872,52.3896,56.9847,47.4307,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-19-00
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.709555559303446
  episode_reward_mean: 52.84439734124934
  episode_reward_min: 47.430658776617136
  episodes_this_iter: 8
  episodes_total: 4664
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 466944
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0019813382532447577
        max_q: 3.2610023021698
        mean_q: 3.199655294418335
        mean_td_error: -0.03622227907180786
        min_q: 3.1235008239746094
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008548854850232601
        max_q: 2.0319299697875977
        mean_q: 1.8613313436508179
        mean_td_error: 0.1240241527557373
        min_q: 1.7131669521331787
    num_steps_sampled: 466944
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,456,795.847,466944,52.8444,56.7096,47.4307,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-19-06
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.709555559303446
  episode_reward_mean: 52.73263456363935
  episode_reward_min: 47.464393120328914
  episodes_this_iter: 8
  episodes_total: 4696
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 470016
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0018210799898952246
        max_q: 4.097869873046875
        mean_q: 4.050626754760742
        mean_td_error: 0.037292033433914185
        min_q: 3.983098030090332
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008375972509384155
        max_q: 2.7077066898345947
        mean_q: 2.5992486476898193
        mean_td_error: 0.11274707317352295
        min_q: 2.3884642124176025
    num_steps_sampled: 470016
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,459,801.498,470016,52.7326,56.7096,47.4644,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-19-12
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.709555559303446
  episode_reward_mean: 52.47764893519872
  episode_reward_min: 48.325941323698366
  episodes_this_iter: 8
  episodes_total: 4728
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 473088
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0008459047530777752
        max_q: 4.418964862823486
        mean_q: 4.302973747253418
        mean_td_error: 0.0006766915321350098
        min_q: 4.217403411865234
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0037185705732554197
        max_q: 3.6160340309143066
        mean_q: 3.5145912170410156
        mean_td_error: 0.04719649255275726
        min_q: 3.38505482673645
    num_steps_sampled: 473088
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,462,807.183,473088,52.4776,56.7096,48.3259,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-19-18
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.994044536296755
  episode_reward_mean: 51.656837950786596
  episode_reward_min: 48.325941323698366
  episodes_this_iter: 16
  episodes_total: 4760
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 476160
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0011011370224878192
        max_q: 4.54673433303833
        mean_q: 4.494300365447998
        mean_td_error: -0.010237723588943481
        min_q: 4.422001361846924
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0008872375474311411
        max_q: 3.9299628734588623
        mean_q: 3.8406779766082764
        mean_td_error: 0.0025721266865730286
        min_q: 3.702256917953491
    num_steps_sampled: 476160
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,465,812.526,476160,51.6568,55.994,48.3259,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-19-23
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.842803443488364
  episode_reward_mean: 51.351584288311685
  episode_reward_min: 48.325941323698366
  episodes_this_iter: 16
  episodes_total: 4792
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 479232
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0029332430567592382
        max_q: 4.560559272766113
        mean_q: 4.502508163452148
        mean_td_error: 0.064323291182518
        min_q: 4.445010662078857
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0033454331569373608
        max_q: 4.23976469039917
        mean_q: 4.200336933135986
        mean_td_error: -0.0456620454788208
        min_q: 4.157054901123047
    num_steps_sampled: 479232
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,468,817.845,479232,51.3516,55.8428,48.3259,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-19-29
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.842803443488364
  episode_reward_mean: 51.552922515497585
  episode_reward_min: 48.325941323698366
  episodes_this_iter: 8
  episodes_total: 4816
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 482304
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01292005181312561
        max_q: 4.284113883972168
        mean_q: 4.185408115386963
        mean_td_error: -0.1912146806716919
        min_q: 4.091946125030518
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004803099669516087
        max_q: 4.358269691467285
        mean_q: 4.240931987762451
        mean_td_error: -0.05830270051956177
        min_q: 4.012923717498779
    num_steps_sampled: 482304
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,471,823.127,482304,51.5529,55.8428,48.3259,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-19-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.842803443488364
  episode_reward_mean: 51.41765035435015
  episode_reward_min: 46.67492535818362
  episodes_this_iter: 8
  episodes_total: 4848
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 485376
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013445544056594372
        max_q: 3.923894166946411
        mean_q: 3.677811861038208
        mean_td_error: -0.23329490423202515
        min_q: 3.522573947906494
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005168254021555185
        max_q: 4.330455303192139
        mean_q: 4.287644386291504
        mean_td_error: 0.08187907934188843
        min_q: 4.211248397827148
    num_steps_sampled: 485376
    num_steps_trained: 48

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,474,828.486,485376,51.4177,55.8428,46.6749,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-19-40
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.49301022480406
  episode_reward_mean: 51.1593621308094
  episode_reward_min: 45.08105161066463
  episodes_this_iter: 8
  episodes_total: 4880
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 488448
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007745427545160055
        max_q: 3.494107961654663
        mean_q: 3.426668643951416
        mean_td_error: -0.12102047353982925
        min_q: 3.322136878967285
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001828418462537229
        max_q: 4.474244594573975
        mean_q: 4.370811462402344
        mean_td_error: 0.02187328040599823
        min_q: 4.309470176696777
    num_steps_sampled: 488448
    num_steps_trained: 4874

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,477,834.023,488448,51.1594,55.493,45.0811,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-19-46
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.46442922094282
  episode_reward_mean: 51.274524736788244
  episode_reward_min: 45.08105161066463
  episodes_this_iter: 8
  episodes_total: 4912
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 491520
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0052832323126494884
        max_q: 3.623009443283081
        mean_q: 3.3474745750427246
        mean_td_error: -0.08001533150672913
        min_q: 3.2208051681518555
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014560132287442684
        max_q: 4.2844390869140625
        mean_q: 4.2053022384643555
        mean_td_error: -0.23395715653896332
        min_q: 4.1205949783325195
    num_steps_sampled: 491520
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,480,839.725,491520,51.2745,56.4644,45.0811,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-19-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.46442922094282
  episode_reward_mean: 52.23594965737082
  episode_reward_min: 45.08105161066463
  episodes_this_iter: 16
  episodes_total: 4944
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 494592
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0076521518640220165
        max_q: 2.9290826320648193
        mean_q: 2.507537364959717
        mean_td_error: -0.07449658960103989
        min_q: 2.1322436332702637
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002067893510684371
        max_q: 4.529878616333008
        mean_q: 4.404983043670654
        mean_td_error: 0.01458314061164856
        min_q: 4.292179584503174
    num_steps_sampled: 494592
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,483,845.5,494592,52.2359,56.4644,45.0811,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-19-58
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.46442922094282
  episode_reward_mean: 52.70216399244088
  episode_reward_min: 48.82564189586149
  episodes_this_iter: 16
  episodes_total: 4976
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 497664
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013239603489637375
        max_q: 2.8158252239227295
        mean_q: 2.3555312156677246
        mean_td_error: -0.18426159024238586
        min_q: 2.007859468460083
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004196971654891968
        max_q: 4.4754791259765625
        mean_q: 4.331431865692139
        mean_td_error: 0.06114174425601959
        min_q: 4.211744785308838
    num_steps_sampled: 497664
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,486,851.407,497664,52.7022,56.4644,48.8256,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-20-04
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.795189837253346
  episode_reward_mean: 52.526421976983414
  episode_reward_min: 48.74345451767029
  episodes_this_iter: 8
  episodes_total: 5000
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 500736
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012111951597034931
        max_q: 2.09501314163208
        mean_q: 1.8217296600341797
        mean_td_error: -0.15536808967590332
        min_q: 1.2991224527359009
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004588615149259567
        max_q: 4.404139041900635
        mean_q: 4.29530668258667
        mean_td_error: 0.06928218901157379
        min_q: 4.164778709411621
    num_steps_sampled: 500736
    num_steps_trained: 4

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,489,857.288,500736,52.5264,57.7952,48.7435,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-20-10
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.795189837253346
  episode_reward_mean: 52.36442336752732
  episode_reward_min: 48.74345451767029
  episodes_this_iter: 8
  episodes_total: 5032
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 503808
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008829133585095406
        max_q: 2.116325616836548
        mean_q: 1.7511391639709473
        mean_td_error: -0.07555225491523743
        min_q: 1.4832725524902344
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002633018186315894
        max_q: 4.321732997894287
        mean_q: 4.156565189361572
        mean_td_error: -0.0010521188378334045
        min_q: 4.016885280609131
    num_steps_sampled: 503808
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,492,863.194,503808,52.3644,57.7952,48.7435,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-20-16
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.795189837253346
  episode_reward_mean: 51.745107934232344
  episode_reward_min: 46.682628204934836
  episodes_this_iter: 8
  episodes_total: 5064
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 506880
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009496014565229416
        max_q: 1.1555119752883911
        mean_q: 0.8654475212097168
        mean_td_error: -0.13281095027923584
        min_q: 0.5015362501144409
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004250905476510525
        max_q: 4.5344133377075195
        mean_q: 4.407993316650391
        mean_td_error: 0.05581989884376526
        min_q: 4.347445487976074
    num_steps_sampled: 506880
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,495,869.11,506880,51.7451,57.7952,46.6826,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-20-23
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.795189837253346
  episode_reward_mean: 52.11714211106486
  episode_reward_min: 46.682628204934836
  episodes_this_iter: 8
  episodes_total: 5096
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 509952
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006494130939245224
        max_q: 1.6540309190750122
        mean_q: 1.5475149154663086
        mean_td_error: 0.1156027615070343
        min_q: 1.3705641031265259
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003823196282610297
        max_q: 4.458005428314209
        mean_q: 4.325725555419922
        mean_td_error: -0.04709161818027496
        min_q: 4.1508636474609375
    num_steps_sampled: 509952
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,498,875.058,509952,52.1171,57.7952,46.6826,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-20-29
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.34017454588534
  episode_reward_mean: 52.232566257311056
  episode_reward_min: 46.682628204934836
  episodes_this_iter: 8
  episodes_total: 5128
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 513024
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006178432609885931
        max_q: 2.823046922683716
        mean_q: 2.712594985961914
        mean_td_error: 0.1168096661567688
        min_q: 2.5800485610961914
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005178115330636501
        max_q: 4.422918796539307
        mean_q: 4.118498802185059
        mean_td_error: -0.01697615534067154
        min_q: 3.9288525581359863
    num_steps_sampled: 513024
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,501,880.973,513024,52.2326,55.3402,46.6826,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-20-35
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.95690932403443
  episode_reward_mean: 52.252509638917324
  episode_reward_min: 47.91443475149789
  episodes_this_iter: 16
  episodes_total: 5160
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 516096
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00339679978787899
        max_q: 3.584296226501465
        mean_q: 3.514346122741699
        mean_td_error: 0.06238409876823425
        min_q: 3.3923981189727783
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.024246513843536377
        max_q: 3.9792075157165527
        mean_q: 3.3258635997772217
        mean_td_error: 0.28679370880126953
        min_q: 3.073665142059326
    num_steps_sampled: 516096
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,504,887.027,516096,52.2525,54.9569,47.9144,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-20-41
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.40538259248036
  episode_reward_mean: 51.67975807741135
  episode_reward_min: 47.91443475149789
  episodes_this_iter: 8
  episodes_total: 5184
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 519168
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005035334266722202
        max_q: 3.9712202548980713
        mean_q: 3.9068455696105957
        mean_td_error: 0.08026745170354843
        min_q: 3.7553327083587646
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0350097231566906
        max_q: 2.623547077178955
        mean_q: 2.3480851650238037
        mean_td_error: -0.45251306891441345
        min_q: 2.242727518081665
    num_steps_sampled: 519168
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,507,892.711,519168,51.6798,54.4054,47.9144,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-20-47
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.31596867356749
  episode_reward_mean: 51.20958252801104
  episode_reward_min: 47.91443475149789
  episodes_this_iter: 8
  episodes_total: 5216
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 522240
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0033757472410798073
        max_q: 4.177632808685303
        mean_q: 4.138650894165039
        mean_td_error: -0.06218935549259186
        min_q: 4.075313568115234
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0033399672247469425
        max_q: 2.6178572177886963
        mean_q: 2.4462907314300537
        mean_td_error: 0.03361080586910248
        min_q: 2.2688889503479004
    num_steps_sampled: 522240
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,510,898.415,522240,51.2096,54.316,47.9144,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-20-53
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.9293638538251
  episode_reward_mean: 51.412166830317794
  episode_reward_min: 47.91443475149789
  episodes_this_iter: 8
  episodes_total: 5248
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 525312
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004466854501515627
        max_q: 4.44123649597168
        mean_q: 4.387332916259766
        mean_td_error: -0.07873287796974182
        min_q: 4.3095808029174805
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0016849564854055643
        max_q: 2.887484312057495
        mean_q: 2.7829577922821045
        mean_td_error: 0.016831055283546448
        min_q: 2.6632399559020996
    num_steps_sampled: 525312
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,513,904.034,525312,51.4122,55.9294,47.9144,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-20-58
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.98955974681803
  episode_reward_mean: 52.236671314634286
  episode_reward_min: 48.09685514752477
  episodes_this_iter: 8
  episodes_total: 5280
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 528384
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005404334515333176
        max_q: 4.529600143432617
        mean_q: 4.431656837463379
        mean_td_error: -0.09936653077602386
        min_q: 4.387056827545166
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0017162997974082828
        max_q: 3.488507032394409
        mean_q: 3.385676860809326
        mean_td_error: -0.011453405022621155
        min_q: 3.2870371341705322
    num_steps_sampled: 528384
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,516,909.45,528384,52.2367,58.9896,48.0969,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-21-04
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.98955974681803
  episode_reward_mean: 52.98641952312416
  episode_reward_min: 48.637759598947255
  episodes_this_iter: 8
  episodes_total: 5312
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 531456
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004360325168818235
        max_q: 4.485986709594727
        mean_q: 4.452106952667236
        mean_td_error: -0.08130684494972229
        min_q: 4.42784309387207
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002525544026866555
        max_q: 3.251024007797241
        mean_q: 3.196046829223633
        mean_td_error: -0.024095185101032257
        min_q: 3.1214563846588135
    num_steps_sampled: 531456
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,519,914.672,531456,52.9864,58.9896,48.6378,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-21-09
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.98955974681803
  episode_reward_mean: 52.856298032150896
  episode_reward_min: 48.637759598947255
  episodes_this_iter: 16
  episodes_total: 5344
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 534528
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004935507196933031
        max_q: 4.548189163208008
        mean_q: 4.476505279541016
        mean_td_error: -0.08613891899585724
        min_q: 4.411603927612305
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006803239230066538
        max_q: 3.4547061920166016
        mean_q: 3.3060998916625977
        mean_td_error: -0.0631721019744873
        min_q: 3.166645050048828
    num_steps_sampled: 534528
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,522,919.815,534528,52.8563,58.9896,48.6378,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-21-15
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.98955974681803
  episode_reward_mean: 52.131203931924745
  episode_reward_min: 47.21299288162403
  episodes_this_iter: 16
  episodes_total: 5376
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 537600
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0014472667826339602
        max_q: 4.573348045349121
        mean_q: 4.480504989624023
        mean_td_error: -0.014129966497421265
        min_q: 4.4172139167785645
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0035407254472374916
        max_q: 3.6322805881500244
        mean_q: 3.551323890686035
        mean_td_error: -0.02831438183784485
        min_q: 3.4914731979370117
    num_steps_sampled: 537600
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,525,925.057,537600,52.1312,58.9896,47.213,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-21-20
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.91505800101464
  episode_reward_mean: 51.957533465756924
  episode_reward_min: 47.21299288162403
  episodes_this_iter: 8
  episodes_total: 5400
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 540672
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004325034096837044
        max_q: 4.524443626403809
        mean_q: 4.453888416290283
        mean_td_error: -0.07243302464485168
        min_q: 4.343066692352295
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006256879307329655
        max_q: 3.475905418395996
        mean_q: 3.1404333114624023
        mean_td_error: -0.04783182591199875
        min_q: 2.993220329284668
    num_steps_sampled: 540672
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,528,930.557,540672,51.9575,56.9151,47.213,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-21-26
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.91505800101464
  episode_reward_mean: 51.720606251677715
  episode_reward_min: 47.21299288162403
  episodes_this_iter: 8
  episodes_total: 5432
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 543744
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005538174416869879
        max_q: 4.348113059997559
        mean_q: 4.25031852722168
        mean_td_error: -0.09171010553836823
        min_q: 4.181365966796875
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0014363379450514913
        max_q: 3.5616273880004883
        mean_q: 3.506214141845703
        mean_td_error: -0.008839324116706848
        min_q: 3.362388849258423
    num_steps_sampled: 543744
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,531,936.039,543744,51.7206,56.9151,47.213,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-21-32
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.55404771114352
  episode_reward_mean: 51.79449323773517
  episode_reward_min: 48.65352677502469
  episodes_this_iter: 8
  episodes_total: 5464
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 546816
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002488610800355673
        max_q: 4.314937114715576
        mean_q: 4.128909111022949
        mean_td_error: 0.001436181366443634
        min_q: 3.944519519805908
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006752643268555403
        max_q: 3.6659929752349854
        mean_q: 3.587965965270996
        mean_td_error: -0.09167422354221344
        min_q: 3.4489498138427734
    num_steps_sampled: 546816
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,534,941.541,546816,51.7945,55.554,48.6535,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-21-37
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.7746459756015
  episode_reward_mean: 52.44644246768724
  episode_reward_min: 48.65352677502469
  episodes_this_iter: 8
  episodes_total: 5496
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 549888
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0034977032337337732
        max_q: 4.216822624206543
        mean_q: 4.122323513031006
        mean_td_error: -0.06555482745170593
        min_q: 4.004255294799805
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.016552556306123734
        max_q: 3.389227867126465
        mean_q: 3.2976861000061035
        mean_td_error: -0.2827602028846741
        min_q: 3.097254514694214
    num_steps_sampled: 549888
    num_steps_trained: 54

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,537,946.945,549888,52.4464,56.7746,48.6535,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-21-43
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.7746459756015
  episode_reward_mean: 52.44449222014785
  episode_reward_min: 48.233994399175444
  episodes_this_iter: 16
  episodes_total: 5528
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 552960
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004378748591989279
        max_q: 4.455986022949219
        mean_q: 4.323827743530273
        mean_td_error: 0.07941719889640808
        min_q: 4.2038373947143555
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00867997296154499
        max_q: 3.2117273807525635
        mean_q: 2.9832231998443604
        mean_td_error: -0.1284065693616867
        min_q: 2.862541437149048
    num_steps_sampled: 552960
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,540,952.256,552960,52.4445,56.7746,48.234,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-21-48
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.7746459756015
  episode_reward_mean: 52.952479767564974
  episode_reward_min: 48.233994399175444
  episodes_this_iter: 16
  episodes_total: 5560
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 556032
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014827428385615349
        max_q: 4.4210124015808105
        mean_q: 3.986187696456909
        mean_td_error: -0.259127676486969
        min_q: 3.7523624897003174
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004714122042059898
        max_q: 3.1022558212280273
        mean_q: 2.9236385822296143
        mean_td_error: -0.05903400480747223
        min_q: 2.74029803276062
    num_steps_sampled: 556032
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,543,957.648,556032,52.9525,56.7746,48.234,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-21-54
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.27819275546894
  episode_reward_mean: 52.576883690068115
  episode_reward_min: 48.233994399175444
  episodes_this_iter: 8
  episodes_total: 5584
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 559104
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.039532117545604706
        max_q: 3.8795032501220703
        mean_q: 3.3709115982055664
        mean_td_error: -0.7187946438789368
        min_q: 2.933436393737793
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012446574866771698
        max_q: 2.969926357269287
        mean_q: 2.85012149810791
        mean_td_error: -0.1844993233680725
        min_q: 2.6988048553466797
    num_steps_sampled: 559104
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,546,963.507,559104,52.5769,56.2782,48.234,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-22-00
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.222358850305255
  episode_reward_mean: 52.433610374628
  episode_reward_min: 48.151310446637375
  episodes_this_iter: 8
  episodes_total: 5616
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 562176
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002177721820771694
        max_q: 3.617976427078247
        mean_q: 3.4097342491149902
        mean_td_error: -0.026115290820598602
        min_q: 3.3289034366607666
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009154705330729485
        max_q: 2.9800028800964355
        mean_q: 2.8012068271636963
        mean_td_error: -0.1269063651561737
        min_q: 2.642961025238037
    num_steps_sampled: 562176
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,549,969.188,562176,52.4336,55.2224,48.1513,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-22-06
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.815190509557134
  episode_reward_mean: 52.32892299886034
  episode_reward_min: 48.151310446637375
  episodes_this_iter: 8
  episodes_total: 5648
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 565248
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014262475073337555
        max_q: 3.4861085414886475
        mean_q: 3.2051899433135986
        mean_td_error: -0.21897360682487488
        min_q: 3.103839159011841
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0232992060482502
        max_q: 2.244892120361328
        mean_q: 1.9691861867904663
        mean_td_error: -0.3231693208217621
        min_q: 1.8123215436935425
    num_steps_sampled: 565248
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,552,974.774,565248,52.3289,54.8152,48.1513,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-22-12
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.5363301519096
  episode_reward_mean: 52.646557590374194
  episode_reward_min: 48.151310446637375
  episodes_this_iter: 8
  episodes_total: 5680
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 568320
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005240594036877155
        max_q: 3.425208806991577
        mean_q: 3.3495583534240723
        mean_td_error: 0.09912700951099396
        min_q: 3.234022617340088
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004285510163754225
        max_q: 1.9568898677825928
        mean_q: 1.6173503398895264
        mean_td_error: -0.039419710636138916
        min_q: 1.3683629035949707
    num_steps_sampled: 568320
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,555,980.291,568320,52.6466,56.5363,48.1513,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-22-18
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.5363301519096
  episode_reward_mean: 52.35724600105712
  episode_reward_min: 48.29740509982294
  episodes_this_iter: 16
  episodes_total: 5712
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 571392
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.017554571852087975
        max_q: 3.3652563095092773
        mean_q: 3.1842265129089355
        mean_td_error: -0.2858929932117462
        min_q: 3.0165162086486816
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0022901215124875307
        max_q: 2.1278321743011475
        mean_q: 1.9547276496887207
        mean_td_error: -0.011319249868392944
        min_q: 1.8429080247879028
    num_steps_sampled: 571392
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,558,985.816,571392,52.3572,56.5363,48.2974,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-22-23
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.5363301519096
  episode_reward_mean: 51.835666874733505
  episode_reward_min: 46.17574695892267
  episodes_this_iter: 16
  episodes_total: 5744
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 574464
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0074124764651060104
        max_q: 3.361468553543091
        mean_q: 3.1194708347320557
        mean_td_error: -0.12354368716478348
        min_q: 2.9756994247436523
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0030976026318967342
        max_q: 2.861030101776123
        mean_q: 2.613077163696289
        mean_td_error: 0.037573687732219696
        min_q: 2.446072578430176
    num_steps_sampled: 574464
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,561,991.328,574464,51.8357,56.5363,46.1757,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-22-29
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.49701663516733
  episode_reward_mean: 50.90114010818181
  episode_reward_min: 46.17574695892267
  episodes_this_iter: 8
  episodes_total: 5768
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 577536
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008917427621781826
        max_q: 3.2981221675872803
        mean_q: 3.1742327213287354
        mean_td_error: -0.14915555715560913
        min_q: 3.062208652496338
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004124926868826151
        max_q: 3.0786197185516357
        mean_q: 2.9171769618988037
        mean_td_error: -0.026735812425613403
        min_q: 2.7408010959625244
    num_steps_sampled: 577536
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,564,996.89,577536,50.9011,56.497,46.1757,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-22-35
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.49701663516733
  episode_reward_mean: 50.69344088652458
  episode_reward_min: 46.17574695892267
  episodes_this_iter: 8
  episodes_total: 5800
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 580608
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009329348802566528
        max_q: 3.5710175037384033
        mean_q: 3.4713664054870605
        mean_td_error: -0.15759222209453583
        min_q: 3.266873836517334
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.017864061519503593
        max_q: 2.9491796493530273
        mean_q: 2.6694490909576416
        mean_td_error: -0.20957207679748535
        min_q: 2.4773857593536377
    num_steps_sampled: 580608
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,567,1002.41,580608,50.6934,56.497,46.1757,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-22-41
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.831174735864224
  episode_reward_mean: 52.5274618813104
  episode_reward_min: 47.78595974132742
  episodes_this_iter: 8
  episodes_total: 5832
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 583680
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004126851446926594
        max_q: 3.265019178390503
        mean_q: 2.858315944671631
        mean_td_error: -0.05475287139415741
        min_q: 2.5397555828094482
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004020425956696272
        max_q: 3.0001118183135986
        mean_q: 2.9256410598754883
        mean_td_error: 0.06781596690416336
        min_q: 2.799285411834717
    num_steps_sampled: 583680
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,570,1008,583680,52.5275,58.8312,47.786,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-22-47
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.07403336483943
  episode_reward_mean: 53.38212831744837
  episode_reward_min: 47.89931584880728
  episodes_this_iter: 8
  episodes_total: 5864
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 586752
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01140530500560999
        max_q: 3.298905611038208
        mean_q: 3.008089780807495
        mean_td_error: -0.24837562441825867
        min_q: 2.8232977390289307
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010310742072761059
        max_q: 3.67069673538208
        mean_q: 3.559567928314209
        mean_td_error: -0.17998570203781128
        min_q: 3.503567695617676
    num_steps_sampled: 586752
    num_steps_trained: 585

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,573,1013.74,586752,53.3821,60.074,47.8993,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-22-53
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.07403336483943
  episode_reward_mean: 55.02560367798142
  episode_reward_min: 49.85926794082651
  episodes_this_iter: 8
  episodes_total: 5896
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 589824
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0016080053756013513
        max_q: 3.4363911151885986
        mean_q: 3.345766067504883
        mean_td_error: 0.026139721274375916
        min_q: 3.2255756855010986
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005239155609160662
        max_q: 4.101717472076416
        mean_q: 3.981247901916504
        mean_td_error: 0.10287808626890182
        min_q: 3.903621196746826
    num_steps_sampled: 589824
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,576,1019.47,589824,55.0256,60.074,49.8593,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-22-59
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.07403336483943
  episode_reward_mean: 55.09014759280005
  episode_reward_min: 50.191259812452174
  episodes_this_iter: 16
  episodes_total: 5928
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 592896
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009893744252622128
        max_q: 3.456772804260254
        mean_q: 3.231090545654297
        mean_td_error: -0.10411644726991653
        min_q: 2.96917986869812
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011159423738718033
        max_q: 4.026120662689209
        mean_q: 3.9435367584228516
        mean_td_error: -0.2141614854335785
        min_q: 3.877864122390747
    num_steps_sampled: 592896
    num_steps_trained: 5

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,579,1025.18,592896,55.0901,60.074,50.1913,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-23-04
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.07403336483943
  episode_reward_mean: 55.26023163446751
  episode_reward_min: 50.6418341238556
  episodes_this_iter: 8
  episodes_total: 5952
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 595968
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.016182303428649902
        max_q: 2.845306873321533
        mean_q: 2.415493965148926
        mean_td_error: -0.18130265176296234
        min_q: 2.0539846420288086
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009194147773087025
        max_q: 3.775681734085083
        mean_q: 3.6616196632385254
        mean_td_error: -0.1658805012702942
        min_q: 3.587738037109375
    num_steps_sampled: 595968
    num_steps_trained: 59

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,582,1030.89,595968,55.2602,60.074,50.6418,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-23-10
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.28383772980651
  episode_reward_mean: 54.85108090926171
  episode_reward_min: 49.80478156625447
  episodes_this_iter: 8
  episodes_total: 5984
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 599040
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012566527351737022
        max_q: 2.045619487762451
        mean_q: 1.3285719156265259
        mean_td_error: -0.11873260140419006
        min_q: 1.1011430025100708
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0030862849671393633
        max_q: 3.9459292888641357
        mean_q: 3.803223133087158
        mean_td_error: -0.05652822554111481
        min_q: 3.7116520404815674
    num_steps_sampled: 599040
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,585,1036.6,599040,54.8511,59.2838,49.8048,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-23-17
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.28383772980651
  episode_reward_mean: 54.45177533085562
  episode_reward_min: 49.80478156625447
  episodes_this_iter: 8
  episodes_total: 6016
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 602112
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.023202549666166306
        max_q: 1.201038122177124
        mean_q: 1.0564525127410889
        mean_td_error: -0.2412620335817337
        min_q: 0.9011912941932678
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0015993155539035797
        max_q: 4.222529411315918
        mean_q: 4.118388652801514
        mean_td_error: -0.017337851226329803
        min_q: 3.9991695880889893
    num_steps_sampled: 602112
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,588,1042.5,602112,54.4518,59.2838,49.8048,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-23-22
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.62710412933888
  episode_reward_mean: 54.72450506044237
  episode_reward_min: 49.80478156625447
  episodes_this_iter: 8
  episodes_total: 6048
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 605184
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.021306905895471573
        max_q: 1.6071430444717407
        mean_q: 1.2449047565460205
        mean_td_error: -0.21686212718486786
        min_q: 0.9496769905090332
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.000946587068028748
        max_q: 4.334255218505859
        mean_q: 4.255733013153076
        mean_td_error: 0.002732396125793457
        min_q: 4.126580715179443
    num_steps_sampled: 605184
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,591,1048.14,605184,54.7245,58.6271,49.8048,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-23-28
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.31965762593033
  episode_reward_mean: 55.54425191811126
  episode_reward_min: 51.18391448076434
  episodes_this_iter: 8
  episodes_total: 6080
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 608256
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.021790267899632454
        max_q: 1.4704445600509644
        mean_q: 1.2597576379776
        mean_td_error: -0.20512032508850098
        min_q: 1.089927077293396
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0052017769776284695
        max_q: 4.4537835121154785
        mean_q: 4.348723411560059
        mean_td_error: -0.088883176445961
        min_q: 4.152278900146484
    num_steps_sampled: 608256
    num_steps_trained: 607

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,594,1053.67,608256,55.5443,59.3197,51.1839,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-23-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.31965762593033
  episode_reward_mean: 55.497461726514594
  episode_reward_min: 50.977473435853234
  episodes_this_iter: 16
  episodes_total: 6112
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 611328
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008701043203473091
        max_q: 1.7546555995941162
        mean_q: 1.4794200658798218
        mean_td_error: 0.08639471232891083
        min_q: 1.2807197570800781
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004103870131075382
        max_q: 4.333986282348633
        mean_q: 4.2543110847473145
        mean_td_error: -0.07443864643573761
        min_q: 4.198360443115234
    num_steps_sampled: 611328
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,597,1059.18,611328,55.4975,59.3197,50.9775,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-23-40
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.31965762593033
  episode_reward_mean: 54.78671279332623
  episode_reward_min: 49.561367591245094
  episodes_this_iter: 16
  episodes_total: 6144
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 614400
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007287012878805399
        max_q: 2.302731513977051
        mean_q: 2.0419840812683105
        mean_td_error: -0.05881017446517944
        min_q: 1.9410291910171509
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010241558775305748
        max_q: 4.359125137329102
        mean_q: 4.078288555145264
        mean_td_error: -0.09379030019044876
        min_q: 3.864180326461792
    num_steps_sampled: 614400
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,600,1064.7,614400,54.7867,59.3197,49.5614,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-23-45
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.36801920158083
  episode_reward_mean: 54.587646789018
  episode_reward_min: 49.561367591245094
  episodes_this_iter: 8
  episodes_total: 6168
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 617472
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005722923204302788
        max_q: 2.716129779815674
        mean_q: 2.602320432662964
        mean_td_error: -0.05648744851350784
        min_q: 2.431838035583496
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011238387785851955
        max_q: 3.4795451164245605
        mean_q: 3.3483614921569824
        mean_td_error: -0.19213831424713135
        min_q: 3.0984504222869873
    num_steps_sampled: 617472
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,603,1070.14,617472,54.5876,59.368,49.5614,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-23-51
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.36801920158083
  episode_reward_mean: 53.63536929095162
  episode_reward_min: 49.561367591245094
  episodes_this_iter: 8
  episodes_total: 6200
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 620544
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03030138835310936
        max_q: 2.573453664779663
        mean_q: 2.400960922241211
        mean_td_error: -0.32938843965530396
        min_q: 2.266002655029297
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.019081149250268936
        max_q: 3.5462594032287598
        mean_q: 3.3273651599884033
        mean_td_error: -0.28911542892456055
        min_q: 3.089827537536621
    num_steps_sampled: 620544
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,606,1075.5,620544,53.6354,59.368,49.5614,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-23-56
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.36801920158083
  episode_reward_mean: 54.53168358858826
  episode_reward_min: 50.53824184563193
  episodes_this_iter: 8
  episodes_total: 6232
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 623616
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009494142606854439
        max_q: 3.0463955402374268
        mean_q: 2.920548439025879
        mean_td_error: -0.10250314325094223
        min_q: 2.841582775115967
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014055121690034866
        max_q: 3.209260940551758
        mean_q: 3.0796353816986084
        mean_td_error: -0.21850773692131042
        min_q: 2.921762704849243
    num_steps_sampled: 623616
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,609,1080.85,623616,54.5317,59.368,50.5382,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-24-02
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.290706837336735
  episode_reward_mean: 54.396283784388515
  episode_reward_min: 49.27402368265072
  episodes_this_iter: 8
  episodes_total: 6264
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 626688
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014378037303686142
        max_q: 3.2671897411346436
        mean_q: 3.085845708847046
        mean_td_error: -0.17098522186279297
        min_q: 2.954735517501831
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0036878385581076145
        max_q: 3.151414394378662
        mean_q: 3.1204187870025635
        mean_td_error: 0.053428106009960175
        min_q: 3.0500783920288086
    num_steps_sampled: 626688
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,612,1086.28,626688,54.3963,59.2907,49.274,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-24-08
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.17080123978106
  episode_reward_mean: 53.95706572818609
  episode_reward_min: 49.27402368265072
  episodes_this_iter: 16
  episodes_total: 6296
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 629760
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005463877692818642
        max_q: 3.4738717079162598
        mean_q: 3.3420753479003906
        mean_td_error: -0.05839492380619049
        min_q: 3.2162108421325684
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0041070557199418545
        max_q: 3.730729818344116
        mean_q: 3.6278839111328125
        mean_td_error: 0.09678927809000015
        min_q: 3.5228750705718994
    num_steps_sampled: 629760
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,615,1091.8,629760,53.9571,58.1708,49.274,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-24-13
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.98470789088328
  episode_reward_mean: 53.42424955585508
  episode_reward_min: 48.587148021509684
  episodes_this_iter: 16
  episodes_total: 6328
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 632832
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005695579573512077
        max_q: 3.828761100769043
        mean_q: 3.7084245681762695
        mean_td_error: 0.06701554358005524
        min_q: 3.615595579147339
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007792378775775433
        max_q: 3.9263851642608643
        mean_q: 3.786501884460449
        mean_td_error: -0.1611703336238861
        min_q: 3.7075889110565186
    num_steps_sampled: 632832
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,618,1097.32,632832,53.4242,57.9847,48.5871,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-24-19
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.809837036231706
  episode_reward_mean: 53.148499744316126
  episode_reward_min: 48.587148021509684
  episodes_this_iter: 8
  episodes_total: 6352
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 635904
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010033986531198025
        max_q: 3.940227746963501
        mean_q: 3.769953727722168
        mean_td_error: -0.09010670334100723
        min_q: 3.6326417922973633
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0009480408043600619
        max_q: 4.112774848937988
        mean_q: 4.036542892456055
        mean_td_error: -0.017585404217243195
        min_q: 3.9619643688201904
    num_steps_sampled: 635904
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,621,1102.88,635904,53.1485,57.8098,48.5871,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-24-25
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.65962830058222
  episode_reward_mean: 53.11887247605936
  episode_reward_min: 47.616773218988136
  episodes_this_iter: 8
  episodes_total: 6384
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 638976
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003329333383589983
        max_q: 3.6896331310272217
        mean_q: 3.5242481231689453
        mean_td_error: -0.027966484427452087
        min_q: 3.411958694458008
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0013106632977724075
        max_q: 4.416877746582031
        mean_q: 4.361198425292969
        mean_td_error: -0.02345070242881775
        min_q: 4.275486469268799
    num_steps_sampled: 638976
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,624,1108.42,638976,53.1189,57.6596,47.6168,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-24-31
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.492889426861645
  episode_reward_mean: 53.53169819421485
  episode_reward_min: 47.616773218988136
  episodes_this_iter: 8
  episodes_total: 6416
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 642048
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002749380888417363
        max_q: 3.704979658126831
        mean_q: 3.536100387573242
        mean_td_error: -0.00888349860906601
        min_q: 3.3574466705322266
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00797059666365385
        max_q: 4.358521461486816
        mean_q: 4.126120090484619
        mean_td_error: -0.16229747235774994
        min_q: 3.9706954956054688
    num_steps_sampled: 642048
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,627,1114.14,642048,53.5317,57.4929,47.6168,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-24-37
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.00436810514029
  episode_reward_mean: 53.60591519204694
  episode_reward_min: 47.616773218988136
  episodes_this_iter: 8
  episodes_total: 6448
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 645120
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008000767789781094
        max_q: 3.905958652496338
        mean_q: 3.84694242477417
        mean_td_error: 0.0932692363858223
        min_q: 3.6885294914245605
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006310876924544573
        max_q: 3.981285572052002
        mean_q: 3.8539938926696777
        mean_td_error: -0.13355734944343567
        min_q: 3.744255304336548
    num_steps_sampled: 645120
    num_steps_trained: 64

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,630,1120.27,645120,53.6059,57.0044,47.6168,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-24-44
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.00436810514029
  episode_reward_mean: 53.48549320892532
  episode_reward_min: 49.23743918728257
  episodes_this_iter: 16
  episodes_total: 6480
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 648192
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.020218076184391975
        max_q: 3.6527769565582275
        mean_q: 3.5466909408569336
        mean_td_error: -0.2353888750076294
        min_q: 3.2338225841522217
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0010216773953288794
        max_q: 3.937074661254883
        mean_q: 3.841885805130005
        mean_td_error: 0.01239883154630661
        min_q: 3.7793333530426025
    num_steps_sampled: 648192
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,633,1126.53,648192,53.4855,57.0044,49.2374,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-24-50
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.46846957609143
  episode_reward_mean: 53.701934193864936
  episode_reward_min: 49.23743918728257
  episodes_this_iter: 16
  episodes_total: 6512
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 651264
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01605542004108429
        max_q: 3.458796739578247
        mean_q: 3.1438019275665283
        mean_td_error: -0.1808900535106659
        min_q: 2.9666430950164795
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006660087034106255
        max_q: 4.112576961517334
        mean_q: 3.945878028869629
        mean_td_error: -0.1271413117647171
        min_q: 3.7351937294006348
    num_steps_sampled: 651264
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,636,1132.58,651264,53.7019,57.4685,49.2374,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-24-56
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.50756044994816
  episode_reward_mean: 54.23654952068282
  episode_reward_min: 49.23743918728257
  episodes_this_iter: 8
  episodes_total: 6536
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 654336
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008233743719756603
        max_q: 2.888397216796875
        mean_q: 2.6911633014678955
        mean_td_error: -0.07518159598112106
        min_q: 2.5221400260925293
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0029048840515315533
        max_q: 4.339783191680908
        mean_q: 4.173833847045898
        mean_td_error: -0.05439788103103638
        min_q: 4.090451717376709
    num_steps_sampled: 654336
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,639,1138.77,654336,54.2365,58.5076,49.2374,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-25-03
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.50756044994816
  episode_reward_mean: 54.28325562111714
  episode_reward_min: 48.79194796106812
  episodes_this_iter: 8
  episodes_total: 6568
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 657408
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0044199381954967976
        max_q: 2.5679237842559814
        mean_q: 2.404928207397461
        mean_td_error: -0.022632136940956116
        min_q: 2.152132987976074
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0018179756589233875
        max_q: 4.2155656814575195
        mean_q: 4.041170597076416
        mean_td_error: 0.013635426759719849
        min_q: 3.720618724822998
    num_steps_sampled: 657408
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,642,1144.95,657408,54.2833,58.5076,48.7919,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-25-09
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.50756044994816
  episode_reward_mean: 53.93341110396268
  episode_reward_min: 48.79194796106812
  episodes_this_iter: 8
  episodes_total: 6600
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 660480
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005229967180639505
        max_q: 3.015765428543091
        mean_q: 2.861581802368164
        mean_td_error: -0.048182643949985504
        min_q: 2.641301393508911
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007301481906324625
        max_q: 4.301227569580078
        mean_q: 4.141661643981934
        mean_td_error: 0.15055836737155914
        min_q: 3.9848694801330566
    num_steps_sampled: 660480
    num_steps_trained: 6

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,645,1151.28,660480,53.9334,58.5076,48.7919,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-25-16
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.59800528008233
  episode_reward_mean: 53.355027111463585
  episode_reward_min: 47.32375411338991
  episodes_this_iter: 8
  episodes_total: 6632
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 663552
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02384900115430355
        max_q: 2.9396960735321045
        mean_q: 2.7067620754241943
        mean_td_error: -0.2713232636451721
        min_q: 2.5319597721099854
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0041693332605063915
        max_q: 4.079754829406738
        mean_q: 3.9284255504608154
        mean_td_error: -0.06712912023067474
        min_q: 3.8052797317504883
    num_steps_sampled: 663552
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,648,1157.29,663552,53.355,57.598,47.3238,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-25-22
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.59800528008233
  episode_reward_mean: 53.26528227980264
  episode_reward_min: 47.32375411338991
  episodes_this_iter: 8
  episodes_total: 6664
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 666624
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02122606709599495
        max_q: 2.5524182319641113
        mean_q: 2.2280006408691406
        mean_td_error: -0.25375500321388245
        min_q: 1.9874436855316162
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0011504358844831586
        max_q: 4.202493667602539
        mean_q: 4.110866069793701
        mean_td_error: -0.008878976106643677
        min_q: 4.028443336486816
    num_steps_sampled: 666624
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,651,1163.3,666624,53.2653,57.598,47.3238,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-25-28
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.59800528008233
  episode_reward_mean: 51.91696263803293
  episode_reward_min: 45.932726713204566
  episodes_this_iter: 16
  episodes_total: 6696
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 669696
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02064567245543003
        max_q: 2.599116563796997
        mean_q: 2.4524729251861572
        mean_td_error: -0.21418610215187073
        min_q: 2.3180854320526123
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0019701970741152763
        max_q: 4.093719959259033
        mean_q: 3.951448917388916
        mean_td_error: -0.018467977643013
        min_q: 3.8488612174987793
    num_steps_sampled: 669696
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,654,1169.25,669696,51.917,57.598,45.9327,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-25-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.175290834451644
  episode_reward_mean: 51.776299163553375
  episode_reward_min: 45.932726713204566
  episodes_this_iter: 8
  episodes_total: 6720
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 672768
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0029645604081451893
        max_q: 3.1452887058258057
        mean_q: 3.0269596576690674
        mean_td_error: 0.03908947855234146
        min_q: 2.935210943222046
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0018907581688836217
        max_q: 4.337370872497559
        mean_q: 4.227348804473877
        mean_td_error: -0.027782529592514038
        min_q: 4.057919025421143
    num_steps_sampled: 672768
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,657,1175.25,672768,51.7763,57.1753,45.9327,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-25-40
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.53437691785232
  episode_reward_mean: 51.0531486569333
  episode_reward_min: 45.932726713204566
  episodes_this_iter: 8
  episodes_total: 6752
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 675840
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007187752053141594
        max_q: 3.2371749877929688
        mean_q: 3.1577224731445312
        mean_td_error: -0.09457730501890182
        min_q: 3.0110294818878174
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015554850921034813
        max_q: 3.9672365188598633
        mean_q: 3.866014003753662
        mean_td_error: -0.2941649258136749
        min_q: 3.7318813800811768
    num_steps_sampled: 675840
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,660,1181.1,675840,51.0531,55.5344,45.9327,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-25-46
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.71067293726273
  episode_reward_mean: 51.07423638478702
  episode_reward_min: 45.932726713204566
  episodes_this_iter: 8
  episodes_total: 6784
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 678912
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002546062460169196
        max_q: 3.6003496646881104
        mean_q: 3.5039398670196533
        mean_td_error: -0.025787942111492157
        min_q: 3.3833601474761963
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00630970811471343
        max_q: 3.92238450050354
        mean_q: 3.7911338806152344
        mean_td_error: -0.1142052486538887
        min_q: 3.6578924655914307
    num_steps_sampled: 678912
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,663,1186.86,678912,51.0742,54.7107,45.9327,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-25-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.38877743696743
  episode_reward_mean: 51.709296043309365
  episode_reward_min: 48.25589650654021
  episodes_this_iter: 8
  episodes_total: 6816
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 681984
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008339094929397106
        max_q: 3.6420888900756836
        mean_q: 3.5220439434051514
        mean_td_error: -0.10964465886354446
        min_q: 3.384481906890869
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004973962903022766
        max_q: 3.4817183017730713
        mean_q: 3.3800432682037354
        mean_td_error: -0.09272164851427078
        min_q: 3.3160765171051025
    num_steps_sampled: 681984
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,666,1192.62,681984,51.7093,57.3888,48.2559,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-25-58
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.38877743696743
  episode_reward_mean: 52.253041731534914
  episode_reward_min: 48.6386139330553
  episodes_this_iter: 8
  episodes_total: 6848
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 685056
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0031681268010288477
        max_q: 3.831559896469116
        mean_q: 3.7305071353912354
        mean_td_error: 0.028818443417549133
        min_q: 3.640460968017578
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0018161789048463106
        max_q: 3.953197956085205
        mean_q: 3.837857961654663
        mean_td_error: 0.02909853309392929
        min_q: 3.7221853733062744
    num_steps_sampled: 685056
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,669,1198.32,685056,52.253,57.3888,48.6386,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-26-04
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.38877743696743
  episode_reward_mean: 50.99858301411184
  episode_reward_min: 46.43557887594045
  episodes_this_iter: 16
  episodes_total: 6880
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 688128
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00995780248194933
        max_q: 4.061760902404785
        mean_q: 3.976053476333618
        mean_td_error: -0.12954536080360413
        min_q: 3.8967549800872803
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03582141920924187
        max_q: 4.14376163482666
        mean_q: 3.5530893802642822
        mean_td_error: -0.4043257236480713
        min_q: 3.2517337799072266
    num_steps_sampled: 688128
    num_steps_trained: 68

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,672,1203.9,688128,50.9986,57.3888,46.4356,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-26-10
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.37880280496224
  episode_reward_mean: 50.627477133168775
  episode_reward_min: 46.43557887594045
  episodes_this_iter: 16
  episodes_total: 6912
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 691200
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0064893146045506
        max_q: 3.951890230178833
        mean_q: 3.867095470428467
        mean_td_error: -0.08932476490736008
        min_q: 3.727216958999634
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.021516690030694008
        max_q: 3.1539363861083984
        mean_q: 2.886465072631836
        mean_td_error: 0.2752595543861389
        min_q: 2.698467254638672
    num_steps_sampled: 691200
    num_steps_trained: 690

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,675,1209.35,691200,50.6275,57.3788,46.4356,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-26-15
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.69009163120792
  episode_reward_mean: 49.96928408225391
  episode_reward_min: 46.43557887594045
  episodes_this_iter: 8
  episodes_total: 6936
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 694272
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00463508628308773
        max_q: 3.809248447418213
        mean_q: 3.6643128395080566
        mean_td_error: -0.05476406216621399
        min_q: 3.472400426864624
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.029031282290816307
        max_q: 2.3582446575164795
        mean_q: 2.1513586044311523
        mean_td_error: -0.30376747250556946
        min_q: 1.8331156969070435
    num_steps_sampled: 694272
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,678,1214.72,694272,49.9693,54.6901,46.4356,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-26-21
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.16954930376287
  episode_reward_mean: 50.450597997523616
  episode_reward_min: 46.9575188841245
  episodes_this_iter: 8
  episodes_total: 6968
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 697344
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004370906390249729
        max_q: 3.776564598083496
        mean_q: 3.7152788639068604
        mean_td_error: -0.054693736135959625
        min_q: 3.6295552253723145
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009184101596474648
        max_q: 1.9987519979476929
        mean_q: 1.572853684425354
        mean_td_error: -0.08176393806934357
        min_q: 1.4222959280014038
    num_steps_sampled: 697344
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,681,1220.24,697344,50.4506,53.1695,46.9575,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-26-27
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.84241074942641
  episode_reward_mean: 50.901714235728505
  episode_reward_min: 46.9575188841245
  episodes_this_iter: 8
  episodes_total: 7000
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 700416
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004646869841963053
        max_q: 3.995347261428833
        mean_q: 3.8851194381713867
        mean_td_error: -0.07393794506788254
        min_q: 3.7993781566619873
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00987133290618658
        max_q: 1.947475552558899
        mean_q: 1.8463897705078125
        mean_td_error: -0.11880137026309967
        min_q: 1.757684350013733
    num_steps_sampled: 700416
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,684,1225.93,700416,50.9017,54.8424,46.9575,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-26-33
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.84241074942641
  episode_reward_mean: 51.02295204726997
  episode_reward_min: 46.9575188841245
  episodes_this_iter: 8
  episodes_total: 7032
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 703488
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0012143744388595223
        max_q: 4.202208995819092
        mean_q: 4.136898517608643
        mean_td_error: 0.02126225084066391
        min_q: 4.104247570037842
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004730095621198416
        max_q: 2.4164812564849854
        mean_q: 2.2292640209198
        mean_td_error: -0.052942998707294464
        min_q: 2.011606216430664
    num_steps_sampled: 703488
    num_steps_trained: 702

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,687,1231.88,703488,51.023,54.8424,46.9575,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-26-39
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.84241074942641
  episode_reward_mean: 50.982626517962544
  episode_reward_min: 46.898281378208644
  episodes_this_iter: 16
  episodes_total: 7064
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 706560
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005188874900341034
        max_q: 4.2552971839904785
        mean_q: 4.154421806335449
        mean_td_error: -0.1061166524887085
        min_q: 4.0524492263793945
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008825485594570637
        max_q: 2.2288706302642822
        mean_q: 1.9915472269058228
        mean_td_error: -0.074089415371418
        min_q: 1.8353350162506104
    num_steps_sampled: 706560
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,690,1237.64,706560,50.9826,54.8424,46.8983,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-26-45
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.46380657807579
  episode_reward_mean: 50.48522318817473
  episode_reward_min: 46.898281378208644
  episodes_this_iter: 16
  episodes_total: 7096
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 709632
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0008523991564288735
        max_q: 4.238941669464111
        mean_q: 4.155703067779541
        mean_td_error: -0.00811709463596344
        min_q: 4.012343883514404
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0018527243519201875
        max_q: 2.442610502243042
        mean_q: 2.3660781383514404
        mean_td_error: -0.011195816099643707
        min_q: 2.2170979976654053
    num_steps_sampled: 709632
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,693,1243.57,709632,50.4852,53.4638,46.8983,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-26-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.79112960272655
  episode_reward_mean: 50.72845239187775
  episode_reward_min: 46.70988314416053
  episodes_this_iter: 8
  episodes_total: 7120
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 712704
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0019302434520795941
        max_q: 4.480703353881836
        mean_q: 4.349796295166016
        mean_td_error: 0.03540785610675812
        min_q: 4.2366719245910645
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0022409860976040363
        max_q: 2.682734251022339
        mean_q: 2.571399450302124
        mean_td_error: -0.008357003331184387
        min_q: 2.4909048080444336
    num_steps_sampled: 712704
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,696,1249.59,712704,50.7285,54.7911,46.7099,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-26-58
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.81185821178898
  episode_reward_mean: 51.1256466945468
  episode_reward_min: 46.70988314416053
  episodes_this_iter: 8
  episodes_total: 7152
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 715776
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005245574284344912
        max_q: 4.343192100524902
        mean_q: 4.209295749664307
        mean_td_error: -0.10683678090572357
        min_q: 4.076878547668457
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004629704635590315
        max_q: 2.9618706703186035
        mean_q: 2.666874408721924
        mean_td_error: -0.037656985223293304
        min_q: 2.5051798820495605
    num_steps_sampled: 715776
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,699,1255.57,715776,51.1256,54.8119,46.7099,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-27-04
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.95657768201198
  episode_reward_mean: 52.35059934598619
  episode_reward_min: 46.70988314416053
  episodes_this_iter: 8
  episodes_total: 7184
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 718848
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0020162055734544992
        max_q: 4.3468756675720215
        mean_q: 4.276277542114258
        mean_td_error: -0.04418574273586273
        min_q: 4.214016914367676
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.026306161656975746
        max_q: 2.1710596084594727
        mean_q: 1.9821621179580688
        mean_td_error: -0.33306166529655457
        min_q: 1.7484419345855713
    num_steps_sampled: 718848
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,702,1261.46,718848,52.3506,56.9566,46.7099,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-27-10
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.95657768201198
  episode_reward_mean: 52.1563980072102
  episode_reward_min: 46.413103857904865
  episodes_this_iter: 8
  episodes_total: 7216
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 721920
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0024276033509522676
        max_q: 4.339478969573975
        mean_q: 4.273690700531006
        mean_td_error: -0.046465978026390076
        min_q: 4.223565101623535
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.06170864403247833
        max_q: 1.6768280267715454
        mean_q: 1.308810830116272
        mean_td_error: -0.8514840602874756
        min_q: 0.6263439655303955
    num_steps_sampled: 721920
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,705,1267.25,721920,52.1564,56.9566,46.4131,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-27-16
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.95657768201198
  episode_reward_mean: 52.29869817368117
  episode_reward_min: 46.413103857904865
  episodes_this_iter: 16
  episodes_total: 7248
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 724992
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0021613140124827623
        max_q: 4.389310836791992
        mean_q: 4.327761173248291
        mean_td_error: -0.04296061396598816
        min_q: 4.24417781829834
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.034739937633275986
        max_q: 1.2644283771514893
        mean_q: 1.0539953708648682
        mean_td_error: -0.43821075558662415
        min_q: 0.9366550445556641
    num_steps_sampled: 724992
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,708,1273.12,724992,52.2987,56.9566,46.4131,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-27-22
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.620092030142715
  episode_reward_mean: 51.87542585360117
  episode_reward_min: 46.413103857904865
  episodes_this_iter: 16
  episodes_total: 7280
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 728064
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0010524103417992592
        max_q: 4.559426784515381
        mean_q: 4.491905212402344
        mean_td_error: -0.017390206456184387
        min_q: 4.407819747924805
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.016525328159332275
        max_q: 1.8079984188079834
        mean_q: 1.5341131687164307
        mean_td_error: -0.16901253163814545
        min_q: 1.360107183456421
    num_steps_sampled: 728064
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,711,1278.87,728064,51.8754,56.6201,46.4131,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-27-28
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.620092030142715
  episode_reward_mean: 52.88495791543307
  episode_reward_min: 49.43997070064126
  episodes_this_iter: 8
  episodes_total: 7304
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 731136
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01273199450224638
        max_q: 4.516439914703369
        mean_q: 4.336227893829346
        mean_td_error: -0.2846837341785431
        min_q: 4.183647632598877
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0035445792600512505
        max_q: 1.8348569869995117
        mean_q: 1.7208826541900635
        mean_td_error: 0.040377482771873474
        min_q: 1.6008281707763672
    num_steps_sampled: 731136
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,714,1284.66,731136,52.885,56.6201,49.44,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-27-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.727004205120664
  episode_reward_mean: 53.473350867793435
  episode_reward_min: 48.99517320095321
  episodes_this_iter: 8
  episodes_total: 7336
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 734208
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011857292614877224
        max_q: 4.135269641876221
        mean_q: 3.515909194946289
        mean_td_error: -0.23742356896400452
        min_q: 2.910351276397705
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.020158013328909874
        max_q: 2.4406566619873047
        mean_q: 2.271867036819458
        mean_td_error: -0.20522892475128174
        min_q: 2.0599112510681152
    num_steps_sampled: 734208
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,717,1290.42,734208,53.4734,57.727,48.9952,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-27-40
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.727004205120664
  episode_reward_mean: 53.82239879841827
  episode_reward_min: 48.99517320095321
  episodes_this_iter: 8
  episodes_total: 7368
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 737280
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004514566157013178
        max_q: 2.9496841430664062
        mean_q: 2.660048246383667
        mean_td_error: -0.05538751184940338
        min_q: 2.508816957473755
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.018449213355779648
        max_q: 2.8189697265625
        mean_q: 2.69736385345459
        mean_td_error: -0.19682613015174866
        min_q: 2.5697662830352783
    num_steps_sampled: 737280
    num_steps_trained: 73

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,720,1296.03,737280,53.8224,57.727,48.9952,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-27-46
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.727004205120664
  episode_reward_mean: 53.10825703896484
  episode_reward_min: 47.746499976634475
  episodes_this_iter: 8
  episodes_total: 7400
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 740352
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002409240696579218
        max_q: 3.139462471008301
        mean_q: 2.992983818054199
        mean_td_error: 0.01688089221715927
        min_q: 2.7872495651245117
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004611399956047535
        max_q: 2.763089179992676
        mean_q: 2.6276378631591797
        mean_td_error: -0.06936835497617722
        min_q: 2.5092856884002686
    num_steps_sampled: 740352
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,723,1301.65,740352,53.1083,57.727,47.7465,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-27-51
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.41446920395583
  episode_reward_mean: 52.12116860591141
  episode_reward_min: 47.746499976634475
  episodes_this_iter: 8
  episodes_total: 7432
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 743424
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00815376453101635
        max_q: 2.802262783050537
        mean_q: 2.544069528579712
        mean_td_error: -0.1243446096777916
        min_q: 2.254610776901245
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0037782753352075815
        max_q: 3.448073387145996
        mean_q: 3.2888121604919434
        mean_td_error: 0.04097384214401245
        min_q: 3.1746623516082764
    num_steps_sampled: 743424
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,726,1307.2,743424,52.1212,56.4145,47.7465,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-27-57
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.13851784188372
  episode_reward_mean: 51.95542091542487
  episode_reward_min: 47.09555988051307
  episodes_this_iter: 16
  episodes_total: 7464
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 746496
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015412712469696999
        max_q: 2.899083137512207
        mean_q: 2.700326442718506
        mean_td_error: -0.25621405243873596
        min_q: 2.48880934715271
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0025899296160787344
        max_q: 3.7096595764160156
        mean_q: 3.5841269493103027
        mean_td_error: 0.03650394082069397
        min_q: 3.420135021209717
    num_steps_sampled: 746496
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,729,1312.98,746496,51.9554,56.1385,47.0956,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-28-03
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.969928998056986
  episode_reward_mean: 52.150798974382006
  episode_reward_min: 47.09555988051307
  episodes_this_iter: 8
  episodes_total: 7488
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 749568
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.023227903991937637
        max_q: 2.0657830238342285
        mean_q: 1.742674708366394
        mean_td_error: -0.33424559235572815
        min_q: 1.4221971035003662
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002416352042928338
        max_q: 4.029890060424805
        mean_q: 3.950399160385132
        mean_td_error: -0.02685452252626419
        min_q: 3.8819003105163574
    num_steps_sampled: 749568
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,732,1318.63,749568,52.1508,55.9699,47.0956,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-28-09
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.25963651821889
  episode_reward_mean: 52.169562951215475
  episode_reward_min: 43.69218509834728
  episodes_this_iter: 8
  episodes_total: 7520
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 752640
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007359110284596682
        max_q: 2.47501802444458
        mean_q: 2.251115322113037
        mean_td_error: 0.1088332086801529
        min_q: 2.1511356830596924
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002181378426030278
        max_q: 4.323482036590576
        mean_q: 4.2638139724731445
        mean_td_error: 0.011416047811508179
        min_q: 4.172616958618164
    num_steps_sampled: 752640
    num_steps_trained: 75

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,735,1324.59,752640,52.1696,59.2596,43.6922,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-28-16
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.25963651821889
  episode_reward_mean: 52.37162017973609
  episode_reward_min: 43.69218509834728
  episodes_this_iter: 8
  episodes_total: 7552
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 755712
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0024375978391617537
        max_q: 2.5074901580810547
        mean_q: 2.408559799194336
        mean_td_error: 0.027493566274642944
        min_q: 2.2728564739227295
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0027786160353571177
        max_q: 4.275835990905762
        mean_q: 4.230622291564941
        mean_td_error: -0.055436670780181885
        min_q: 4.139178276062012
    num_steps_sampled: 755712
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,738,1330.6,755712,52.3716,59.2596,43.6922,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-28-22
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.25963651821889
  episode_reward_mean: 52.24788632590548
  episode_reward_min: 43.69218509834728
  episodes_this_iter: 8
  episodes_total: 7584
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 758784
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007176291663199663
        max_q: 2.9470973014831543
        mean_q: 2.8361966609954834
        mean_td_error: 0.1477934718132019
        min_q: 2.6199393272399902
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0015557529404759407
        max_q: 4.58493709564209
        mean_q: 4.494647026062012
        mean_td_error: 0.024234503507614136
        min_q: 4.377263069152832
    num_steps_sampled: 758784
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,741,1336.92,758784,52.2479,59.2596,43.6922,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-28-29
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.93592127898303
  episode_reward_mean: 52.95968393212168
  episode_reward_min: 43.69218509834728
  episodes_this_iter: 8
  episodes_total: 7616
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 761856
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00198930106125772
        max_q: 3.815199136734009
        mean_q: 3.7439427375793457
        mean_td_error: 0.04124419391155243
        min_q: 3.5863075256347656
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0021415010560303926
        max_q: 4.7181291580200195
        mean_q: 4.639797210693359
        mean_td_error: -0.03800287842750549
        min_q: 4.568264961242676
    num_steps_sampled: 761856
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,744,1343.62,761856,52.9597,56.9359,43.6922,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-28-37
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.84551161942228
  episode_reward_mean: 53.79730874041189
  episode_reward_min: 47.82151641240466
  episodes_this_iter: 16
  episodes_total: 7648
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 764928
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0009411695064045489
        max_q: 4.285041809082031
        mean_q: 4.222074508666992
        mean_td_error: 0.01811368763446808
        min_q: 4.092897415161133
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0030177682638168335
        max_q: 4.951267719268799
        mean_q: 4.911194324493408
        mean_td_error: -0.04215328395366669
        min_q: 4.829455852508545
    num_steps_sampled: 764928
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,747,1350.72,764928,53.7973,57.8455,47.8215,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-28-42
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.38703720457375
  episode_reward_mean: 54.45008819089335
  episode_reward_min: 47.82151641240466
  episodes_this_iter: 8
  episodes_total: 7664
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 766976
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0011436694767326117
        max_q: 4.56458044052124
        mean_q: 4.527713298797607
        mean_td_error: 0.009963378310203552
        min_q: 4.471851825714111
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003682379610836506
        max_q: 4.989006519317627
        mean_q: 4.907304286956787
        mean_td_error: 0.06595826148986816
        min_q: 4.756191730499268
    num_steps_sampled: 766976
    num_steps_trained: 765

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,749,1355.67,766976,54.4501,58.387,47.8215,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-28-47
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.38703720457375
  episode_reward_mean: 54.657584986420815
  episode_reward_min: 47.82909285219382
  episodes_this_iter: 8
  episodes_total: 7688
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 769024
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00558162247762084
        max_q: 4.809576034545898
        mean_q: 4.738234519958496
        mean_td_error: 0.11258326470851898
        min_q: 4.634458541870117
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006063766311854124
        max_q: 4.9264326095581055
        mean_q: 4.878753662109375
        mean_td_error: 0.12288635969161987
        min_q: 4.796954154968262
    num_steps_sampled: 769024
    num_steps_trained: 768

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,751,1360.85,769024,54.6576,58.387,47.8291,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-28-53
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.44937703351378
  episode_reward_mean: 54.925021265236516
  episode_reward_min: 47.82909285219382
  episodes_this_iter: 8
  episodes_total: 7704
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 771072
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0007882713689468801
        max_q: 4.876172065734863
        mean_q: 4.841397285461426
        mean_td_error: -0.0033425092697143555
        min_q: 4.773512840270996
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0007168003940023482
        max_q: 4.900691509246826
        mean_q: 4.865748405456543
        mean_td_error: -0.010431334376335144
        min_q: 4.7803449630737305
    num_steps_sampled: 771072
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,753,1366.31,771072,54.925,58.4494,47.8291,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-28-59
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.44937703351378
  episode_reward_mean: 55.37043349930029
  episode_reward_min: 47.82909285219382
  episodes_this_iter: 8
  episodes_total: 7728
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 773120
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002461147028952837
        max_q: 4.942114353179932
        mean_q: 4.855504989624023
        mean_td_error: -0.04671645164489746
        min_q: 4.750166416168213
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0036435837391763926
        max_q: 4.9473466873168945
        mean_q: 4.909899711608887
        mean_td_error: -0.06520125269889832
        min_q: 4.8410844802856445
    num_steps_sampled: 773120
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,755,1372.12,773120,55.3704,58.4494,47.8291,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-29-05
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.44937703351378
  episode_reward_mean: 55.123032451597986
  episode_reward_min: 50.70372949685385
  episodes_this_iter: 8
  episodes_total: 7744
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 775168
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0014374583261087537
        max_q: 4.940070629119873
        mean_q: 4.896605491638184
        mean_td_error: 0.02384863793849945
        min_q: 4.825811386108398
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003001890378072858
        max_q: 4.900407314300537
        mean_q: 4.857013702392578
        mean_td_error: -0.047160327434539795
        min_q: 4.806122303009033
    num_steps_sampled: 775168
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,757,1377.98,775168,55.123,58.4494,50.7037,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-29-11
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.44937703351378
  episode_reward_mean: 53.77874467050571
  episode_reward_min: 49.88424405060282
  episodes_this_iter: 8
  episodes_total: 7768
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 777216
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0019070004345849156
        max_q: 4.943109035491943
        mean_q: 4.863387584686279
        mean_td_error: -0.03889477252960205
        min_q: 4.721257209777832
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0032264010515064
        max_q: 4.812865734100342
        mean_q: 4.732625961303711
        mean_td_error: -0.06424367427825928
        min_q: 4.673354625701904
    num_steps_sampled: 777216
    num_steps_trained: 776

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,759,1383.64,777216,53.7787,58.4494,49.8842,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-29-16
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.28945984788926
  episode_reward_mean: 52.76612130799873
  episode_reward_min: 49.60977684728931
  episodes_this_iter: 16
  episodes_total: 7792
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 779264
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001615279819816351
        max_q: 4.939098358154297
        mean_q: 4.8324785232543945
        mean_td_error: -0.016933858394622803
        min_q: 4.753355026245117
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003485130611807108
        max_q: 4.886908531188965
        mean_q: 4.835829734802246
        mean_td_error: -0.0552918016910553
        min_q: 4.76356840133667
    num_steps_sampled: 779264
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,761,1388.98,779264,52.7661,58.2895,49.6098,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-29-22
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.69223346443859
  episode_reward_mean: 52.5075966555609
  episode_reward_min: 49.60977684728931
  episodes_this_iter: 8
  episodes_total: 7808
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 781312
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0037083032075315714
        max_q: 4.95542573928833
        mean_q: 4.896434783935547
        mean_td_error: -0.07243920862674713
        min_q: 4.801993370056152
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0025353035889565945
        max_q: 4.862504482269287
        mean_q: 4.791360855102539
        mean_td_error: 0.05037824809551239
        min_q: 4.717625141143799
    num_steps_sampled: 781312
    num_steps_trained: 780

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,763,1394.18,781312,52.5076,57.6922,49.6098,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-29-27
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.020941612561636
  episode_reward_mean: 51.382392791720356
  episode_reward_min: 45.99663193252579
  episodes_this_iter: 16
  episodes_total: 7832
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 783360
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002618699101731181
        max_q: 4.750770092010498
        mean_q: 4.691042900085449
        mean_td_error: -0.05454109609127045
        min_q: 4.560582160949707
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002470494946464896
        max_q: 4.827272415161133
        mean_q: 4.75152063369751
        mean_td_error: -0.04819297790527344
        min_q: 4.573268890380859
    num_steps_sampled: 783360
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,765,1399.42,783360,51.3824,56.0209,45.9966,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-29-32
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.020941612561636
  episode_reward_mean: 51.30703060007925
  episode_reward_min: 45.99663193252579
  episodes_this_iter: 8
  episodes_total: 7848
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 785408
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006414442788809538
        max_q: 4.724358558654785
        mean_q: 4.617916107177734
        mean_td_error: -0.1206154078245163
        min_q: 4.462040424346924
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012538724578917027
        max_q: 4.5797600746154785
        mean_q: 4.411289215087891
        mean_td_error: -0.21995088458061218
        min_q: 4.1891021728515625
    num_steps_sampled: 785408
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,767,1404.4,785408,51.307,56.0209,45.9966,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-29-40
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.020941612561636
  episode_reward_mean: 51.12365315218205
  episode_reward_min: 45.99663193252579
  episodes_this_iter: 8
  episodes_total: 7880
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 788480
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004303349647670984
        max_q: 4.953372478485107
        mean_q: 4.359260082244873
        mean_td_error: -0.051627904176712036
        min_q: 4.17344331741333
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003946597222238779
        max_q: 4.193727970123291
        mean_q: 4.147114276885986
        mean_td_error: -0.05585944652557373
        min_q: 4.108520030975342
    num_steps_sampled: 788480
    num_steps_trained: 7

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,770,1411.43,788480,51.1237,56.0209,45.9966,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-29-47
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.020941612561636
  episode_reward_mean: 51.553374339093665
  episode_reward_min: 45.99663193252579
  episodes_this_iter: 8
  episodes_total: 7912
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 791552
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005038985051214695
        max_q: 4.142117977142334
        mean_q: 4.016940593719482
        mean_td_error: -0.0670049861073494
        min_q: 3.827923536300659
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003931955900043249
        max_q: 4.158563613891602
        mean_q: 4.087101936340332
        mean_td_error: -0.056421324610710144
        min_q: 3.9539198875427246
    num_steps_sampled: 791552
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,773,1418.45,791552,51.5534,56.0209,45.9966,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-29-54
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.67072714195838
  episode_reward_mean: 52.3555221463135
  episode_reward_min: 48.24639838867966
  episodes_this_iter: 8
  episodes_total: 7944
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 794624
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008023777976632118
        max_q: 4.2867560386657715
        mean_q: 4.146078109741211
        mean_td_error: -0.12599259614944458
        min_q: 3.9878311157226562
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0027159620076417923
        max_q: 3.54726243019104
        mean_q: 3.395650863647461
        mean_td_error: -0.0020385757088661194
        min_q: 3.1807103157043457
    num_steps_sampled: 794624
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,776,1425.15,794624,52.3555,55.6707,48.2464,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-30-00
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.75428698877998
  episode_reward_mean: 53.27361843789014
  episode_reward_min: 48.997310315007006
  episodes_this_iter: 16
  episodes_total: 7976
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 797696
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.04998016357421875
        max_q: 3.4624080657958984
        mean_q: 2.967503547668457
        mean_td_error: -0.5068213939666748
        min_q: 2.607095718383789
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01347600482404232
        max_q: 3.2601816654205322
        mean_q: 2.822779417037964
        mean_td_error: -0.20796898007392883
        min_q: 2.4340872764587402
    num_steps_sampled: 797696
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,779,1431.48,797696,53.2736,55.7543,48.9973,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-30-07
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.75428698877998
  episode_reward_mean: 53.209551068025995
  episode_reward_min: 48.997310315007006
  episodes_this_iter: 8
  episodes_total: 8000
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 800768
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03374616429209709
        max_q: 2.3839175701141357
        mean_q: 2.0439889430999756
        mean_td_error: -0.3958406150341034
        min_q: 1.8147977590560913
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007605567574501038
        max_q: 2.908872365951538
        mean_q: 2.398122787475586
        mean_td_error: 0.033304259181022644
        min_q: 2.0334339141845703
    num_steps_sampled: 800768
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,782,1437.73,800768,53.2096,55.7543,48.9973,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-30-13
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.2761595850422
  episode_reward_mean: 53.451960784113844
  episode_reward_min: 50.20300205921189
  episodes_this_iter: 8
  episodes_total: 8032
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 803840
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.018009567633271217
        max_q: 2.593332290649414
        mean_q: 2.319786787033081
        mean_td_error: -0.2324172556400299
        min_q: 2.0677852630615234
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009206424467265606
        max_q: 2.6067605018615723
        mean_q: 2.3472201824188232
        mean_td_error: -0.1055079847574234
        min_q: 2.0519163608551025
    num_steps_sampled: 803840
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,785,1444,803840,53.452,57.2762,50.203,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-30-20
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.2761595850422
  episode_reward_mean: 53.19908281548993
  episode_reward_min: 50.20300205921189
  episodes_this_iter: 8
  episodes_total: 8064
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 806912
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.022449932992458344
        max_q: 2.4528627395629883
        mean_q: 2.244783401489258
        mean_td_error: -0.2886812686920166
        min_q: 2.0305111408233643
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.04394456371665001
        max_q: 2.2635233402252197
        mean_q: 1.9899319410324097
        mean_td_error: -0.5256485939025879
        min_q: 1.770398736000061
    num_steps_sampled: 806912
    num_steps_trained: 80

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,788,1450.51,806912,53.1991,57.2762,50.203,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-30-27
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.2761595850422
  episode_reward_mean: 52.741589050891925
  episode_reward_min: 49.04362324951468
  episodes_this_iter: 8
  episodes_total: 8096
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 809984
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00804353877902031
        max_q: 2.808664560317993
        mean_q: 2.6853408813476562
        mean_td_error: -0.11039236932992935
        min_q: 2.523357391357422
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005590340122580528
        max_q: 2.510775566101074
        mean_q: 2.202805995941162
        mean_td_error: -0.061318106949329376
        min_q: 2.061044692993164
    num_steps_sampled: 809984
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,791,1457.01,809984,52.7416,57.2762,49.0436,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-30-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.83101024792317
  episode_reward_mean: 53.24472974661586
  episode_reward_min: 49.04362324951468
  episodes_this_iter: 8
  episodes_total: 8128
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 813056
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010503632016479969
        max_q: 3.0884437561035156
        mean_q: 2.8674983978271484
        mean_td_error: -0.12421814352273941
        min_q: 2.708606719970703
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009858096949756145
        max_q: 2.091287136077881
        mean_q: 1.8926451206207275
        mean_td_error: 0.12739945948123932
        min_q: 1.7296123504638672
    num_steps_sampled: 813056
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,794,1463.53,813056,53.2447,59.831,49.0436,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-30-41
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.521032799246754
  episode_reward_mean: 53.76831374096683
  episode_reward_min: 49.04362324951468
  episodes_this_iter: 16
  episodes_total: 8160
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 816128
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00651548383757472
        max_q: 3.094980239868164
        mean_q: 2.972118377685547
        mean_td_error: -0.07833380997180939
        min_q: 2.854257345199585
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011777334846556187
        max_q: 1.5521690845489502
        mean_q: 1.280116081237793
        mean_td_error: -0.1306440532207489
        min_q: 0.9158825874328613
    num_steps_sampled: 816128
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,797,1470.06,816128,53.7683,60.521,49.0436,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-30-47
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.521032799246754
  episode_reward_mean: 54.733067946508555
  episode_reward_min: 47.12008399480233
  episodes_this_iter: 16
  episodes_total: 8192
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 819200
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004179223906248808
        max_q: 3.4651622772216797
        mean_q: 3.3473026752471924
        mean_td_error: -0.05279083549976349
        min_q: 3.2841105461120605
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012023542076349258
        max_q: 1.4985581636428833
        mean_q: 1.111209750175476
        mean_td_error: -0.15418647229671478
        min_q: 0.9761793613433838
    num_steps_sampled: 819200
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,800,1476.59,819200,54.7331,60.521,47.1201,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-30-54
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.521032799246754
  episode_reward_mean: 54.73221530682813
  episode_reward_min: 47.12008399480233
  episodes_this_iter: 8
  episodes_total: 8216
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 822272
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.018148940056562424
        max_q: 3.37666916847229
        mean_q: 3.241673231124878
        mean_td_error: -0.22369398176670074
        min_q: 3.0609323978424072
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.024939684197306633
        max_q: 2.0190529823303223
        mean_q: 1.7366523742675781
        mean_td_error: -0.2586246728897095
        min_q: 1.5953848361968994
    num_steps_sampled: 822272
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,803,1483.1,822272,54.7322,60.521,47.1201,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-31-01
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.26151906554073
  episode_reward_mean: 54.53938121360541
  episode_reward_min: 47.12008399480233
  episodes_this_iter: 8
  episodes_total: 8248
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 825344
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006487768143415451
        max_q: 3.447852849960327
        mean_q: 3.3369131088256836
        mean_td_error: -0.06700639426708221
        min_q: 3.1809821128845215
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.017911534756422043
        max_q: 2.1686763763427734
        mean_q: 1.8227912187576294
        mean_td_error: -0.1716317981481552
        min_q: 1.4752527475357056
    num_steps_sampled: 825344
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,806,1489.55,825344,54.5394,60.2615,47.1201,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-31-07
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.47227358491854
  episode_reward_mean: 53.984152190351324
  episode_reward_min: 50.377001138405745
  episodes_this_iter: 8
  episodes_total: 8280
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 828416
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003389378311112523
        max_q: 3.6718266010284424
        mean_q: 3.514491558074951
        mean_td_error: -0.004163607954978943
        min_q: 3.306675434112549
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014583801850676537
        max_q: 1.4386640787124634
        mean_q: 0.9725009202957153
        mean_td_error: -0.20352160930633545
        min_q: 0.5724977254867554
    num_steps_sampled: 828416
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,809,1495.84,828416,53.9842,58.4723,50.377,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-31-14
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.47227358491854
  episode_reward_mean: 52.448206875314646
  episode_reward_min: 44.109192493531
  episodes_this_iter: 8
  episodes_total: 8312
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 831488
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0031981063075363636
        max_q: 3.8203160762786865
        mean_q: 3.7186427116394043
        mean_td_error: -0.02873307466506958
        min_q: 3.647434711456299
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00206165318377316
        max_q: 2.4754085540771484
        mean_q: 2.4436192512512207
        mean_td_error: 0.02256348729133606
        min_q: 2.312930107116699
    num_steps_sampled: 831488
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,812,1502.33,831488,52.4482,58.4723,44.1092,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-31-21
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.44505211471945
  episode_reward_mean: 51.89645757996327
  episode_reward_min: 44.109192493531
  episodes_this_iter: 16
  episodes_total: 8344
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 834560
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005791338626295328
        max_q: 4.096363544464111
        mean_q: 4.011818885803223
        mean_td_error: -0.06506854295730591
        min_q: 3.954998254776001
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014473197050392628
        max_q: 3.612924575805664
        mean_q: 3.3478479385375977
        mean_td_error: 0.21962834894657135
        min_q: 3.247708320617676
    num_steps_sampled: 834560
    num_steps_trained: 833

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,815,1509.08,834560,51.8965,57.4451,44.1092,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-31-28
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.79943985966666
  episode_reward_mean: 51.84463174092349
  episode_reward_min: 44.109192493531
  episodes_this_iter: 16
  episodes_total: 8376
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 837632
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012220348231494427
        max_q: 4.18496561050415
        mean_q: 3.8949337005615234
        mean_td_error: -0.10153868049383163
        min_q: 3.5572452545166016
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0020153650548309088
        max_q: 4.014130115509033
        mean_q: 3.932969093322754
        mean_td_error: 0.02099386602640152
        min_q: 3.8180642127990723
    num_steps_sampled: 837632
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,818,1515.48,837632,51.8446,58.7994,44.1092,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-31-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.79943985966666
  episode_reward_mean: 53.55127340753858
  episode_reward_min: 44.40174065474716
  episodes_this_iter: 8
  episodes_total: 8400
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 840704
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013250338844954967
        max_q: 4.0419206619262695
        mean_q: 3.9161176681518555
        mean_td_error: -0.15680325031280518
        min_q: 3.797787666320801
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0021916746627539396
        max_q: 4.466806411743164
        mean_q: 4.421262741088867
        mean_td_error: 0.0346769243478775
        min_q: 4.279819965362549
    num_steps_sampled: 840704
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,821,1521.74,840704,53.5513,58.7994,44.4017,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-31-41
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.79943985966666
  episode_reward_mean: 54.05303752070008
  episode_reward_min: 48.74716557339104
  episodes_this_iter: 8
  episodes_total: 8432
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 843776
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0015769419260323048
        max_q: 3.9981679916381836
        mean_q: 3.8639893531799316
        mean_td_error: 0.029085680842399597
        min_q: 3.71757173538208
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0008692547562532127
        max_q: 4.793428421020508
        mean_q: 4.696490287780762
        mean_td_error: -0.0031174123287200928
        min_q: 4.593494892120361
    num_steps_sampled: 843776
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,824,1528.02,843776,54.053,58.7994,48.7472,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-31-47
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.645474382756056
  episode_reward_mean: 54.280076801363876
  episode_reward_min: 48.74716557339104
  episodes_this_iter: 8
  episodes_total: 8464
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 846848
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0033403062261641026
        max_q: 3.718564748764038
        mean_q: 3.46770977973938
        mean_td_error: -0.03829379379749298
        min_q: 3.3121047019958496
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001499403384514153
        max_q: 5.189608573913574
        mean_q: 5.110676288604736
        mean_td_error: 0.02075117826461792
        min_q: 4.971683979034424
    num_steps_sampled: 846848
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,827,1534.3,846848,54.2801,58.6455,48.7472,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-31-54
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.814457826027045
  episode_reward_mean: 53.963565985063504
  episode_reward_min: 50.6472157407473
  episodes_this_iter: 8
  episodes_total: 8496
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 849920
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0019072070717811584
        max_q: 3.392775535583496
        mean_q: 3.2840988636016846
        mean_td_error: 0.030701301991939545
        min_q: 3.1772780418395996
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001025197678245604
        max_q: 5.323740482330322
        mean_q: 5.260986328125
        mean_td_error: 0.010338753461837769
        min_q: 5.196595191955566
    num_steps_sampled: 849920
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,830,1540.64,849920,53.9636,57.8145,50.6472,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-32-00
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.8466970687077
  episode_reward_mean: 52.72197527403704
  episode_reward_min: 48.85620106834271
  episodes_this_iter: 16
  episodes_total: 8528
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 852992
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0014591600047424436
        max_q: 3.390608310699463
        mean_q: 3.317683219909668
        mean_td_error: 0.017293937504291534
        min_q: 3.233713150024414
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0008475866634398699
        max_q: 5.376685619354248
        mean_q: 5.347341060638428
        mean_td_error: 0.011952698230743408
        min_q: 5.305899620056152
    num_steps_sampled: 852992
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,833,1547.01,852992,52.722,57.8467,48.8562,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-32-07
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.8466970687077
  episode_reward_mean: 52.36511075119802
  episode_reward_min: 48.85620106834271
  episodes_this_iter: 16
  episodes_total: 8560
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 856064
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004833719227463007
        max_q: 3.6402578353881836
        mean_q: 3.516382932662964
        mean_td_error: -0.0859101414680481
        min_q: 3.4157536029815674
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0028843397740274668
        max_q: 5.358456134796143
        mean_q: 5.308967113494873
        mean_td_error: -0.06039401888847351
        min_q: 5.264364719390869
    num_steps_sampled: 856064
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,836,1553.75,856064,52.3651,57.8467,48.8562,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-32-14
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.8466970687077
  episode_reward_mean: 51.90734908991661
  episode_reward_min: 48.85620106834271
  episodes_this_iter: 8
  episodes_total: 8584
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 859136
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0027247227262705564
        max_q: 3.7854464054107666
        mean_q: 3.7132959365844727
        mean_td_error: -0.04322712868452072
        min_q: 3.6033849716186523
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001675850129686296
        max_q: 5.262121200561523
        mean_q: 5.210955619812012
        mean_td_error: -0.02307276427745819
        min_q: 5.103360176086426
    num_steps_sampled: 859136
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,839,1560.35,859136,51.9073,57.8467,48.8562,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-32-21
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.780482583322645
  episode_reward_mean: 51.18653882930958
  episode_reward_min: 48.47300623280359
  episodes_this_iter: 8
  episodes_total: 8616
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 862208
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003183456137776375
        max_q: 3.923778772354126
        mean_q: 3.8311190605163574
        mean_td_error: 0.05919025093317032
        min_q: 3.778886079788208
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0014757266035303473
        max_q: 5.040249824523926
        mean_q: 4.974147319793701
        mean_td_error: -0.01216977834701538
        min_q: 4.82675313949585
    num_steps_sampled: 862208
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,842,1567,862208,51.1865,54.7805,48.473,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-32-28
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.748171166776054
  episode_reward_mean: 52.344625009123185
  episode_reward_min: 48.47300623280359
  episodes_this_iter: 8
  episodes_total: 8648
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 865280
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0024170204997062683
        max_q: 3.9663848876953125
        mean_q: 3.8188579082489014
        mean_td_error: 0.02296575903892517
        min_q: 3.7004284858703613
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.023750195279717445
        max_q: 4.777588367462158
        mean_q: 4.431019306182861
        mean_td_error: -0.28046756982803345
        min_q: 4.116724014282227
    num_steps_sampled: 865280
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,845,1573.69,865280,52.3446,57.7482,48.473,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-32-35
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.748171166776054
  episode_reward_mean: 52.791338521409955
  episode_reward_min: 48.47300623280359
  episodes_this_iter: 8
  episodes_total: 8680
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 868352
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007780700922012329
        max_q: 4.0190839767456055
        mean_q: 3.9496772289276123
        mean_td_error: -0.13874012231826782
        min_q: 3.8474366664886475
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007860017940402031
        max_q: 4.54392671585083
        mean_q: 4.403157711029053
        mean_td_error: 0.10306479036808014
        min_q: 4.299286842346191
    num_steps_sampled: 868352
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,848,1580.62,868352,52.7913,57.7482,48.473,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-32-43
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.748171166776054
  episode_reward_mean: 52.34403651112269
  episode_reward_min: 46.095213402295705
  episodes_this_iter: 8
  episodes_total: 8712
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 871424
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.018936380743980408
        max_q: 4.097737789154053
        mean_q: 3.769383430480957
        mean_td_error: -0.19084009528160095
        min_q: 3.5415964126586914
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0029581349808722734
        max_q: 4.476406574249268
        mean_q: 4.295208930969238
        mean_td_error: -0.00777946412563324
        min_q: 4.160327911376953
    num_steps_sampled: 871424
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,851,1587.77,871424,52.344,57.7482,46.0952,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-32-48
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.748171166776054
  episode_reward_mean: 51.75013308835956
  episode_reward_min: 46.095213402295705
  episodes_this_iter: 8
  episodes_total: 8728
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 873472
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.06588443368673325
        max_q: 3.088141918182373
        mean_q: 2.2286550998687744
        mean_td_error: -0.7734811305999756
        min_q: 1.5412335395812988
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006876075640320778
        max_q: 4.582267761230469
        mean_q: 4.473636627197266
        mean_td_error: -0.0736866444349289
        min_q: 4.424211025238037
    num_steps_sampled: 873472
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,853,1592.64,873472,51.7501,57.7482,46.0952,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-32-53
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.87864193419359
  episode_reward_mean: 51.143255160890895
  episode_reward_min: 46.095213402295705
  episodes_this_iter: 8
  episodes_total: 8752
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 875520
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03633035346865654
        max_q: 2.8847036361694336
        mean_q: 2.360520362854004
        mean_td_error: -0.36664044857025146
        min_q: 2.058354139328003
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0036954181268811226
        max_q: 4.583341121673584
        mean_q: 4.500144004821777
        mean_td_error: -0.0786392092704773
        min_q: 4.421344757080078
    num_steps_sampled: 875520
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,855,1597.85,875520,51.1433,54.8786,46.0952,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-32-58
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.87864193419359
  episode_reward_mean: 50.54031981042799
  episode_reward_min: 46.095213402295705
  episodes_this_iter: 8
  episodes_total: 8768
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 877568
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.04812902212142944
        max_q: 2.5664496421813965
        mean_q: 2.302398681640625
        mean_td_error: -0.48585712909698486
        min_q: 2.124277114868164
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0010993951000273228
        max_q: 4.77699089050293
        mean_q: 4.723598957061768
        mean_td_error: 0.026627153158187866
        min_q: 4.621043682098389
    num_steps_sampled: 877568
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,857,1602.95,877568,50.5403,54.8786,46.0952,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-33-04
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.87864193419359
  episode_reward_mean: 50.152256507252396
  episode_reward_min: 46.095213402295705
  episodes_this_iter: 8
  episodes_total: 8792
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 879616
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009916355833411217
        max_q: 2.4488234519958496
        mean_q: 2.1036996841430664
        mean_td_error: 0.10464856028556824
        min_q: 2.030216693878174
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0007081756484694779
        max_q: 4.950987815856934
        mean_q: 4.881453037261963
        mean_td_error: -0.00554463267326355
        min_q: 4.824848175048828
    num_steps_sampled: 879616
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,859,1608.06,879616,50.1523,54.8786,46.0952,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-33-10
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.87864193419359
  episode_reward_mean: 49.948943652547044
  episode_reward_min: 46.26552159821538
  episodes_this_iter: 16
  episodes_total: 8816
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 881664
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011163970455527306
        max_q: 2.3856804370880127
        mean_q: 2.15104079246521
        mean_td_error: -0.10173572599887848
        min_q: 1.9197783470153809
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0017336229793727398
        max_q: 5.0004143714904785
        mean_q: 4.880906105041504
        mean_td_error: 0.03617487847805023
        min_q: 4.8083014488220215
    num_steps_sampled: 881664
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,861,1613.85,881664,49.9489,54.8786,46.2655,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-33-16
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.80366158155321
  episode_reward_mean: 50.136930578204286
  episode_reward_min: 46.23167680358674
  episodes_this_iter: 8
  episodes_total: 8832
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 883712
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.028258292004466057
        max_q: 2.142245292663574
        mean_q: 1.4870610237121582
        mean_td_error: -0.3046320080757141
        min_q: 1.2028785943984985
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0006109739770181477
        max_q: 4.873253345489502
        mean_q: 4.789101600646973
        mean_td_error: 0.0013739168643951416
        min_q: 4.731595039367676
    num_steps_sampled: 883712
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,863,1619.53,883712,50.1369,55.8037,46.2317,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-33-22
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.80366158155321
  episode_reward_mean: 49.556895499522724
  episode_reward_min: 44.778609918444
  episodes_this_iter: 16
  episodes_total: 8856
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 885760
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010648684576153755
        max_q: 1.2180368900299072
        mean_q: 0.7687819004058838
        mean_td_error: -0.012641225010156631
        min_q: 0.43646425008773804
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0018030579667538404
        max_q: 4.868220329284668
        mean_q: 4.804859161376953
        mean_td_error: -0.04468461871147156
        min_q: 4.72555685043335
    num_steps_sampled: 885760
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,865,1625.51,885760,49.5569,55.8037,44.7786,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-33-28
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.41596283162358
  episode_reward_mean: 50.03017752239521
  episode_reward_min: 44.778609918444
  episodes_this_iter: 8
  episodes_total: 8872
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 887808
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0391387902200222
        max_q: 0.9910259246826172
        mean_q: 0.5648155212402344
        mean_td_error: -0.33898234367370605
        min_q: 0.41524219512939453
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00308236270211637
        max_q: 4.874735355377197
        mean_q: 4.856900691986084
        mean_td_error: 0.07607480883598328
        min_q: 4.782327651977539
    num_steps_sampled: 887808
    num_steps_trained: 8868

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,867,1631.83,887808,50.0302,58.416,44.7786,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-33-35
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.41596283162358
  episode_reward_mean: 50.494867513759175
  episode_reward_min: 44.778609918444
  episodes_this_iter: 8
  episodes_total: 8896
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 889856
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01248509343713522
        max_q: 0.7219054698944092
        mean_q: 0.35510534048080444
        mean_td_error: -0.07793529331684113
        min_q: 0.12941572070121765
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0010008220560848713
        max_q: 4.903072834014893
        mean_q: 4.858701705932617
        mean_td_error: -0.022025495767593384
        min_q: 4.758033752441406
    num_steps_sampled: 889856
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,869,1638.58,889856,50.4949,58.416,44.7786,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-33-42
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.41596283162358
  episode_reward_mean: 50.71596099458472
  episode_reward_min: 44.778609918444
  episodes_this_iter: 8
  episodes_total: 8912
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 891904
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02730104885995388
        max_q: 0.9050719738006592
        mean_q: 0.7439312934875488
        mean_td_error: -0.25017163157463074
        min_q: 0.60866379737854
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0009089024970307946
        max_q: 4.853431701660156
        mean_q: 4.817532539367676
        mean_td_error: 0.00045217573642730713
        min_q: 4.726646900177002
    num_steps_sampled: 891904
    num_steps_trained: 8

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,871,1644.7,891904,50.716,58.416,44.7786,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-33-48
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.41596283162358
  episode_reward_mean: 50.887926708109006
  episode_reward_min: 44.38808422972041
  episodes_this_iter: 8
  episodes_total: 8936
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 893952
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007883945479989052
        max_q: 1.4488470554351807
        mean_q: 1.2634590864181519
        mean_td_error: -0.06797623634338379
        min_q: 1.1298036575317383
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0008500244002789259
        max_q: 4.79473876953125
        mean_q: 4.715559959411621
        mean_td_error: -0.006294265389442444
        min_q: 4.675392150878906
    num_steps_sampled: 893952
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,873,1650.46,893952,50.8879,58.416,44.3881,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-33-54
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.41596283162358
  episode_reward_mean: 50.46328945707259
  episode_reward_min: 44.38808422972041
  episodes_this_iter: 16
  episodes_total: 8960
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 896000
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.04254959151148796
        max_q: 1.532517433166504
        mean_q: 1.2296433448791504
        mean_td_error: -0.35530200600624084
        min_q: 1.0102391242980957
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003556508105248213
        max_q: 4.686307430267334
        mean_q: 4.588720321655273
        mean_td_error: -0.07837285101413727
        min_q: 4.510565280914307
    num_steps_sampled: 896000
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,875,1656.11,896000,50.4633,58.416,44.3881,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-33-59
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.65184533488155
  episode_reward_mean: 50.04463477780263
  episode_reward_min: 44.38808422972041
  episodes_this_iter: 8
  episodes_total: 8976
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 898048
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008538560941815376
        max_q: 1.5134031772613525
        mean_q: 1.3946906328201294
        mean_td_error: -0.06875792145729065
        min_q: 1.2067643404006958
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0017314243596047163
        max_q: 4.678411960601807
        mean_q: 4.483015537261963
        mean_td_error: 0.007027626037597656
        min_q: 4.326746463775635
    num_steps_sampled: 898048
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,877,1661.69,898048,50.0446,55.6518,44.3881,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-34-06
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.65184533488155
  episode_reward_mean: 50.271361676267674
  episode_reward_min: 44.38808422972041
  episodes_this_iter: 16
  episodes_total: 9000
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 900096
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0094982348382473
        max_q: 2.0669944286346436
        mean_q: 1.9907925128936768
        mean_td_error: 0.13083939254283905
        min_q: 1.905765175819397
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.021639930084347725
        max_q: 4.7039384841918945
        mean_q: 4.543764591217041
        mean_td_error: -0.2522801160812378
        min_q: 4.0996904373168945
    num_steps_sampled: 900096
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,879,1667.59,900096,50.2714,55.6518,44.3881,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-34-12
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.65184533488155
  episode_reward_mean: 50.88546213374254
  episode_reward_min: 44.38808422972041
  episodes_this_iter: 8
  episodes_total: 9016
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 902144
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008036218583583832
        max_q: 2.923352003097534
        mean_q: 2.857174873352051
        mean_td_error: 0.110328309237957
        min_q: 2.746424913406372
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.07657372951507568
        max_q: 3.327861785888672
        mean_q: 2.2881178855895996
        mean_td_error: -1.682132601737976
        min_q: 1.7682979106903076
    num_steps_sampled: 902144
    num_steps_trained: 901152

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,881,1673.59,902144,50.8855,55.6518,44.3881,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-34-19
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.29491228147163
  episode_reward_mean: 50.91330076025513
  episode_reward_min: 44.78946626888387
  episodes_this_iter: 16
  episodes_total: 9040
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 904192
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005591488443315029
        max_q: 3.419991970062256
        mean_q: 3.355877637863159
        mean_td_error: 0.07517283409833908
        min_q: 3.153259038925171
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03947053104639053
        max_q: 3.643828868865967
        mean_q: 2.96818470954895
        mean_td_error: 0.5397501587867737
        min_q: 2.5384891033172607
    num_steps_sampled: 904192
    num_steps_trained: 90320

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,883,1680.81,904192,50.9133,54.2949,44.7895,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-34-27
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.29491228147163
  episode_reward_mean: 51.51406488921228
  episode_reward_min: 44.78946626888387
  episodes_this_iter: 8
  episodes_total: 9056
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 906240
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006150295492261648
        max_q: 3.9908971786499023
        mean_q: 3.9459643363952637
        mean_td_error: 0.0803801640868187
        min_q: 3.7240915298461914
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003024653298780322
        max_q: 2.7896275520324707
        mean_q: 2.4692223072052
        mean_td_error: -0.011369563639163971
        min_q: 2.2962019443511963
    num_steps_sampled: 906240
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,885,1688.43,906240,51.5141,54.2949,44.7895,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-34-35
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.762708887927666
  episode_reward_mean: 52.44462045280889
  episode_reward_min: 47.89323529106748
  episodes_this_iter: 8
  episodes_total: 9080
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 908288
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003023137804120779
        max_q: 4.358691692352295
        mean_q: 4.319808006286621
        mean_td_error: 0.04365064203739166
        min_q: 4.2670817375183105
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006225668825209141
        max_q: 3.0068366527557373
        mean_q: 2.9138131141662598
        mean_td_error: 0.07335472851991653
        min_q: 2.7865521907806396
    num_steps_sampled: 908288
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,887,1696.22,908288,52.4446,58.7627,47.8932,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-34-43
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.762708887927666
  episode_reward_mean: 52.431113601762675
  episode_reward_min: 47.89323529106748
  episodes_this_iter: 8
  episodes_total: 9096
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 910336
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0021449285559356213
        max_q: 4.541810989379883
        mean_q: 4.472253322601318
        mean_td_error: -0.02497829496860504
        min_q: 4.367909908294678
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0022881613112986088
        max_q: 3.2537007331848145
        mean_q: 3.2056474685668945
        mean_td_error: -0.029251404106616974
        min_q: 3.0725173950195312
    num_steps_sampled: 910336
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,889,1703.99,910336,52.4311,58.7627,47.8932,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-34-51
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.762708887927666
  episode_reward_mean: 51.81884340766312
  episode_reward_min: 47.89323529106748
  episodes_this_iter: 8
  episodes_total: 9120
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 912384
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0024868990294635296
        max_q: 4.455095291137695
        mean_q: 4.420297622680664
        mean_td_error: -0.03305526077747345
        min_q: 4.302576065063477
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010522463358938694
        max_q: 3.7352917194366455
        mean_q: 3.6458942890167236
        mean_td_error: 0.1455235332250595
        min_q: 3.52377986907959
    num_steps_sampled: 912384
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,891,1711.49,912384,51.8188,58.7627,47.8932,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-34-58
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.762708887927666
  episode_reward_mean: 51.52910012741533
  episode_reward_min: 47.27390241440409
  episodes_this_iter: 16
  episodes_total: 9144
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 914432
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014594044536352158
        max_q: 4.700527191162109
        mean_q: 4.660487651824951
        mean_td_error: 0.18243010342121124
        min_q: 4.586765766143799
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002204054966568947
        max_q: 3.8980228900909424
        mean_q: 3.8397910594940186
        mean_td_error: 0.02358456701040268
        min_q: 3.7872231006622314
    num_steps_sampled: 914432
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,893,1718.23,914432,51.5291,58.7627,47.2739,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-35-04
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.762708887927666
  episode_reward_mean: 51.013492014410865
  episode_reward_min: 47.27390241440409
  episodes_this_iter: 8
  episodes_total: 9160
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 916480
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0029547875747084618
        max_q: 4.799882888793945
        mean_q: 4.72461462020874
        mean_td_error: -0.03991203010082245
        min_q: 4.651956081390381
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006688761059194803
        max_q: 4.062891006469727
        mean_q: 3.974153995513916
        mean_td_error: 0.0764547809958458
        min_q: 3.867314577102661
    num_steps_sampled: 916480
    num_steps_trained: 91

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,895,1724.2,916480,51.0135,58.7627,47.2739,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-35-10
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.126532944192164
  episode_reward_mean: 50.25381349675351
  episode_reward_min: 46.63861602594288
  episodes_this_iter: 16
  episodes_total: 9184
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 918528
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0025693036150187254
        max_q: 4.703440189361572
        mean_q: 4.639533519744873
        mean_td_error: 0.03522735834121704
        min_q: 4.592403411865234
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004092194139957428
        max_q: 4.0362868309021
        mean_q: 3.972926139831543
        mean_td_error: -0.06102093309164047
        min_q: 3.9334230422973633
    num_steps_sampled: 918528
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,897,1730.14,918528,50.2538,54.1265,46.6386,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-35-16
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.0881463398629
  episode_reward_mean: 50.42132786630544
  episode_reward_min: 46.63861602594288
  episodes_this_iter: 8
  episodes_total: 9200
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 920576
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004264496266841888
        max_q: 4.577247142791748
        mean_q: 4.520599842071533
        mean_td_error: -0.05834922194480896
        min_q: 4.3185625076293945
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0016021670307964087
        max_q: 4.233293533325195
        mean_q: 4.155980110168457
        mean_td_error: 0.010650575160980225
        min_q: 4.063350200653076
    num_steps_sampled: 920576
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,899,1735.42,920576,50.4213,56.0881,46.6386,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-35-21
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.0881463398629
  episode_reward_mean: 50.62456731285047
  episode_reward_min: 46.63861602594288
  episodes_this_iter: 8
  episodes_total: 9224
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 922624
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0045286561362445354
        max_q: 4.601275444030762
        mean_q: 4.552844047546387
        mean_td_error: -0.06522205471992493
        min_q: 4.495700836181641
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0009666041005402803
        max_q: 4.280624866485596
        mean_q: 4.183028697967529
        mean_td_error: 0.011130690574645996
        min_q: 4.11040735244751
    num_steps_sampled: 922624
    num_steps_trained: 92

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,901,1740.61,922624,50.6246,56.0881,46.6386,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-35-26
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.0881463398629
  episode_reward_mean: 50.65809698021332
  episode_reward_min: 46.63861602594288
  episodes_this_iter: 8
  episodes_total: 9240
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 924672
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0027496968396008015
        max_q: 4.54265022277832
        mean_q: 4.450174331665039
        mean_td_error: -0.034109845757484436
        min_q: 4.341055393218994
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001590730040334165
        max_q: 4.44115686416626
        mean_q: 4.361053466796875
        mean_td_error: -0.013446018099784851
        min_q: 4.254701137542725
    num_steps_sampled: 924672
    num_steps_trained: 92

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,903,1745.6,924672,50.6581,56.0881,46.6386,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-35-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.0881463398629
  episode_reward_mean: 50.17458693588855
  episode_reward_min: 46.63861602594288
  episodes_this_iter: 8
  episodes_total: 9272
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 927744
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001767102163285017
        max_q: 4.568911075592041
        mean_q: 4.479249954223633
        mean_td_error: 0.009977877140045166
        min_q: 4.375670433044434
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001642699702642858
        max_q: 4.465024471282959
        mean_q: 4.432427883148193
        mean_td_error: -0.024057671427726746
        min_q: 4.306880474090576
    num_steps_sampled: 927744
    num_steps_trained: 92

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,906,1752.54,927744,50.1746,56.0881,46.6386,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-35-41
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.75193890317599
  episode_reward_mean: 50.77613165028865
  episode_reward_min: 47.7978206630607
  episodes_this_iter: 8
  episodes_total: 9304
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 930816
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010661999695003033
        max_q: 4.484315872192383
        mean_q: 4.295316696166992
        mean_td_error: 0.1538645625114441
        min_q: 4.144376754760742
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0029547368176281452
        max_q: 4.569084644317627
        mean_q: 4.483219623565674
        mean_td_error: -0.0479414165019989
        min_q: 4.380713939666748
    num_steps_sampled: 930816
    num_steps_trained: 92982

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,909,1759.46,930816,50.7761,54.7519,47.7978,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-35-48
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.75193890317599
  episode_reward_mean: 50.68793933960728
  episode_reward_min: 47.66508942681921
  episodes_this_iter: 8
  episodes_total: 9336
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 933888
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00368575775064528
        max_q: 4.17475700378418
        mean_q: 4.039966106414795
        mean_td_error: -0.054118067026138306
        min_q: 3.850632905960083
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0033760142978280783
        max_q: 4.407927989959717
        mean_q: 4.3746466636657715
        mean_td_error: -0.05716340243816376
        min_q: 4.357194423675537
    num_steps_sampled: 933888
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,912,1766.44,933888,50.6879,54.7519,47.6651,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-35-55
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.75193890317599
  episode_reward_mean: 52.09538020888776
  episode_reward_min: 47.66508942681921
  episodes_this_iter: 16
  episodes_total: 9368
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 936960
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0023737819865345955
        max_q: 4.021105766296387
        mean_q: 3.9543652534484863
        mean_td_error: -0.02595553547143936
        min_q: 3.8186047077178955
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001800596946850419
        max_q: 4.566847324371338
        mean_q: 4.515003681182861
        mean_td_error: 0.02664230763912201
        min_q: 4.460330009460449
    num_steps_sampled: 936960
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,915,1773.14,936960,52.0954,54.7519,47.6651,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-36-02
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.86377294003019
  episode_reward_mean: 52.06095146685178
  episode_reward_min: 47.66508942681921
  episodes_this_iter: 16
  episodes_total: 9400
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 940032
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008254792541265488
        max_q: 4.212710857391357
        mean_q: 4.155094623565674
        mean_td_error: 0.11982008814811707
        min_q: 4.107250213623047
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0022298411931842566
        max_q: 4.668227672576904
        mean_q: 4.611114025115967
        mean_td_error: -0.031589850783348083
        min_q: 4.493789196014404
    num_steps_sampled: 940032
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,918,1779.7,940032,52.061,54.8638,47.6651,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-36-09
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.86377294003019
  episode_reward_mean: 52.52982426071707
  episode_reward_min: 47.729609969483775
  episodes_this_iter: 8
  episodes_total: 9424
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 943104
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00918678380548954
        max_q: 4.060841083526611
        mean_q: 3.9131360054016113
        mean_td_error: -0.11076658964157104
        min_q: 3.757788896560669
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001781817409209907
        max_q: 4.479674816131592
        mean_q: 4.440726280212402
        mean_td_error: -0.012423768639564514
        min_q: 4.280329704284668
    num_steps_sampled: 943104
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,921,1786.25,943104,52.5298,54.8638,47.7296,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-36-15
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.86377294003019
  episode_reward_mean: 51.95740064458421
  episode_reward_min: 46.2832309273478
  episodes_this_iter: 8
  episodes_total: 9456
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 946176
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004254139494150877
        max_q: 4.2359395027160645
        mean_q: 4.098752498626709
        mean_td_error: -0.053002648055553436
        min_q: 4.0334343910217285
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004586699418723583
        max_q: 4.432178974151611
        mean_q: 4.106292247772217
        mean_td_error: -0.020720593631267548
        min_q: 3.9013419151306152
    num_steps_sampled: 946176
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,924,1792.82,946176,51.9574,54.8638,46.2832,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-36-22
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.0394115069466
  episode_reward_mean: 52.33433602933017
  episode_reward_min: 46.2832309273478
  episodes_this_iter: 8
  episodes_total: 9488
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 949248
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0023026668932288885
        max_q: 4.46795654296875
        mean_q: 4.374730110168457
        mean_td_error: -0.034925997257232666
        min_q: 4.302892684936523
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03135579824447632
        max_q: 3.7232613563537598
        mean_q: 3.206507444381714
        mean_td_error: -0.4649568796157837
        min_q: 2.9452662467956543
    num_steps_sampled: 949248
    num_steps_trained: 948

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,927,1799.37,949248,52.3343,55.0394,46.2832,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-36-29
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.24073256852432
  episode_reward_mean: 52.5067984524458
  episode_reward_min: 46.2832309273478
  episodes_this_iter: 8
  episodes_total: 9520
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 952320
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0052980659529566765
        max_q: 4.498926162719727
        mean_q: 4.458640098571777
        mean_td_error: 0.06219610571861267
        min_q: 4.309298515319824
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0033693176228553057
        max_q: 3.5023040771484375
        mean_q: 3.306574821472168
        mean_td_error: -0.0534140020608902
        min_q: 3.217329978942871
    num_steps_sampled: 952320
    num_steps_trained: 951

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,930,1805.99,952320,52.5068,55.2407,46.2832,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-36-36
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.82198536396324
  episode_reward_mean: 53.376743031332644
  episode_reward_min: 48.06112372508159
  episodes_this_iter: 16
  episodes_total: 9552
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 955392
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0017685306956991553
        max_q: 4.530173301696777
        mean_q: 4.417435646057129
        mean_td_error: -0.022556990385055542
        min_q: 4.361606121063232
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0026078452356159687
        max_q: 3.7405614852905273
        mean_q: 3.5283620357513428
        mean_td_error: -0.002899870276451111
        min_q: 3.447354555130005
    num_steps_sampled: 955392
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,933,1812.74,955392,53.3767,56.822,48.0611,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-36-41
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.82198536396324
  episode_reward_mean: 53.58746590917911
  episode_reward_min: 48.06112372508159
  episodes_this_iter: 8
  episodes_total: 9568
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 957440
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002268437761813402
        max_q: 4.465935230255127
        mean_q: 4.368515968322754
        mean_td_error: -0.047181472182273865
        min_q: 4.3287739753723145
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005151881370693445
        max_q: 3.6471385955810547
        mean_q: 3.5863232612609863
        mean_td_error: -0.0766889750957489
        min_q: 3.4857592582702637
    num_steps_sampled: 957440
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,935,1817.65,957440,53.5875,56.822,48.0611,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-36-47
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.82198536396324
  episode_reward_mean: 54.006966570178854
  episode_reward_min: 48.06112372508159
  episodes_this_iter: 8
  episodes_total: 9592
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 959488
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001387984142638743
        max_q: 4.6175947189331055
        mean_q: 4.508204936981201
        mean_td_error: -0.023832112550735474
        min_q: 4.412173748016357
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0051278420723974705
        max_q: 3.853330373764038
        mean_q: 3.756448745727539
        mean_td_error: -0.04953451454639435
        min_q: 3.662144899368286
    num_steps_sampled: 959488
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,937,1823.17,959488,54.007,56.822,48.0611,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-36-53
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.82198536396324
  episode_reward_mean: 53.80199994002838
  episode_reward_min: 48.06112372508159
  episodes_this_iter: 8
  episodes_total: 9608
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 961536
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0010834898566827178
        max_q: 4.377353191375732
        mean_q: 4.251697540283203
        mean_td_error: -0.008760645985603333
        min_q: 4.136364936828613
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012273924425244331
        max_q: 3.8498542308807373
        mean_q: 3.5494940280914307
        mean_td_error: -0.1299416422843933
        min_q: 3.1063027381896973
    num_steps_sampled: 961536
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,939,1828.99,961536,53.802,56.822,48.0611,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-36-59
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.32116186321086
  episode_reward_mean: 54.699766612440705
  episode_reward_min: 51.459483132148705
  episodes_this_iter: 8
  episodes_total: 9632
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 963584
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0009577542659826577
        max_q: 4.064499378204346
        mean_q: 4.001965522766113
        mean_td_error: -0.012174546718597412
        min_q: 3.9319827556610107
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012428365647792816
        max_q: 2.985347270965576
        mean_q: 2.4123692512512207
        mean_td_error: 0.058685898780822754
        min_q: 1.7803089618682861
    num_steps_sampled: 963584
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,941,1834.96,963584,54.6998,59.3212,51.4595,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-37-05
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.32116186321086
  episode_reward_mean: 53.48939835756648
  episode_reward_min: 45.213566868429716
  episodes_this_iter: 16
  episodes_total: 9656
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 965632
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006133703049272299
        max_q: 4.136123180389404
        mean_q: 4.059178829193115
        mean_td_error: -0.11824458837509155
        min_q: 3.958496332168579
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.018618682399392128
        max_q: 3.180656909942627
        mean_q: 2.0536274909973145
        mean_td_error: 0.06637324392795563
        min_q: 1.2671864032745361
    num_steps_sampled: 965632
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,943,1840.87,965632,53.4894,59.3212,45.2136,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-37-12
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.32116186321086
  episode_reward_mean: 53.12695797787987
  episode_reward_min: 45.213566868429716
  episodes_this_iter: 8
  episodes_total: 9672
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 967680
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001504444400779903
        max_q: 4.1940083503723145
        mean_q: 4.088926315307617
        mean_td_error: -0.014722570776939392
        min_q: 3.994854688644409
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004071900621056557
        max_q: 2.087376832962036
        mean_q: 1.9393701553344727
        mean_td_error: 0.041586585342884064
        min_q: 1.8109323978424072
    num_steps_sampled: 967680
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,945,1847.06,967680,53.127,59.3212,45.2136,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-37-18
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.32116186321086
  episode_reward_mean: 52.602119815740615
  episode_reward_min: 45.213566868429716
  episodes_this_iter: 16
  episodes_total: 9696
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 969728
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0048887478187680244
        max_q: 4.301754474639893
        mean_q: 4.1727070808410645
        mean_td_error: 0.10404072701931
        min_q: 3.9601807594299316
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010636547580361366
        max_q: 2.6088366508483887
        mean_q: 2.4832990169525146
        mean_td_error: -0.13210366666316986
        min_q: 2.310110569000244
    num_steps_sampled: 969728
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,947,1853.36,969728,52.6021,59.3212,45.2136,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-37-25
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.32116186321086
  episode_reward_mean: 52.16294676996566
  episode_reward_min: 45.213566868429716
  episodes_this_iter: 8
  episodes_total: 9712
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 971776
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003184516914188862
        max_q: 4.402780055999756
        mean_q: 4.243631362915039
        mean_td_error: 0.05230773985385895
        min_q: 4.105833053588867
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0018034912645816803
        max_q: 2.9579756259918213
        mean_q: 2.805392265319824
        mean_td_error: 0.002208605408668518
        min_q: 2.6968636512756348
    num_steps_sampled: 971776
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,949,1859.66,971776,52.1629,59.3212,45.2136,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-37-31
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.08108915274236
  episode_reward_mean: 51.401499180850735
  episode_reward_min: 45.213566868429716
  episodes_this_iter: 8
  episodes_total: 9736
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 973824
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001156327547505498
        max_q: 4.376607894897461
        mean_q: 4.290136337280273
        mean_td_error: -0.0014297664165496826
        min_q: 4.243983268737793
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002441817196086049
        max_q: 3.300530195236206
        mean_q: 3.0670931339263916
        mean_td_error: -0.026461631059646606
        min_q: 3.000133514404297
    num_steps_sampled: 973824
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,951,1865.35,973824,51.4015,57.0811,45.2136,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-37-36
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.08108915274236
  episode_reward_mean: 51.46286161083605
  episode_reward_min: 45.213566868429716
  episodes_this_iter: 8
  episodes_total: 9752
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 975872
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0017476011998951435
        max_q: 4.406777381896973
        mean_q: 4.276613235473633
        mean_td_error: 0.033026307821273804
        min_q: 4.207645893096924
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0014763284707441926
        max_q: 3.52554988861084
        mean_q: 3.4095029830932617
        mean_td_error: -0.010820597410202026
        min_q: 3.270324468612671
    num_steps_sampled: 975872
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,953,1870.64,975872,51.4629,57.0811,45.2136,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-37-42
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.187171554489076
  episode_reward_mean: 52.13281405188206
  episode_reward_min: 48.49703329663104
  episodes_this_iter: 8
  episodes_total: 9776
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 977920
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00626172311604023
        max_q: 4.311832904815674
        mean_q: 4.265010833740234
        mean_td_error: -0.12585246562957764
        min_q: 4.2072978019714355
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0012950642267242074
        max_q: 3.717390298843384
        mean_q: 3.6502878665924072
        mean_td_error: -0.002921275794506073
        min_q: 3.5702686309814453
    num_steps_sampled: 977920
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,955,1875.77,977920,52.1328,56.1872,48.497,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-37-47
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.187171554489076
  episode_reward_mean: 52.048537228826966
  episode_reward_min: 48.49703329663104
  episodes_this_iter: 8
  episodes_total: 9792
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 979968
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0063925376161932945
        max_q: 4.310552597045898
        mean_q: 4.242238998413086
        mean_td_error: -0.1401766985654831
        min_q: 4.170732498168945
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00769842928275466
        max_q: 3.8321878910064697
        mean_q: 3.6921961307525635
        mean_td_error: -0.09380891174077988
        min_q: 3.6094653606414795
    num_steps_sampled: 979968
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,957,1880.74,979968,52.0485,56.1872,48.497,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-37-54
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.714331746436635
  episode_reward_mean: 52.6735914143253
  episode_reward_min: 48.38864134115944
  episodes_this_iter: 8
  episodes_total: 9824
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 983040
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.05254966393113136
        max_q: 4.437736511230469
        mean_q: 3.087879180908203
        mean_td_error: -0.8405421376228333
        min_q: 2.153210163116455
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010902449488639832
        max_q: 3.716158390045166
        mean_q: 3.507716655731201
        mean_td_error: -0.1483696699142456
        min_q: 3.400818347930908
    num_steps_sampled: 983040
    num_steps_trained: 98204

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,960,1887.72,983040,52.6736,57.7143,48.3886,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-38-01
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.714331746436635
  episode_reward_mean: 53.84620096708848
  episode_reward_min: 48.38864134115944
  episodes_this_iter: 8
  episodes_total: 9856
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 986112
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03579670935869217
        max_q: 4.708424091339111
        mean_q: 4.3753461837768555
        mean_td_error: -0.40405765175819397
        min_q: 4.274085521697998
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.021772021427750587
        max_q: 3.2222442626953125
        mean_q: 3.0799033641815186
        mean_td_error: -0.28060227632522583
        min_q: 2.956979274749756
    num_steps_sampled: 986112
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,963,1894.27,986112,53.8462,57.7143,48.3886,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-38-07
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.12228337602814
  episode_reward_mean: 54.27706261098406
  episode_reward_min: 47.734027063931485
  episodes_this_iter: 8
  episodes_total: 9888
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 989184
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0032782803755253553
        max_q: 4.4648518562316895
        mean_q: 4.317684173583984
        mean_td_error: -0.03939189016819
        min_q: 4.259243965148926
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0037675066851079464
        max_q: 3.28639817237854
        mean_q: 3.039337396621704
        mean_td_error: -0.041400447487831116
        min_q: 2.8472394943237305
    num_steps_sampled: 989184
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,966,1900.6,989184,54.2771,58.1223,47.734,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-38-14
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.12228337602814
  episode_reward_mean: 54.215795222529906
  episode_reward_min: 47.734027063931485
  episodes_this_iter: 8
  episodes_total: 9920
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 992256
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0023769468534737825
        max_q: 4.1942877769470215
        mean_q: 4.096776962280273
        mean_td_error: -0.026227451860904694
        min_q: 3.9931530952453613
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004839067347347736
        max_q: 3.4403903484344482
        mean_q: 3.2921299934387207
        mean_td_error: -0.06105516105890274
        min_q: 3.205970048904419
    num_steps_sampled: 992256
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,969,1906.96,992256,54.2158,58.1223,47.734,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-38-20
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.12228337602814
  episode_reward_mean: 53.87013330232681
  episode_reward_min: 47.734027063931485
  episodes_this_iter: 16
  episodes_total: 9952
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 995328
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0037107428070157766
        max_q: 4.470232009887695
        mean_q: 4.400528907775879
        mean_td_error: -0.04357379674911499
        min_q: 4.216241836547852
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010752820409834385
        max_q: 3.376002550125122
        mean_q: 3.2590062618255615
        mean_td_error: -0.14773084223270416
        min_q: 3.055861711502075
    num_steps_sampled: 995328
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,972,1913.31,995328,53.8701,58.1223,47.734,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-38-27
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.9556493411426
  episode_reward_mean: 53.89108413728015
  episode_reward_min: 49.61695230283396
  episodes_this_iter: 16
  episodes_total: 9984
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 998400
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0025800031144171953
        max_q: 4.355587959289551
        mean_q: 4.2966227531433105
        mean_td_error: -0.0341205894947052
        min_q: 4.193276405334473
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005927930120378733
        max_q: 3.524451971054077
        mean_q: 3.397325277328491
        mean_td_error: -0.06745562702417374
        min_q: 3.313847541809082
    num_steps_sampled: 998400
    num_steps_trained: 9

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,975,1919.62,998400,53.8911,56.9556,49.617,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-38-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.63475613525731
  episode_reward_mean: 54.284185789535314
  episode_reward_min: 49.61695230283396
  episodes_this_iter: 8
  episodes_total: 10008
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1001472
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013225626200437546
        max_q: 4.154491424560547
        mean_q: 4.014598369598389
        mean_td_error: -0.1778314858675003
        min_q: 3.753093719482422
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0030785189010202885
        max_q: 3.662444829940796
        mean_q: 3.467238426208496
        mean_td_error: -0.029994197189807892
        min_q: 3.2767534255981445
    num_steps_sampled: 1001472
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,978,1926,1001472,54.2842,58.6348,49.617,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-38-41
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.63475613525731
  episode_reward_mean: 54.7112165827102
  episode_reward_min: 49.61695230283396
  episodes_this_iter: 8
  episodes_total: 10040
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1004544
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008141069673001766
        max_q: 3.9572091102600098
        mean_q: 3.870875835418701
        mean_td_error: -0.08937223255634308
        min_q: 3.8202316761016846
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0033798194490373135
        max_q: 3.6999239921569824
        mean_q: 3.5892505645751953
        mean_td_error: -0.042136624455451965
        min_q: 3.4644217491149902
    num_steps_sampled: 1004544
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,981,1933.03,1004544,54.7112,58.6348,49.617,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-38-48
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.63475613525731
  episode_reward_mean: 54.33405371867058
  episode_reward_min: 49.50944298941876
  episodes_this_iter: 8
  episodes_total: 10072
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1007616
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008453951217234135
        max_q: 3.7754502296447754
        mean_q: 3.6964364051818848
        mean_td_error: -0.1088671088218689
        min_q: 3.6224586963653564
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008093003183603287
        max_q: 3.9216604232788086
        mean_q: 3.790722370147705
        mean_td_error: -0.1086692363023758
        min_q: 3.7414543628692627
    num_steps_sampled: 1007616
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,984,1940.16,1007616,54.3341,58.6348,49.5094,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-38-56
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.1432907249543
  episode_reward_mean: 54.26450884053186
  episode_reward_min: 49.50944298941876
  episodes_this_iter: 8
  episodes_total: 10104
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1010688
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008992950432002544
        max_q: 3.7827672958374023
        mean_q: 3.683069944381714
        mean_td_error: -0.136956125497818
        min_q: 3.5691277980804443
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005850648041814566
        max_q: 3.8685615062713623
        mean_q: 3.750675678253174
        mean_td_error: -0.0705128014087677
        min_q: 3.7105865478515625
    num_steps_sampled: 1010688
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,987,1947.24,1010688,54.2645,57.1433,49.5094,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-39-03
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.124510445217005
  episode_reward_mean: 53.80079804760982
  episode_reward_min: 49.50944298941876
  episodes_this_iter: 16
  episodes_total: 10136
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1013760
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011573298834264278
        max_q: 3.932779312133789
        mean_q: 3.84775972366333
        mean_td_error: -0.1627940535545349
        min_q: 3.7717666625976562
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013698136433959007
        max_q: 4.107813358306885
        mean_q: 3.8976101875305176
        mean_td_error: -0.14183524250984192
        min_q: 3.7877914905548096
    num_steps_sampled: 1013760
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,990,1954.15,1013760,53.8008,57.1245,49.5094,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-39-10
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.839106790040375
  episode_reward_mean: 53.91963962992681
  episode_reward_min: 49.50944298941876
  episodes_this_iter: 16
  episodes_total: 10168
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1016832
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008021336048841476
        max_q: 4.2517876625061035
        mean_q: 4.191960334777832
        mean_td_error: -0.10294777154922485
        min_q: 4.123973369598389
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009165540337562561
        max_q: 4.098068714141846
        mean_q: 3.7712454795837402
        mean_td_error: -0.07339327037334442
        min_q: 3.5321083068847656
    num_steps_sampled: 1016832
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,993,1961.11,1016832,53.9196,56.8391,49.5094,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-39-17
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.65842939511363
  episode_reward_mean: 53.84180732750533
  episode_reward_min: 49.16015086281525
  episodes_this_iter: 8
  episodes_total: 10192
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1019904
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005602756515145302
        max_q: 4.192694187164307
        mean_q: 4.092795372009277
        mean_td_error: -0.08050042390823364
        min_q: 4.003120422363281
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.04215187206864357
        max_q: 2.9136548042297363
        mean_q: 1.7739055156707764
        mean_td_error: -0.4228355288505554
        min_q: 1.1183485984802246
    num_steps_sampled: 1019904
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,996,1967.81,1019904,53.8418,57.6584,49.1602,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-39-24
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.65842939511363
  episode_reward_mean: 54.13278806516405
  episode_reward_min: 49.16015086281525
  episodes_this_iter: 8
  episodes_total: 10224
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1022976
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015768393874168396
        max_q: 4.015929698944092
        mean_q: 3.6478219032287598
        mean_td_error: -0.21486815810203552
        min_q: 3.4147231578826904
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.019466478377580643
        max_q: 1.1166417598724365
        mean_q: 0.9557939767837524
        mean_td_error: -0.19283869862556458
        min_q: 0.8133014440536499
    num_steps_sampled: 1022976
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,999,1974.44,1022976,54.1328,57.6584,49.1602,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-39-31
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.65842939511363
  episode_reward_mean: 53.497234963070426
  episode_reward_min: 49.16015086281525
  episodes_this_iter: 8
  episodes_total: 10256
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1026048
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015979282557964325
        max_q: 3.795854330062866
        mean_q: 3.511946201324463
        mean_td_error: -0.2224295735359192
        min_q: 3.1724090576171875
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01007667277008295
        max_q: 1.7970130443572998
        mean_q: 1.6746259927749634
        mean_td_error: -0.09425435960292816
        min_q: 1.5571913719177246
    num_steps_sampled: 1026048
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1002,1981.13,1026048,53.4972,57.6584,49.1602,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-39-38
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.65842939511363
  episode_reward_mean: 52.801182187343876
  episode_reward_min: 47.71608314087943
  episodes_this_iter: 8
  episodes_total: 10288
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1029120
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0037810213398188353
        max_q: 3.1462302207946777
        mean_q: 2.9581782817840576
        mean_td_error: -0.043424785137176514
        min_q: 2.849188804626465
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009322652593255043
        max_q: 2.4720640182495117
        mean_q: 2.3442344665527344
        mean_td_error: 0.20788052678108215
        min_q: 2.2207348346710205
    num_steps_sampled: 1029120
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1005,1987.71,1029120,52.8012,57.6584,47.7161,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-39-45
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.861668577442096
  episode_reward_mean: 52.272206350903524
  episode_reward_min: 47.71608314087943
  episodes_this_iter: 16
  episodes_total: 10320
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1032192
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0056532989256083965
        max_q: 3.2235045433044434
        mean_q: 3.1337664127349854
        mean_td_error: -0.08006469905376434
        min_q: 3.0048670768737793
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0042942967265844345
        max_q: 3.665618896484375
        mean_q: 3.5859053134918213
        mean_td_error: 0.0844743475317955
        min_q: 3.385030508041382
    num_steps_sampled: 1032192
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1008,1994.34,1032192,52.2722,55.8617,47.7161,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-39-51
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.861668577442096
  episode_reward_mean: 51.863809378900804
  episode_reward_min: 47.71608314087943
  episodes_this_iter: 16
  episodes_total: 10352
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1035264
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009601280093193054
        max_q: 3.184591054916382
        mean_q: 3.067898750305176
        mean_td_error: -0.1378142237663269
        min_q: 2.929582118988037
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0012524562189355493
        max_q: 4.364780902862549
        mean_q: 4.3308563232421875
        mean_td_error: 0.024480685591697693
        min_q: 4.265461444854736
    num_steps_sampled: 1035264
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1011,2000.91,1035264,51.8638,55.8617,47.7161,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-39-58
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.861668577442096
  episode_reward_mean: 51.42739876992038
  episode_reward_min: 47.71608314087943
  episodes_this_iter: 8
  episodes_total: 10376
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1038336
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00248326756991446
        max_q: 3.5851752758026123
        mean_q: 3.508307933807373
        mean_td_error: -0.03146231919527054
        min_q: 3.388051986694336
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003431198187172413
        max_q: 4.798051357269287
        mean_q: 4.757220268249512
        mean_td_error: 0.07324035465717316
        min_q: 4.70005989074707
    num_steps_sampled: 1038336
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1014,2007.74,1038336,51.4274,55.8617,47.7161,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-40-06
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.13886148854425
  episode_reward_mean: 51.22153251024999
  episode_reward_min: 47.97472326547631
  episodes_this_iter: 8
  episodes_total: 10408
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1041408
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007696828339248896
        max_q: 3.65254545211792
        mean_q: 3.5241286754608154
        mean_td_error: -0.10036803781986237
        min_q: 3.4053750038146973
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0003863904275931418
        max_q: 4.967203617095947
        mean_q: 4.92446231842041
        mean_td_error: 0.004511117935180664
        min_q: 4.819070816040039
    num_steps_sampled: 1041408
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1017,2014.63,1041408,51.2215,55.1389,47.9747,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-40-13
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.408786865781515
  episode_reward_mean: 50.79033809581417
  episode_reward_min: 47.97472326547631
  episodes_this_iter: 8
  episodes_total: 10440
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1044480
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0018804719438776374
        max_q: 3.6649534702301025
        mean_q: 3.545865058898926
        mean_td_error: -0.024363689124584198
        min_q: 3.4304428100585938
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002211423357948661
        max_q: 5.115297317504883
        mean_q: 5.062138557434082
        mean_td_error: 0.041002094745635986
        min_q: 4.966680526733398
    num_steps_sampled: 1044480
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1020,2021.51,1044480,50.7903,53.4088,47.9747,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-40-20
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.408786865781515
  episode_reward_mean: 50.83186278969632
  episode_reward_min: 46.30444610196995
  episodes_this_iter: 8
  episodes_total: 10472
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1047552
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005997581873089075
        max_q: 3.718017101287842
        mean_q: 3.6459178924560547
        mean_td_error: -0.07545862346887589
        min_q: 3.569012403488159
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0008052369812503457
        max_q: 5.104713439941406
        mean_q: 5.06170654296875
        mean_td_error: -0.014389723539352417
        min_q: 5.012857437133789
    num_steps_sampled: 1047552
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1023,2028.87,1047552,50.8319,53.4088,46.3044,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-40-26
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.921157649078175
  episode_reward_mean: 50.83395961150053
  episode_reward_min: 46.30444610196995
  episodes_this_iter: 16
  episodes_total: 10496
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1049600
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009445506148040295
        max_q: 3.7365002632141113
        mean_q: 3.617647647857666
        mean_td_error: 0.1455235481262207
        min_q: 3.569976806640625
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001910172519274056
        max_q: 5.205005645751953
        mean_q: 5.1713361740112305
        mean_td_error: 0.03513920307159424
        min_q: 5.09278678894043
    num_steps_sampled: 1049600
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1025,2033.96,1049600,50.834,54.9212,46.3044,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-40-31
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.921157649078175
  episode_reward_mean: 50.680825171561835
  episode_reward_min: 46.30444610196995
  episodes_this_iter: 8
  episodes_total: 10512
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1051648
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0054945568554103374
        max_q: 3.8946211338043213
        mean_q: 3.790334463119507
        mean_td_error: -0.07025747746229172
        min_q: 3.669340133666992
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.000883430999238044
        max_q: 5.23839807510376
        mean_q: 5.148667335510254
        mean_td_error: -0.0004980117082595825
        min_q: 5.070854187011719
    num_steps_sampled: 1051648
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1027,2038.97,1051648,50.6808,54.9212,46.3044,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-40-36
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.921157649078175
  episode_reward_mean: 50.43434254318231
  episode_reward_min: 46.30444610196995
  episodes_this_iter: 16
  episodes_total: 10536
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1053696
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004981954116374254
        max_q: 3.9454309940338135
        mean_q: 3.8936688899993896
        mean_td_error: -0.07969334721565247
        min_q: 3.7917444705963135
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006132090929895639
        max_q: 4.994036674499512
        mean_q: 4.94611930847168
        mean_td_error: -0.13911090791225433
        min_q: 4.918545246124268
    num_steps_sampled: 1053696
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1029,2044.06,1053696,50.4343,54.9212,46.3044,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-40-41
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.921157649078175
  episode_reward_mean: 50.52072172474121
  episode_reward_min: 46.30444610196995
  episodes_this_iter: 8
  episodes_total: 10552
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1055744
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01205954048782587
        max_q: 3.741938829421997
        mean_q: 3.569282054901123
        mean_td_error: -0.2015553116798401
        min_q: 3.3460121154785156
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0028594525065273046
        max_q: 4.9202117919921875
        mean_q: 4.886975288391113
        mean_td_error: -0.05575434863567352
        min_q: 4.786331653594971
    num_steps_sampled: 1055744
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1031,2049.09,1055744,50.5207,54.9212,46.3044,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-40-47
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.921157649078175
  episode_reward_mean: 50.80436968881828
  episode_reward_min: 47.0183631150912
  episodes_this_iter: 16
  episodes_total: 10576
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1057792
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.019188471138477325
        max_q: 3.5003020763397217
        mean_q: 3.2973990440368652
        mean_td_error: -0.324205219745636
        min_q: 3.1100900173187256
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004214494023472071
        max_q: 4.911088466644287
        mean_q: 4.850915431976318
        mean_td_error: -0.08486759662628174
        min_q: 4.803328037261963
    num_steps_sampled: 1057792
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1033,2054.16,1057792,50.8044,54.9212,47.0184,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-40-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.02024762984916
  episode_reward_mean: 50.46147457222267
  episode_reward_min: 47.0183631150912
  episodes_this_iter: 8
  episodes_total: 10592
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1059840
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006874739192426205
        max_q: 3.4930930137634277
        mean_q: 3.4056472778320312
        mean_td_error: -0.09906697273254395
        min_q: 3.290736198425293
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003949676174670458
        max_q: 4.854597568511963
        mean_q: 4.808422565460205
        mean_td_error: -0.0824083536863327
        min_q: 4.720331192016602
    num_steps_sampled: 1059840
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1035,2059.24,1059840,50.4615,54.0202,47.0184,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-40-57
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.02024762984916
  episode_reward_mean: 50.764997355167836
  episode_reward_min: 47.0183631150912
  episodes_this_iter: 8
  episodes_total: 10616
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1061888
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006997169926762581
        max_q: 3.5644075870513916
        mean_q: 3.500457286834717
        mean_td_error: -0.12156645208597183
        min_q: 3.4010629653930664
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0018165294313803315
        max_q: 4.887485027313232
        mean_q: 4.817892074584961
        mean_td_error: -0.03623571991920471
        min_q: 4.736329555511475
    num_steps_sampled: 1061888
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1037,2064.28,1061888,50.765,54.0202,47.0184,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-41-04
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.63834392571477
  episode_reward_mean: 50.915175999044315
  episode_reward_min: 47.0183631150912
  episodes_this_iter: 16
  episodes_total: 10648
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1064960
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0007719910354353487
        max_q: 3.6188673973083496
        mean_q: 3.5590925216674805
        mean_td_error: -0.0002564042806625366
        min_q: 3.407282590866089
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003680875990539789
        max_q: 4.871014595031738
        mean_q: 4.832125663757324
        mean_td_error: -0.06505914032459259
        min_q: 4.781592845916748
    num_steps_sampled: 1064960
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1040,2071.38,1064960,50.9152,53.6383,47.0184,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-41-11
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.5656691821379
  episode_reward_mean: 50.993230651265286
  episode_reward_min: 48.678872285987175
  episodes_this_iter: 16
  episodes_total: 10680
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1068032
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007218839135020971
        max_q: 3.822993040084839
        mean_q: 3.684610366821289
        mean_td_error: -0.11526492983102798
        min_q: 3.582559823989868
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001818214193917811
        max_q: 4.958930492401123
        mean_q: 4.833453178405762
        mean_td_error: -0.028262943029403687
        min_q: 4.768558979034424
    num_steps_sampled: 1068032
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1043,2078.18,1068032,50.9932,53.5657,48.6789,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-41-18
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.5656691821379
  episode_reward_mean: 50.63731543993586
  episode_reward_min: 47.574924205515856
  episodes_this_iter: 8
  episodes_total: 10704
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1071104
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00570819154381752
        max_q: 3.6930322647094727
        mean_q: 3.521489143371582
        mean_td_error: -0.07244446873664856
        min_q: 3.3415913581848145
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004600741434842348
        max_q: 4.771642684936523
        mean_q: 4.678109645843506
        mean_td_error: 0.10139629244804382
        min_q: 4.475094795227051
    num_steps_sampled: 1071104
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1046,2084.82,1071104,50.6373,53.5657,47.5749,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-41-25
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 52.684731891089584
  episode_reward_mean: 50.232840333348896
  episode_reward_min: 46.30003517533895
  episodes_this_iter: 8
  episodes_total: 10736
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1074176
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0014168877387419343
        max_q: 3.6073734760284424
        mean_q: 3.477170705795288
        mean_td_error: -0.011951655149459839
        min_q: 3.4041011333465576
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012776288203895092
        max_q: 4.723203182220459
        mean_q: 4.52470588684082
        mean_td_error: -0.20170918107032776
        min_q: 4.270227909088135
    num_steps_sampled: 1074176
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1049,2091.54,1074176,50.2328,52.6847,46.3,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-41-32
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.40370401506538
  episode_reward_mean: 49.99914561959994
  episode_reward_min: 46.30003517533895
  episodes_this_iter: 8
  episodes_total: 10768
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1077248
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01135589461773634
        max_q: 3.6520345211029053
        mean_q: 3.5878922939300537
        mean_td_error: -0.19689218699932098
        min_q: 3.5116078853607178
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.019380848854780197
        max_q: 4.162858009338379
        mean_q: 3.968797206878662
        mean_td_error: -0.3031768202781677
        min_q: 3.77152943611145
    num_steps_sampled: 1077248
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1052,2098.14,1077248,49.9991,54.4037,46.3,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-41-39
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.40370401506538
  episode_reward_mean: 50.18525964854172
  episode_reward_min: 44.842145292371214
  episodes_this_iter: 8
  episodes_total: 10800
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1080320
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001673452672548592
        max_q: 3.920478105545044
        mean_q: 3.8397557735443115
        mean_td_error: -0.0013870000839233398
        min_q: 3.753774881362915
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0058394502848386765
        max_q: 3.9707517623901367
        mean_q: 3.910093069076538
        mean_td_error: -0.08949477225542068
        min_q: 3.8488070964813232
    num_steps_sampled: 1080320
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1055,2104.53,1080320,50.1853,54.4037,44.8421,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-41-45
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.40370401506538
  episode_reward_mean: 49.417706964576794
  episode_reward_min: 43.5260087368306
  episodes_this_iter: 16
  episodes_total: 10832
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1083392
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006647665519267321
        max_q: 4.063735008239746
        mean_q: 3.978458881378174
        mean_td_error: -0.09905479848384857
        min_q: 3.8647408485412598
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011969744227826595
        max_q: 3.9911739826202393
        mean_q: 3.8318164348602295
        mean_td_error: -0.179772287607193
        min_q: 3.6557018756866455
    num_steps_sampled: 1083392
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1058,2110.95,1083392,49.4177,54.4037,43.526,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-41-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.05480056815517
  episode_reward_mean: 47.938682310246016
  episode_reward_min: 43.5260087368306
  episodes_this_iter: 16
  episodes_total: 10864
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1086464
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002917727455496788
        max_q: 4.160808086395264
        mean_q: 4.127569198608398
        mean_td_error: 0.03276519477367401
        min_q: 3.9362874031066895
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006102561950683594
        max_q: 3.702564239501953
        mean_q: 3.6083877086639404
        mean_td_error: -0.09676897525787354
        min_q: 3.5327882766723633
    num_steps_sampled: 1086464
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1061,2117.4,1086464,47.9387,54.0548,43.526,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-41-59
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.8759853362258
  episode_reward_mean: 48.160796097605335
  episode_reward_min: 43.5260087368306
  episodes_this_iter: 8
  episodes_total: 10888
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1089536
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.055519863963127136
        max_q: 3.892448663711548
        mean_q: 3.657498836517334
        mean_td_error: -0.6977609395980835
        min_q: 3.2944540977478027
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008493333123624325
        max_q: 3.812948226928711
        mean_q: 3.688894748687744
        mean_td_error: -0.13767361640930176
        min_q: 3.520010232925415
    num_steps_sampled: 1089536
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1064,2124.01,1089536,48.1608,53.876,43.526,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-42-06
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 52.17358659601617
  episode_reward_mean: 48.38965407940313
  episode_reward_min: 43.5260087368306
  episodes_this_iter: 8
  episodes_total: 10920
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1092608
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02717539295554161
        max_q: 3.5417861938476562
        mean_q: 3.0189290046691895
        mean_td_error: -0.2931515574455261
        min_q: 2.871011257171631
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01149758044630289
        max_q: 3.5882608890533447
        mean_q: 3.386892795562744
        mean_td_error: -0.16791623830795288
        min_q: 3.194866180419922
    num_steps_sampled: 1092608
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1067,2130.87,1092608,48.3897,52.1736,43.526,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-42-13
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.99395424073437
  episode_reward_mean: 50.73580079345581
  episode_reward_min: 43.674111203242916
  episodes_this_iter: 8
  episodes_total: 10952
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1095680
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013682697899639606
        max_q: 3.2711238861083984
        mean_q: 3.0684776306152344
        mean_td_error: -0.15144726634025574
        min_q: 2.950105667114258
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007823155261576176
        max_q: 3.6507351398468018
        mean_q: 3.398592233657837
        mean_td_error: -0.12538659572601318
        min_q: 3.2385623455047607
    num_steps_sampled: 1095680
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1070,2137.52,1095680,50.7358,55.994,43.6741,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-42-20
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.28657574168271
  episode_reward_mean: 53.036596932045825
  episode_reward_min: 45.95421815959188
  episodes_this_iter: 8
  episodes_total: 10984
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1098752
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01741810515522957
        max_q: 3.2478549480438232
        mean_q: 2.991908550262451
        mean_td_error: -0.17444860935211182
        min_q: 2.8308825492858887
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0052645388059318066
        max_q: 3.379392147064209
        mean_q: 3.2965517044067383
        mean_td_error: -0.08505573868751526
        min_q: 3.2402420043945312
    num_steps_sampled: 1098752
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1073,2144.38,1098752,53.0366,56.2866,45.9542,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-42-27
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.81412083578523
  episode_reward_mean: 54.31655920177565
  episode_reward_min: 49.5654407887334
  episodes_this_iter: 8
  episodes_total: 11016
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1101824
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008828879334032536
        max_q: 3.2297043800354004
        mean_q: 3.0622098445892334
        mean_td_error: -0.07278844714164734
        min_q: 2.9657537937164307
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005402599461376667
        max_q: 3.4100983142852783
        mean_q: 3.2578506469726562
        mean_td_error: -0.08570874482393265
        min_q: 3.1504533290863037
    num_steps_sampled: 1101824
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1076,2151.35,1101824,54.3166,57.8141,49.5654,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-42-35
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.81412083578523
  episode_reward_mean: 54.49344918784156
  episode_reward_min: 49.195375463001405
  episodes_this_iter: 16
  episodes_total: 11048
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1104896
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03170148655772209
        max_q: 3.516866445541382
        mean_q: 3.0522334575653076
        mean_td_error: -0.2723415791988373
        min_q: 2.8053579330444336
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002024258952587843
        max_q: 3.5992543697357178
        mean_q: 3.490067958831787
        mean_td_error: 0.014684565365314484
        min_q: 3.3421683311462402
    num_steps_sampled: 1104896
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1079,2158.26,1104896,54.4934,57.8141,49.1954,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-42-42
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.81412083578523
  episode_reward_mean: 53.653483930587775
  episode_reward_min: 49.195375463001405
  episodes_this_iter: 8
  episodes_total: 11072
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1107968
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0330502986907959
        max_q: 2.8687198162078857
        mean_q: 2.6872429847717285
        mean_td_error: -0.3345084488391876
        min_q: 2.5471603870391846
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0062253437936306
        max_q: 3.434497117996216
        mean_q: 3.289621114730835
        mean_td_error: -0.0938342735171318
        min_q: 3.2336697578430176
    num_steps_sampled: 1107968
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1082,2165.15,1107968,53.6535,57.8141,49.1954,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-42-47
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.81412083578523
  episode_reward_mean: 53.23503712947344
  episode_reward_min: 49.195375463001405
  episodes_this_iter: 8
  episodes_total: 11096
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1110016
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014612575992941856
        max_q: 2.8822643756866455
        mean_q: 2.6805996894836426
        mean_td_error: -0.1379225254058838
        min_q: 2.366675615310669
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01263887993991375
        max_q: 3.61791729927063
        mean_q: 3.233856439590454
        mean_td_error: -0.19113725423812866
        min_q: 2.916090488433838
    num_steps_sampled: 1110016
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1084,2170,1110016,53.235,57.8141,49.1954,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-42-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.91591822859306
  episode_reward_mean: 53.3214976946564
  episode_reward_min: 48.80402638283053
  episodes_this_iter: 16
  episodes_total: 11120
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1112064
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01110604777932167
        max_q: 2.8314034938812256
        mean_q: 2.635000228881836
        mean_td_error: -0.10741297900676727
        min_q: 2.4824652671813965
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0033048922196030617
        max_q: 3.3878133296966553
        mean_q: 3.2292263507843018
        mean_td_error: -0.023970238864421844
        min_q: 2.987823009490967
    num_steps_sampled: 1112064
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1086,2174.84,1112064,53.3215,58.9159,48.804,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-42-57
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.91591822859306
  episode_reward_mean: 53.09071709312564
  episode_reward_min: 48.765065646043205
  episodes_this_iter: 8
  episodes_total: 11136
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1114112
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02584231086075306
        max_q: 2.655022382736206
        mean_q: 2.4026660919189453
        mean_td_error: -0.26070597767829895
        min_q: 2.0735106468200684
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0177014097571373
        max_q: 3.4638795852661133
        mean_q: 3.208022117614746
        mean_td_error: -0.2900695502758026
        min_q: 2.975123167037964
    num_steps_sampled: 1114112
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1088,2179.71,1114112,53.0907,58.9159,48.7651,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-43-04
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.91591822859306
  episode_reward_mean: 54.071119144613824
  episode_reward_min: 48.765065646043205
  episodes_this_iter: 8
  episodes_total: 11168
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1117184
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0692375898361206
        max_q: 2.155693531036377
        mean_q: 1.796099305152893
        mean_td_error: -0.6800208687782288
        min_q: 1.489969253540039
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03967207670211792
        max_q: 2.9866933822631836
        mean_q: 2.277801036834717
        mean_td_error: -0.40862664580345154
        min_q: 1.6415472030639648
    num_steps_sampled: 1117184
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1091,2186.79,1117184,54.0711,58.9159,48.7651,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-43-12
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.91591822859306
  episode_reward_mean: 54.42474735927318
  episode_reward_min: 48.765065646043205
  episodes_this_iter: 8
  episodes_total: 11200
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1120256
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007848745211958885
        max_q: 1.738022804260254
        mean_q: 1.5679361820220947
        mean_td_error: 0.10877400636672974
        min_q: 1.3899080753326416
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013790150173008442
        max_q: 1.9864635467529297
        mean_q: 1.7555556297302246
        mean_td_error: -0.1493295282125473
        min_q: 1.6164461374282837
    num_steps_sampled: 1120256
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1094,2193.93,1120256,54.4247,58.9159,48.7651,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-43-19
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.81877617002057
  episode_reward_mean: 54.66074573895346
  episode_reward_min: 48.765065646043205
  episodes_this_iter: 16
  episodes_total: 11232
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1123328
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.018360065296292305
        max_q: 3.001278877258301
        mean_q: 2.8425955772399902
        mean_td_error: 0.24626334011554718
        min_q: 2.667550802230835
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00248731579631567
        max_q: 1.7639167308807373
        mean_q: 1.6866856813430786
        mean_td_error: 0.03534863889217377
        min_q: 1.4663349390029907
    num_steps_sampled: 1123328
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1097,2200.89,1123328,54.6607,57.8188,48.7651,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-43-26
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.81877617002057
  episode_reward_mean: 54.06771756738703
  episode_reward_min: 49.0008629556715
  episodes_this_iter: 16
  episodes_total: 11264
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1126400
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010442261584103107
        max_q: 3.490652084350586
        mean_q: 3.3855392932891846
        mean_td_error: -0.1228853315114975
        min_q: 3.2524681091308594
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002735786372795701
        max_q: 2.612536907196045
        mean_q: 2.5229756832122803
        mean_td_error: 0.03284778445959091
        min_q: 2.35884428024292
    num_steps_sampled: 1126400
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1100,2207.5,1126400,54.0677,57.8188,49.0009,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-43-32
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.734623012181416
  episode_reward_mean: 54.37187540194716
  episode_reward_min: 49.0008629556715
  episodes_this_iter: 8
  episodes_total: 11288
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1129472
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0024331165477633476
        max_q: 3.9618051052093506
        mean_q: 3.872743606567383
        mean_td_error: 0.03068166971206665
        min_q: 3.8273730278015137
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0015522456960752606
        max_q: 3.250223159790039
        mean_q: 3.1204428672790527
        mean_td_error: 0.01606334000825882
        min_q: 2.917905807495117
    num_steps_sampled: 1129472
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1103,2214.11,1129472,54.3719,57.7346,49.0009,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-43-39
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.662430139477785
  episode_reward_mean: 54.42329923489311
  episode_reward_min: 49.0008629556715
  episodes_this_iter: 8
  episodes_total: 11320
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1132544
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009991191327571869
        max_q: 4.196667671203613
        mean_q: 4.083636283874512
        mean_td_error: -0.08933928608894348
        min_q: 3.9662322998046875
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001987909898161888
        max_q: 3.829583168029785
        mean_q: 3.751591682434082
        mean_td_error: -0.03499654680490494
        min_q: 3.64910888671875
    num_steps_sampled: 1132544
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1106,2220.76,1132544,54.4233,57.6624,49.0009,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-43-46
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.87534344875002
  episode_reward_mean: 54.806876588478545
  episode_reward_min: 49.0008629556715
  episodes_this_iter: 8
  episodes_total: 11352
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1135616
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006163387559354305
        max_q: 4.162533760070801
        mean_q: 4.067602634429932
        mean_td_error: 0.05805520713329315
        min_q: 3.9038503170013428
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009291760623455048
        max_q: 3.7907474040985107
        mean_q: 3.7131621837615967
        mean_td_error: -0.14649367332458496
        min_q: 3.6362719535827637
    num_steps_sampled: 1135616
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1109,2227.6,1135616,54.8069,58.8753,49.0009,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-43-53
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.87534344875002
  episode_reward_mean: 54.21571956097945
  episode_reward_min: 49.336591069885806
  episodes_this_iter: 8
  episodes_total: 11384
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1138688
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013148797675967216
        max_q: 4.258565902709961
        mean_q: 4.1185808181762695
        mean_td_error: -0.139718696475029
        min_q: 3.968400478363037
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002014843048527837
        max_q: 3.8816146850585938
        mean_q: 3.815559148788452
        mean_td_error: -0.02260708063840866
        min_q: 3.7412519454956055
    num_steps_sampled: 1138688
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1112,2234.25,1138688,54.2157,58.8753,49.3366,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-44-00
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.87534344875002
  episode_reward_mean: 53.7484184123256
  episode_reward_min: 48.489825394839876
  episodes_this_iter: 16
  episodes_total: 11416
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1141760
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004866299219429493
        max_q: 4.322208881378174
        mean_q: 4.202730178833008
        mean_td_error: 0.022432811558246613
        min_q: 4.15516996383667
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0028984390664845705
        max_q: 3.983442544937134
        mean_q: 3.8634955883026123
        mean_td_error: -0.032967761158943176
        min_q: 3.7519102096557617
    num_steps_sampled: 1141760
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1115,2240.97,1141760,53.7484,58.8753,48.4898,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-44-07
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.50731852791938
  episode_reward_mean: 53.27572371513273
  episode_reward_min: 48.489825394839876
  episodes_this_iter: 16
  episodes_total: 11448
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1144832
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006171144545078278
        max_q: 4.456645965576172
        mean_q: 4.35396671295166
        mean_td_error: -0.06259770691394806
        min_q: 4.2841010093688965
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0025408039800822735
        max_q: 4.331867694854736
        mean_q: 4.246511936187744
        mean_td_error: -0.0359853059053421
        min_q: 4.194759368896484
    num_steps_sampled: 1144832
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1118,2247.62,1144832,53.2757,56.5073,48.4898,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-44-14
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.50731852791938
  episode_reward_mean: 53.18853832423093
  episode_reward_min: 48.489825394839876
  episodes_this_iter: 8
  episodes_total: 11472
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1147904
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0020678353030234575
        max_q: 4.744241714477539
        mean_q: 4.692794322967529
        mean_td_error: 0.02154979109764099
        min_q: 4.541555404663086
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004441285505890846
        max_q: 4.3483052253723145
        mean_q: 4.1891937255859375
        mean_td_error: -0.06914065778255463
        min_q: 4.073601722717285
    num_steps_sampled: 1147904
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1121,2254.25,1147904,53.1885,56.5073,48.4898,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-44-21
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.36535740857376
  episode_reward_mean: 52.36746064424068
  episode_reward_min: 47.28575925885523
  episodes_this_iter: 8
  episodes_total: 11504
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1150976
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002438031602650881
        max_q: 4.660001277923584
        mean_q: 4.5884904861450195
        mean_td_error: 0.020368143916130066
        min_q: 4.5181427001953125
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0024480116553604603
        max_q: 4.345788955688477
        mean_q: 4.282656192779541
        mean_td_error: 0.042798176407814026
        min_q: 4.1280107498168945
    num_steps_sampled: 1150976
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1124,2260.94,1150976,52.3675,56.3654,47.2858,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-44-28
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.36535740857376
  episode_reward_mean: 52.02751034043838
  episode_reward_min: 47.28575925885523
  episodes_this_iter: 8
  episodes_total: 11536
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1154048
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004967062268406153
        max_q: 4.555592060089111
        mean_q: 4.455611705780029
        mean_td_error: -0.04843379557132721
        min_q: 4.386210918426514
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0018249963177368045
        max_q: 4.35164737701416
        mean_q: 4.261874198913574
        mean_td_error: 0.006690368056297302
        min_q: 4.098603248596191
    num_steps_sampled: 1154048
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1127,2267.59,1154048,52.0275,56.3654,47.2858,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-44-35
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.00718729748708
  episode_reward_mean: 51.1213264351477
  episode_reward_min: 47.28575925885523
  episodes_this_iter: 8
  episodes_total: 11568
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1157120
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004900182597339153
        max_q: 4.5882368087768555
        mean_q: 4.486133098602295
        mean_td_error: 0.051252081990242004
        min_q: 4.319831848144531
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0031963251531124115
        max_q: 3.952138900756836
        mean_q: 3.778534412384033
        mean_td_error: -0.041205041110515594
        min_q: 3.6153156757354736
    num_steps_sampled: 1157120
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1130,2274.33,1157120,51.1213,56.0072,47.2858,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-44-42
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.87657141404969
  episode_reward_mean: 52.25299832557621
  episode_reward_min: 47.90872491310487
  episodes_this_iter: 16
  episodes_total: 11600
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1160192
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003101184032857418
        max_q: 4.473428726196289
        mean_q: 4.37078332901001
        mean_td_error: -0.023396998643875122
        min_q: 4.288182258605957
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0038801280315965414
        max_q: 4.044564247131348
        mean_q: 3.804060697555542
        mean_td_error: 0.04709529131650925
        min_q: 3.5868279933929443
    num_steps_sampled: 1160192
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1133,2281.34,1160192,52.253,56.8766,47.9087,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-44-47
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.32200851217675
  episode_reward_mean: 52.76416000222916
  episode_reward_min: 47.90872491310487
  episodes_this_iter: 8
  episodes_total: 11616
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1162240
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002878108760342002
        max_q: 4.580864906311035
        mean_q: 4.436971664428711
        mean_td_error: -0.014923393726348877
        min_q: 4.338014125823975
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0011295754229649901
        max_q: 4.141970634460449
        mean_q: 4.035492897033691
        mean_td_error: 0.003032088279724121
        min_q: 3.975212812423706
    num_steps_sampled: 1162240
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1135,2286.22,1162240,52.7642,58.322,47.9087,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-44-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.32200851217675
  episode_reward_mean: 53.19676182041619
  episode_reward_min: 48.39518861333448
  episodes_this_iter: 8
  episodes_total: 11640
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1164288
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011290880851447582
        max_q: 4.4319257736206055
        mean_q: 4.173908710479736
        mean_td_error: -0.14030471444129944
        min_q: 4.010157108306885
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0035922005772590637
        max_q: 4.36870813369751
        mean_q: 4.224348068237305
        mean_td_error: -0.05725805461406708
        min_q: 4.151198863983154
    num_steps_sampled: 1164288
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1137,2291.06,1164288,53.1968,58.322,48.3952,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-44-57
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.32200851217675
  episode_reward_mean: 53.43862281007899
  episode_reward_min: 49.85032163967015
  episodes_this_iter: 8
  episodes_total: 11656
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1166336
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005885615944862366
        max_q: 4.625087261199951
        mean_q: 4.496897220611572
        mean_td_error: 0.06708338856697083
        min_q: 4.385040283203125
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001681388821452856
        max_q: 4.428287506103516
        mean_q: 4.348382472991943
        mean_td_error: -0.009323850274085999
        min_q: 4.234986305236816
    num_steps_sampled: 1166336
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1139,2295.96,1166336,53.4386,58.322,49.8503,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-45-05
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.32200851217675
  episode_reward_mean: 53.17938869512541
  episode_reward_min: 49.695464597164744
  episodes_this_iter: 8
  episodes_total: 11688
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1169408
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0021532406099140644
        max_q: 4.578498363494873
        mean_q: 4.464637279510498
        mean_td_error: -0.008583873510360718
        min_q: 4.38348388671875
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0027445058804005384
        max_q: 4.46535062789917
        mean_q: 4.362066268920898
        mean_td_error: -0.024868682026863098
        min_q: 4.282385349273682
    num_steps_sampled: 1169408
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1142,2302.87,1169408,53.1794,58.322,49.6955,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-45-10
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.85230672929861
  episode_reward_mean: 52.1932365599683
  episode_reward_min: 47.502656847771256
  episodes_this_iter: 8
  episodes_total: 11712
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1171456
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008226222358644009
        max_q: 4.6635942459106445
        mean_q: 4.50020694732666
        mean_td_error: -0.07223299145698547
        min_q: 4.401308059692383
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002358450321480632
        max_q: 4.454823017120361
        mean_q: 4.310943126678467
        mean_td_error: -0.02800785005092621
        min_q: 4.266254901885986
    num_steps_sampled: 1171456
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1144,2307.85,1171456,52.1932,55.8523,47.5027,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-45-17
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.34042178589929
  episode_reward_mean: 51.53185187918782
  episode_reward_min: 47.502656847771256
  episodes_this_iter: 16
  episodes_total: 11744
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1174528
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01623077318072319
        max_q: 4.593682765960693
        mean_q: 4.489095687866211
        mean_td_error: -0.17219027876853943
        min_q: 4.348674774169922
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.017122259363532066
        max_q: 4.697816371917725
        mean_q: 4.602355480194092
        mean_td_error: 0.24659793078899384
        min_q: 4.3126397132873535
    num_steps_sampled: 1174528
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1147,2314.77,1174528,51.5319,55.3404,47.5027,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-45-22
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.34042178589929
  episode_reward_mean: 51.34742020929639
  episode_reward_min: 47.502656847771256
  episodes_this_iter: 8
  episodes_total: 11760
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1176576
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012994585558772087
        max_q: 4.78510046005249
        mean_q: 4.688232898712158
        mean_td_error: 0.16590651869773865
        min_q: 4.651366710662842
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004974232520908117
        max_q: 4.466136455535889
        mean_q: 4.40704345703125
        mean_td_error: 0.07610628008842468
        min_q: 4.285289287567139
    num_steps_sampled: 1176576
    num_steps_trained: 11

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1149,2319.61,1176576,51.3474,55.3404,47.5027,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-45-27
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.759365099108834
  episode_reward_mean: 52.30870576089265
  episode_reward_min: 47.502656847771256
  episodes_this_iter: 8
  episodes_total: 11784
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1178624
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0014754598960280418
        max_q: 4.933994770050049
        mean_q: 4.849323272705078
        mean_td_error: 0.002862870693206787
        min_q: 4.746513366699219
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0072419289499521255
        max_q: 4.396198272705078
        mean_q: 4.23389196395874
        mean_td_error: -0.110578253865242
        min_q: 4.154225826263428
    num_steps_sampled: 1178624
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1151,2324.45,1178624,52.3087,57.7594,47.5027,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-45-32
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.759365099108834
  episode_reward_mean: 52.49599378235706
  episode_reward_min: 47.53874765063506
  episodes_this_iter: 8
  episodes_total: 11800
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1180672
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013629892840981483
        max_q: 4.6931867599487305
        mean_q: 4.483814239501953
        mean_td_error: -0.20820286870002747
        min_q: 4.363042831420898
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.024148885160684586
        max_q: 4.522274971008301
        mean_q: 4.221593379974365
        mean_td_error: -0.34508663415908813
        min_q: 3.976461887359619
    num_steps_sampled: 1180672
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1153,2329.31,1180672,52.496,57.7594,47.5387,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-45-37
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.759365099108834
  episode_reward_mean: 52.80576076356923
  episode_reward_min: 48.26406258569982
  episodes_this_iter: 8
  episodes_total: 11824
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1182720
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009007982909679413
        max_q: 4.26383113861084
        mean_q: 4.079411506652832
        mean_td_error: -0.15106752514839172
        min_q: 4.001904487609863
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011021750047802925
        max_q: 4.03928279876709
        mean_q: 3.8124489784240723
        mean_td_error: 0.16774632036685944
        min_q: 3.521822452545166
    num_steps_sampled: 1182720
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1155,2334.16,1182720,52.8058,57.7594,48.2641,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-45-44
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.759365099108834
  episode_reward_mean: 53.61087535923383
  episode_reward_min: 49.62270019166419
  episodes_this_iter: 16
  episodes_total: 11856
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1185792
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013754727318882942
        max_q: 4.107705116271973
        mean_q: 3.9320690631866455
        mean_td_error: -0.23684176802635193
        min_q: 3.7582831382751465
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00448253471404314
        max_q: 3.663304090499878
        mean_q: 3.4657912254333496
        mean_td_error: -0.04850094020366669
        min_q: 3.288701295852661
    num_steps_sampled: 1185792
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1158,2341.16,1185792,53.6109,57.7594,49.6227,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-45-51
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.477705829158715
  episode_reward_mean: 54.22231543312744
  episode_reward_min: 49.62270019166419
  episodes_this_iter: 16
  episodes_total: 11888
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1188864
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00949825532734394
        max_q: 4.031832695007324
        mean_q: 3.8273558616638184
        mean_td_error: -0.12705287337303162
        min_q: 3.6929662227630615
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0028790950309485197
        max_q: 4.019306182861328
        mean_q: 3.9091341495513916
        mean_td_error: -0.04002303630113602
        min_q: 3.7456064224243164
    num_steps_sampled: 1188864
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1161,2347.93,1188864,54.2223,57.4777,49.6227,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-45-58
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.3431376916253
  episode_reward_mean: 54.03428714864756
  episode_reward_min: 46.90462294835436
  episodes_this_iter: 8
  episodes_total: 11912
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1191936
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.024678677320480347
        max_q: 3.658620595932007
        mean_q: 3.3096859455108643
        mean_td_error: -0.4318998456001282
        min_q: 2.996699571609497
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0036312590818852186
        max_q: 4.024448394775391
        mean_q: 3.925475835800171
        mean_td_error: -0.054960303008556366
        min_q: 3.8682210445404053
    num_steps_sampled: 1191936
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1164,2354.53,1191936,54.0343,58.3431,46.9046,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-46-05
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.3431376916253
  episode_reward_mean: 53.8600859624498
  episode_reward_min: 46.90462294835436
  episodes_this_iter: 8
  episodes_total: 11944
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1195008
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014227197505533695
        max_q: 3.4028773307800293
        mean_q: 3.2159764766693115
        mean_td_error: -0.25955480337142944
        min_q: 2.9858875274658203
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006403705570846796
        max_q: 3.5369131565093994
        mean_q: 3.3981428146362305
        mean_td_error: -0.09188088774681091
        min_q: 3.296492338180542
    num_steps_sampled: 1195008
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1167,2360.95,1195008,53.8601,58.3431,46.9046,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-46-11
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.3431376916253
  episode_reward_mean: 52.08526939964048
  episode_reward_min: 46.90462294835436
  episodes_this_iter: 8
  episodes_total: 11976
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1198080
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0032180659472942352
        max_q: 3.361531972885132
        mean_q: 3.2582952976226807
        mean_td_error: -0.05071481317281723
        min_q: 3.173102617263794
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010924465022981167
        max_q: 3.135857582092285
        mean_q: 2.9339077472686768
        mean_td_error: -0.17427530884742737
        min_q: 2.738600015640259
    num_steps_sampled: 1198080
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1170,2367.35,1198080,52.0853,58.3431,46.9046,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-46-18
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.86002713731413
  episode_reward_mean: 51.31095651196221
  episode_reward_min: 48.591105825893024
  episodes_this_iter: 8
  episodes_total: 12008
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1201152
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0009315219940617681
        max_q: 3.5845062732696533
        mean_q: 3.5592751502990723
        mean_td_error: -0.015165291726589203
        min_q: 3.521209716796875
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01625673845410347
        max_q: 3.290010452270508
        mean_q: 2.959073543548584
        mean_td_error: -0.1757095903158188
        min_q: 2.727358341217041
    num_steps_sampled: 1201152
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1173,2373.79,1201152,51.311,53.86,48.5911,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-46-25
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.18924413504628
  episode_reward_mean: 50.67016999520572
  episode_reward_min: 47.073224959878694
  episodes_this_iter: 8
  episodes_total: 12040
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1204224
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0027203757781535387
        max_q: 3.7904810905456543
        mean_q: 3.7485709190368652
        mean_td_error: 0.04575968533754349
        min_q: 3.7070693969726562
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.06256809085607529
        max_q: 2.8745975494384766
        mean_q: 2.5432028770446777
        mean_td_error: -0.7516661286354065
        min_q: 2.3145627975463867
    num_steps_sampled: 1204224
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1176,2380.22,1204224,50.6702,54.1892,47.0732,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-46-31
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.15363386790486
  episode_reward_mean: 51.685751381800394
  episode_reward_min: 47.073224959878694
  episodes_this_iter: 16
  episodes_total: 12072
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1207296
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004195316229015589
        max_q: 4.0320916175842285
        mean_q: 3.9918761253356934
        mean_td_error: 0.07207448035478592
        min_q: 3.9373364448547363
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01460008230060339
        max_q: 2.751765727996826
        mean_q: 2.421558380126953
        mean_td_error: -0.17627498507499695
        min_q: 2.203282356262207
    num_steps_sampled: 1207296
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1179,2386.66,1207296,51.6858,57.1536,47.0732,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-46-38
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.15363386790486
  episode_reward_mean: 51.67686060734964
  episode_reward_min: 45.803688779202595
  episodes_this_iter: 8
  episodes_total: 12096
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1210368
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004676806274801493
        max_q: 4.267726421356201
        mean_q: 4.176116466522217
        mean_td_error: -0.0755084753036499
        min_q: 4.097114086151123
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.027593038976192474
        max_q: 2.1783266067504883
        mean_q: 2.010162353515625
        mean_td_error: -0.35456913709640503
        min_q: 1.8530077934265137
    num_steps_sampled: 1210368
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1182,2393.09,1210368,51.6769,57.1536,45.8037,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-46-45
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.15363386790486
  episode_reward_mean: 52.122642060502464
  episode_reward_min: 45.45440025385614
  episodes_this_iter: 8
  episodes_total: 12128
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1213440
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0031199019867926836
        max_q: 4.187150955200195
        mean_q: 4.022675514221191
        mean_td_error: 0.02931831032037735
        min_q: 3.8981642723083496
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0031637458596378565
        max_q: 2.44808030128479
        mean_q: 2.1908814907073975
        mean_td_error: 0.007365062832832336
        min_q: 1.962819218635559
    num_steps_sampled: 1213440
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1185,2399.96,1213440,52.1226,57.1536,45.4544,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-46-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.15317692587933
  episode_reward_mean: 51.80380656389246
  episode_reward_min: 44.364690620021825
  episodes_this_iter: 8
  episodes_total: 12160
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1216512
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005106344353407621
        max_q: 3.8088645935058594
        mean_q: 3.5489046573638916
        mean_td_error: 0.04079268127679825
        min_q: 3.19877290725708
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007930980063974857
        max_q: 2.689002513885498
        mean_q: 2.4573590755462646
        mean_td_error: -0.09732517600059509
        min_q: 2.3391456604003906
    num_steps_sampled: 1216512
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1188,2407,1216512,51.8038,57.1532,44.3647,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-47-00
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.79037547852658
  episode_reward_mean: 51.59623926842433
  episode_reward_min: 44.364690620021825
  episodes_this_iter: 8
  episodes_total: 12192
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1219584
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03633606433868408
        max_q: 3.0425682067871094
        mean_q: 2.857414722442627
        mean_td_error: 0.5465530157089233
        min_q: 2.5487732887268066
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03913499042391777
        max_q: 2.3897061347961426
        mean_q: 2.1791417598724365
        mean_td_error: -0.4630623161792755
        min_q: 2.0014407634735107
    num_steps_sampled: 1219584
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1191,2413.95,1219584,51.5962,55.7904,44.3647,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-47-07
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.07352766111397
  episode_reward_mean: 51.2234155217851
  episode_reward_min: 44.364690620021825
  episodes_this_iter: 8
  episodes_total: 12224
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1222656
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0034003742039203644
        max_q: 2.92631196975708
        mean_q: 2.796712875366211
        mean_td_error: -0.03766905516386032
        min_q: 2.728295087814331
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0178459994494915
        max_q: 2.070481061935425
        mean_q: 1.7566334009170532
        mean_td_error: -0.18592146039009094
        min_q: 1.524181604385376
    num_steps_sampled: 1222656
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1194,2420.75,1222656,51.2234,54.0735,44.3647,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-47-14
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.13385345441498
  episode_reward_mean: 50.99300668097911
  episode_reward_min: 44.478916013671174
  episodes_this_iter: 16
  episodes_total: 12256
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1225728
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008312586694955826
        max_q: 3.3338046073913574
        mean_q: 3.2573587894439697
        mean_td_error: -0.12160509079694748
        min_q: 3.1639339923858643
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004869086667895317
        max_q: 1.899535894393921
        mean_q: 1.7876534461975098
        mean_td_error: -0.06526714563369751
        min_q: 1.6158373355865479
    num_steps_sampled: 1225728
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1197,2427.41,1225728,50.993,57.1339,44.4789,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-47-20
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.5287994292257
  episode_reward_mean: 51.257511800010306
  episode_reward_min: 45.52572795767952
  episodes_this_iter: 16
  episodes_total: 12288
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1228800
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004241260699927807
        max_q: 3.436704397201538
        mean_q: 3.338573694229126
        mean_td_error: -0.06550270318984985
        min_q: 3.2527923583984375
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009135765954852104
        max_q: 2.361738681793213
        mean_q: 2.1997528076171875
        mean_td_error: -0.11597459018230438
        min_q: 1.9779877662658691
    num_steps_sampled: 1228800
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1200,2433.85,1228800,51.2575,57.5288,45.5257,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-47-27
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.5287994292257
  episode_reward_mean: 51.344602427132486
  episode_reward_min: 45.52572795767952
  episodes_this_iter: 8
  episodes_total: 12312
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1231872
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006636279635131359
        max_q: 3.477938413619995
        mean_q: 3.3621532917022705
        mean_td_error: -0.09847938269376755
        min_q: 3.212815999984741
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015154599212110043
        max_q: 2.6256840229034424
        mean_q: 2.5989527702331543
        mean_td_error: -0.19594058394432068
        min_q: 2.5743567943573
    num_steps_sampled: 1231872
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1203,2440.3,1231872,51.3446,57.5288,45.5257,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-47-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.08955533448129
  episode_reward_mean: 51.29368306869994
  episode_reward_min: 45.52572795767952
  episodes_this_iter: 8
  episodes_total: 12344
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1234944
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007207510992884636
        max_q: 3.4130473136901855
        mean_q: 3.2507245540618896
        mean_td_error: -0.09577711671590805
        min_q: 3.0916244983673096
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009099517948925495
        max_q: 3.1309924125671387
        mean_q: 3.013975143432617
        mean_td_error: -0.11659488826990128
        min_q: 2.89174485206604
    num_steps_sampled: 1234944
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1206,2446.7,1234944,51.2937,60.0896,45.5257,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-47-40
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.08955533448129
  episode_reward_mean: 52.082170360220616
  episode_reward_min: 45.7340072092928
  episodes_this_iter: 8
  episodes_total: 12376
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1238016
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012384626083076
        max_q: 3.25494384765625
        mean_q: 3.057451009750366
        mean_td_error: -0.1664656549692154
        min_q: 2.9680864810943604
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008332331664860249
        max_q: 2.840641975402832
        mean_q: 2.4819648265838623
        mean_td_error: -0.0904507264494896
        min_q: 2.244072914123535
    num_steps_sampled: 1238016
    num_steps_trained: 123

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1209,2453.17,1238016,52.0822,60.0896,45.734,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-47-47
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.08955533448129
  episode_reward_mean: 52.89028687626458
  episode_reward_min: 48.456593232642945
  episodes_this_iter: 8
  episodes_total: 12408
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1241088
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00566426245495677
        max_q: 3.0282421112060547
        mean_q: 2.8767826557159424
        mean_td_error: 0.06666792929172516
        min_q: 2.722590684890747
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03155268728733063
        max_q: 2.925656795501709
        mean_q: 2.458134412765503
        mean_td_error: -0.3159443736076355
        min_q: 2.0184519290924072
    num_steps_sampled: 1241088
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1212,2459.8,1241088,52.8903,60.0896,48.4566,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-47-54
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.43128018313213
  episode_reward_mean: 52.47733550604695
  episode_reward_min: 44.66198686416146
  episodes_this_iter: 16
  episodes_total: 12440
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1244160
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005125923082232475
        max_q: 3.454556941986084
        mean_q: 3.302239418029785
        mean_td_error: -0.06764718890190125
        min_q: 3.0359935760498047
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013408979400992393
        max_q: 1.8364006280899048
        mean_q: 1.5246474742889404
        mean_td_error: -0.13353514671325684
        min_q: 1.366167426109314
    num_steps_sampled: 1244160
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1215,2466.48,1244160,52.4773,59.4313,44.662,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-48-01
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.43128018313213
  episode_reward_mean: 52.6665076695026
  episode_reward_min: 44.66198686416146
  episodes_this_iter: 16
  episodes_total: 12472
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1247232
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.019805120304226875
        max_q: 3.447707176208496
        mean_q: 3.2198784351348877
        mean_td_error: -0.26493388414382935
        min_q: 2.7602992057800293
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010089782997965813
        max_q: 1.5320184230804443
        mean_q: 1.1332683563232422
        mean_td_error: -0.045756105333566666
        min_q: 0.7013863325119019
    num_steps_sampled: 1247232
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1218,2473.43,1247232,52.6665,59.4313,44.662,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-48-09
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.43128018313213
  episode_reward_mean: 52.7988613904235
  episode_reward_min: 44.66198686416146
  episodes_this_iter: 8
  episodes_total: 12496
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1250304
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.027071895077824593
        max_q: 2.695249080657959
        mean_q: 2.529465913772583
        mean_td_error: -0.37793397903442383
        min_q: 2.296877145767212
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.017518170177936554
        max_q: 1.0528311729431152
        mean_q: 0.25515076518058777
        mean_td_error: -0.1308189034461975
        min_q: -0.07532304525375366
    num_steps_sampled: 1250304
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1221,2480.37,1250304,52.7989,59.4313,44.662,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-48-16
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.79146459796493
  episode_reward_mean: 52.33659918983088
  episode_reward_min: 44.66198686416146
  episodes_this_iter: 8
  episodes_total: 12528
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1253376
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0035060665104538202
        max_q: 3.179788827896118
        mean_q: 3.0614116191864014
        mean_td_error: 0.04741349071264267
        min_q: 2.950007200241089
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.017069358378648758
        max_q: 0.9491081833839417
        mean_q: 0.7743636965751648
        mean_td_error: 0.15432843565940857
        min_q: 0.572418749332428
    num_steps_sampled: 1253376
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1224,2487.13,1253376,52.3366,56.7915,44.662,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-48-22
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.95792248959653
  episode_reward_mean: 53.39510291832633
  episode_reward_min: 48.28704325488793
  episodes_this_iter: 8
  episodes_total: 12560
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1256448
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003207877976819873
        max_q: 2.783566951751709
        mean_q: 2.5421836376190186
        mean_td_error: -0.026004455983638763
        min_q: 2.3049185276031494
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012084057554602623
        max_q: 2.029710292816162
        mean_q: 1.8140267133712769
        mean_td_error: 0.1001783162355423
        min_q: 1.6395899057388306
    num_steps_sampled: 1256448
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1227,2493.82,1256448,53.3951,58.9579,48.287,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-48-30
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.95792248959653
  episode_reward_mean: 53.127340424460684
  episode_reward_min: 48.28704325488793
  episodes_this_iter: 8
  episodes_total: 12592
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1259520
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002631545066833496
        max_q: 1.9372481107711792
        mean_q: 1.7202508449554443
        mean_td_error: 0.00868343934416771
        min_q: 1.441111445426941
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001239806180819869
        max_q: 2.7859249114990234
        mean_q: 2.7090253829956055
        mean_td_error: 0.010741837322711945
        min_q: 2.562492847442627
    num_steps_sampled: 1259520
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1230,2500.73,1259520,53.1273,58.9579,48.287,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-48-37
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.95792248959653
  episode_reward_mean: 53.139725574059206
  episode_reward_min: 48.28704325488793
  episodes_this_iter: 16
  episodes_total: 12624
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1262592
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0028290702030062675
        max_q: 2.2591910362243652
        mean_q: 2.1713027954101562
        mean_td_error: -0.026956401765346527
        min_q: 2.0176122188568115
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015323790721595287
        max_q: 3.9128308296203613
        mean_q: 3.7926225662231445
        mean_td_error: 0.1778656244277954
        min_q: 3.6409027576446533
    num_steps_sampled: 1262592
    num_steps_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1233,2507.4,1262592,53.1397,58.9579,48.287,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-48-44
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.864426671714085
  episode_reward_mean: 52.07425935669637
  episode_reward_min: 47.2468124511907
  episodes_this_iter: 16
  episodes_total: 12656
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1265664
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011613517999649048
        max_q: 2.401956796646118
        mean_q: 2.2655298709869385
        mean_td_error: -0.15277013182640076
        min_q: 2.116990566253662
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00865060742944479
        max_q: 4.56846809387207
        mean_q: 4.520336151123047
        mean_td_error: 0.10877831280231476
        min_q: 4.387163162231445
    num_steps_sampled: 1265664
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1236,2514.13,1265664,52.0743,55.8644,47.2468,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-48-51
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.298370289065765
  episode_reward_mean: 51.78736343472588
  episode_reward_min: 47.2468124511907
  episodes_this_iter: 8
  episodes_total: 12680
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1268736
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015159898437559605
        max_q: 2.6892621517181396
        mean_q: 2.558840274810791
        mean_td_error: -0.17061647772789001
        min_q: 2.414264440536499
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002002315130084753
        max_q: 4.953194618225098
        mean_q: 4.885889530181885
        mean_td_error: 0.014788269996643066
        min_q: 4.82349967956543
    num_steps_sampled: 1268736
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1239,2520.86,1268736,51.7874,56.2984,47.2468,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-48-57
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.08144964703398
  episode_reward_mean: 52.28823302151786
  episode_reward_min: 47.2468124511907
  episodes_this_iter: 8
  episodes_total: 12712
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1271808
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005300296936184168
        max_q: 2.894559860229492
        mean_q: 2.778646945953369
        mean_td_error: -0.06200743466615677
        min_q: 2.7082719802856445
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003995268605649471
        max_q: 5.292677402496338
        mean_q: 5.238951683044434
        mean_td_error: -0.04657617211341858
        min_q: 5.192610740661621
    num_steps_sampled: 1271808
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1242,2527.54,1271808,52.2882,57.0814,47.2468,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-49-04
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.08144964703398
  episode_reward_mean: 52.676366526870076
  episode_reward_min: 47.33122381670986
  episodes_this_iter: 8
  episodes_total: 12744
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1274880
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01040101982653141
        max_q: 3.0659584999084473
        mean_q: 2.906782865524292
        mean_td_error: -0.13771475851535797
        min_q: 2.774829387664795
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0032091073226183653
        max_q: 5.419933319091797
        mean_q: 5.372036457061768
        mean_td_error: 0.03348766267299652
        min_q: 5.317708492279053
    num_steps_sampled: 1274880
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1245,2534.26,1274880,52.6764,57.0814,47.3312,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-49-11
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.08144964703398
  episode_reward_mean: 53.28017499460332
  episode_reward_min: 48.4833595188969
  episodes_this_iter: 8
  episodes_total: 12776
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1277952
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012486571446061134
        max_q: 3.024650812149048
        mean_q: 2.8255860805511475
        mean_td_error: -0.16069373488426208
        min_q: 2.717348337173462
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0040291110053658485
        max_q: 5.51936674118042
        mean_q: 5.464447498321533
        mean_td_error: 0.05340449512004852
        min_q: 5.377363204956055
    num_steps_sampled: 1277952
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1248,2541.03,1277952,53.2802,57.0814,48.4834,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-49-18
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.93729477414456
  episode_reward_mean: 53.48758775561624
  episode_reward_min: 48.4833595188969
  episodes_this_iter: 8
  episodes_total: 12808
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1281024
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001309182494878769
        max_q: 3.1285626888275146
        mean_q: 3.0395774841308594
        mean_td_error: 0.026846468448638916
        min_q: 2.983461856842041
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0033047867473214865
        max_q: 5.332508087158203
        mean_q: 5.263705730438232
        mean_td_error: -0.035719841718673706
        min_q: 5.147521018981934
    num_steps_sampled: 1281024
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1251,2547.72,1281024,53.4876,56.9373,48.4834,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-49-25
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.639562433800734
  episode_reward_mean: 52.556223418933385
  episode_reward_min: 48.213785457844175
  episodes_this_iter: 16
  episodes_total: 12840
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1284096
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0026219687424600124
        max_q: 3.5971531867980957
        mean_q: 3.5195395946502686
        mean_td_error: 0.06145060807466507
        min_q: 3.4551048278808594
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004937193356454372
        max_q: 5.01301383972168
        mean_q: 4.965220928192139
        mean_td_error: -0.055894866585731506
        min_q: 4.925190448760986
    num_steps_sampled: 1284096
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1254,2554.41,1284096,52.5562,56.6396,48.2138,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-49-32
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.639562433800734
  episode_reward_mean: 52.565173766935494
  episode_reward_min: 48.213785457844175
  episodes_this_iter: 8
  episodes_total: 12864
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1287168
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0045981635339558125
        max_q: 3.8230795860290527
        mean_q: 3.6996607780456543
        mean_td_error: -0.08531734347343445
        min_q: 3.636910915374756
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0022136892657727003
        max_q: 4.7595014572143555
        mean_q: 4.706446170806885
        mean_td_error: -0.016551092267036438
        min_q: 4.640475749969482
    num_steps_sampled: 1287168
    num_steps

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1257,2561.15,1287168,52.5652,56.6396,48.2138,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-49-39
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.639562433800734
  episode_reward_mean: 52.129736462094094
  episode_reward_min: 48.213785457844175
  episodes_this_iter: 8
  episodes_total: 12896
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1290240
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008884967304766178
        max_q: 3.9619557857513428
        mean_q: 3.8729538917541504
        mean_td_error: -0.18088263273239136
        min_q: 3.7200400829315186
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004271204583346844
        max_q: 4.679746627807617
        mean_q: 4.536755084991455
        mean_td_error: -0.054721489548683167
        min_q: 4.501882553100586
    num_steps_sampled: 1290240
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1260,2567.82,1290240,52.1297,56.6396,48.2138,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-49-46
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.65158480034707
  episode_reward_mean: 51.754393645333195
  episode_reward_min: 47.15540885258463
  episodes_this_iter: 8
  episodes_total: 12928
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1293312
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0007639267714694142
        max_q: 4.188595294952393
        mean_q: 4.082658767700195
        mean_td_error: 0.008049540221691132
        min_q: 3.980262041091919
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004019887186586857
        max_q: 4.599554061889648
        mean_q: 4.53589391708374
        mean_td_error: 0.05227816104888916
        min_q: 4.450600624084473
    num_steps_sampled: 1293312
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1263,2574.3,1293312,51.7544,54.6516,47.1554,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-49-53
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.65158480034707
  episode_reward_mean: 51.27213827896936
  episode_reward_min: 47.15540885258463
  episodes_this_iter: 8
  episodes_total: 12960
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1296384
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002426321618258953
        max_q: 4.384326934814453
        mean_q: 4.3137736320495605
        mean_td_error: -0.04796455800533295
        min_q: 4.2292704582214355
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0017473818734288216
        max_q: 4.710078239440918
        mean_q: 4.608181476593018
        mean_td_error: 0.011084094643592834
        min_q: 4.5184783935546875
    num_steps_sampled: 1296384
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1266,2580.73,1296384,51.2721,54.6516,47.1554,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-49-59
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.65158480034707
  episode_reward_mean: 50.89424090819154
  episode_reward_min: 47.15540885258463
  episodes_this_iter: 8
  episodes_total: 12992
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1299456
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003222531406208873
        max_q: 4.627963066101074
        mean_q: 4.536249160766602
        mean_td_error: 0.05334959924221039
        min_q: 4.452662944793701
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005919723771512508
        max_q: 4.752931118011475
        mean_q: 4.665920257568359
        mean_td_error: -0.07354874908924103
        min_q: 4.597704887390137
    num_steps_sampled: 1299456
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1269,2587.21,1299456,50.8942,54.6516,47.1554,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-50-06
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.5122149532253
  episode_reward_mean: 50.84034559876729
  episode_reward_min: 48.14266782503657
  episodes_this_iter: 16
  episodes_total: 13024
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1302528
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002754596993327141
        max_q: 4.734686374664307
        mean_q: 4.500166416168213
        mean_td_error: 0.02887958288192749
        min_q: 4.346883773803711
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006255680229514837
        max_q: 4.561575412750244
        mean_q: 4.446962356567383
        mean_td_error: -0.09937611222267151
        min_q: 4.3432230949401855
    num_steps_sampled: 1302528
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1272,2594.11,1302528,50.8403,54.5122,48.1427,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-50-13
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.00330764677271
  episode_reward_mean: 51.36862351204908
  episode_reward_min: 48.42828618911062
  episodes_this_iter: 16
  episodes_total: 13056
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1305600
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0074124690145254135
        max_q: 4.431029319763184
        mean_q: 4.372403144836426
        mean_td_error: -0.11472463607788086
        min_q: 4.31256103515625
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0035866599064320326
        max_q: 4.456803321838379
        mean_q: 4.339803695678711
        mean_td_error: -0.04979658126831055
        min_q: 4.251926422119141
    num_steps_sampled: 1305600
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1275,2600.83,1305600,51.3686,55.0033,48.4283,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-50-21
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.00330764677271
  episode_reward_mean: 51.750820271966525
  episode_reward_min: 48.42828618911062
  episodes_this_iter: 8
  episodes_total: 13080
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1308672
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0028334783855825663
        max_q: 4.504258632659912
        mean_q: 4.431944847106934
        mean_td_error: -0.04209578037261963
        min_q: 4.34140157699585
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0030570661183446646
        max_q: 4.462477207183838
        mean_q: 4.3605241775512695
        mean_td_error: -0.03851301968097687
        min_q: 4.254366874694824
    num_steps_sampled: 1308672
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1278,2608.05,1308672,51.7508,55.0033,48.4283,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-50-26
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.00330764677271
  episode_reward_mean: 51.15090330890867
  episode_reward_min: 47.8483545288115
  episodes_this_iter: 8
  episodes_total: 13104
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1310720
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00429148692637682
        max_q: 4.466419219970703
        mean_q: 4.390076637268066
        mean_td_error: -0.059349581599235535
        min_q: 4.260838985443115
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0011844036635011435
        max_q: 4.441067695617676
        mean_q: 4.384147644042969
        mean_td_error: -0.005268797278404236
        min_q: 4.289106369018555
    num_steps_sampled: 1310720
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1280,2613.19,1310720,51.1509,55.0033,47.8484,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-50-32
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.00330764677271
  episode_reward_mean: 51.02823434199724
  episode_reward_min: 47.8483545288115
  episodes_this_iter: 8
  episodes_total: 13120
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1312768
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004248482175171375
        max_q: 4.18949556350708
        mean_q: 3.945919990539551
        mean_td_error: -0.06407827138900757
        min_q: 3.8718552589416504
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001991471741348505
        max_q: 4.471137523651123
        mean_q: 4.414050579071045
        mean_td_error: -0.032375603914260864
        min_q: 4.313827991485596
    num_steps_sampled: 1312768
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1282,2618.34,1312768,51.0282,55.0033,47.8484,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-50-37
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.317023691399186
  episode_reward_mean: 50.97799230279399
  episode_reward_min: 47.8483545288115
  episodes_this_iter: 8
  episodes_total: 13144
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1314816
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01663823053240776
        max_q: 4.359951972961426
        mean_q: 4.235579490661621
        mean_td_error: 0.24517764151096344
        min_q: 3.9714407920837402
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0019077785545960069
        max_q: 4.480999946594238
        mean_q: 4.3770670890808105
        mean_td_error: -0.0031004250049591064
        min_q: 4.299979209899902
    num_steps_sampled: 1314816
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1284,2623.49,1314816,50.978,56.317,47.8484,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-50-42
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.317023691399186
  episode_reward_mean: 50.38595880529414
  episode_reward_min: 46.84131067669748
  episodes_this_iter: 16
  episodes_total: 13168
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1316864
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004674037452787161
        max_q: 4.092456340789795
        mean_q: 4.001408576965332
        mean_td_error: -0.06903663277626038
        min_q: 3.910738945007324
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0024221704807132483
        max_q: 4.498902797698975
        mean_q: 4.426998615264893
        mean_td_error: -0.041846200823783875
        min_q: 4.315975189208984
    num_steps_sampled: 1316864
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1286,2628.77,1316864,50.386,56.317,46.8413,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-50-48
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.317023691399186
  episode_reward_mean: 50.166571541310525
  episode_reward_min: 46.84131067669748
  episodes_this_iter: 8
  episodes_total: 13184
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1318912
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0017111895140260458
        max_q: 4.202644348144531
        mean_q: 4.121676445007324
        mean_td_error: -0.008967697620391846
        min_q: 4.074604511260986
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0027350825257599354
        max_q: 4.5354838371276855
        mean_q: 4.475594520568848
        mean_td_error: 0.04316629469394684
        min_q: 4.327075481414795
    num_steps_sampled: 1318912
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1288,2634.19,1318912,50.1666,56.317,46.8413,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-50-53
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.317023691399186
  episode_reward_mean: 50.71623457051626
  episode_reward_min: 46.84131067669748
  episodes_this_iter: 16
  episodes_total: 13208
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1320960
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0012971346732228994
        max_q: 4.460422992706299
        mean_q: 4.332300186157227
        mean_td_error: 0.020393073558807373
        min_q: 4.256678581237793
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009060107171535492
        max_q: 4.5578718185424805
        mean_q: 4.509690284729004
        mean_td_error: -0.14733321964740753
        min_q: 4.348132133483887
    num_steps_sampled: 1320960
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1290,2639.47,1320960,50.7162,56.317,46.8413,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-50-59
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.669056721601
  episode_reward_mean: 51.34852821091418
  episode_reward_min: 46.84131067669748
  episodes_this_iter: 8
  episodes_total: 13224
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1323008
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0048475381918251514
        max_q: 4.37740421295166
        mean_q: 4.231207370758057
        mean_td_error: -0.06792199611663818
        min_q: 4.124995231628418
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0016646879957988858
        max_q: 4.611179351806641
        mean_q: 4.525633811950684
        mean_td_error: -0.016039922833442688
        min_q: 4.447995662689209
    num_steps_sampled: 1323008
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1292,2644.52,1323008,51.3485,57.6691,46.8413,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-51-04
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.669056721601
  episode_reward_mean: 51.76442725197396
  episode_reward_min: 46.88011393773871
  episodes_this_iter: 8
  episodes_total: 13248
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1325056
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0030467966571450233
        max_q: 4.4080047607421875
        mean_q: 4.187948226928711
        mean_td_error: -0.016176477074623108
        min_q: 4.096118927001953
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002792316721752286
        max_q: 4.73312520980835
        mean_q: 4.5558342933654785
        mean_td_error: -0.025939777493476868
        min_q: 4.531570911407471
    num_steps_sampled: 1325056
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1294,2649.5,1325056,51.7644,57.6691,46.8801,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-51-09
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.669056721601
  episode_reward_mean: 52.047950248255034
  episode_reward_min: 47.87546833508171
  episodes_this_iter: 8
  episodes_total: 13264
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1327104
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002775390399619937
        max_q: 4.291676998138428
        mean_q: 4.183541774749756
        mean_td_error: -0.04614850878715515
        min_q: 4.0589189529418945
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0023612312506884336
        max_q: 4.668064594268799
        mean_q: 4.632354736328125
        mean_td_error: -0.030591413378715515
        min_q: 4.514413356781006
    num_steps_sampled: 1327104
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1296,2654.44,1327104,52.048,57.6691,47.8755,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-51-16
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.669056721601
  episode_reward_mean: 52.77502090005491
  episode_reward_min: 47.87546833508171
  episodes_this_iter: 8
  episodes_total: 13296
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1330176
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015966016799211502
        max_q: 4.277773380279541
        mean_q: 3.887571334838867
        mean_td_error: -0.262593537569046
        min_q: 3.7037718296051025
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007080786861479282
        max_q: 4.545098781585693
        mean_q: 4.4570512771606445
        mean_td_error: -0.10209532082080841
        min_q: 4.398683547973633
    num_steps_sampled: 1330176
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1299,2661.55,1330176,52.775,57.6691,47.8755,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-51-23
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.51567512819125
  episode_reward_mean: 52.65982738295647
  episode_reward_min: 48.32071104972422
  episodes_this_iter: 8
  episodes_total: 13328
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1333248
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.039248064160346985
        max_q: 4.2765278816223145
        mean_q: 3.689222812652588
        mean_td_error: -0.42857930064201355
        min_q: 3.4416725635528564
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007515835575759411
        max_q: 4.796077728271484
        mean_q: 4.663634300231934
        mean_td_error: 0.1099606305360794
        min_q: 4.491237163543701
    num_steps_sampled: 1333248
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1302,2668.2,1333248,52.6598,56.5157,48.3207,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-51-30
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.51567512819125
  episode_reward_mean: 52.19850059514891
  episode_reward_min: 48.32071104972422
  episodes_this_iter: 8
  episodes_total: 13360
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1336320
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01151255052536726
        max_q: 2.935817241668701
        mean_q: 2.4967761039733887
        mean_td_error: -0.10892873257398605
        min_q: 2.124908685684204
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0013495663879439235
        max_q: 4.3241963386535645
        mean_q: 4.19478702545166
        mean_td_error: -0.006394684314727783
        min_q: 4.005157947540283
    num_steps_sampled: 1336320
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1305,2674.68,1336320,52.1985,56.5157,48.3207,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-51-37
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.27628578554522
  episode_reward_mean: 51.39561172225208
  episode_reward_min: 48.37155858445563
  episodes_this_iter: 16
  episodes_total: 13392
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1339392
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013228856027126312
        max_q: 2.9663889408111572
        mean_q: 2.842423439025879
        mean_td_error: -0.14427968859672546
        min_q: 2.7595009803771973
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0038721731398254633
        max_q: 4.348645210266113
        mean_q: 4.193782329559326
        mean_td_error: -0.05221882462501526
        min_q: 4.043551445007324
    num_steps_sampled: 1339392
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1308,2681,1339392,51.3956,55.2763,48.3716,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-51-43
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.27628578554522
  episode_reward_mean: 50.082561910402
  episode_reward_min: 46.48023807448176
  episodes_this_iter: 16
  episodes_total: 13424
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1342464
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007506473455578089
        max_q: 3.2486963272094727
        mean_q: 3.156181573867798
        mean_td_error: -0.08108531683683395
        min_q: 3.0419321060180664
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008182594552636147
        max_q: 4.230213642120361
        mean_q: 4.175407409667969
        mean_td_error: -0.11945948004722595
        min_q: 4.063755512237549
    num_steps_sampled: 1342464
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1311,2687.21,1342464,50.0826,55.2763,46.4802,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-51-49
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.27628578554522
  episode_reward_mean: 50.144802079363124
  episode_reward_min: 46.48023807448176
  episodes_this_iter: 8
  episodes_total: 13448
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1345536
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.023027189075946808
        max_q: 3.2431507110595703
        mean_q: 3.1005780696868896
        mean_td_error: -0.23655954003334045
        min_q: 2.996013879776001
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011721532791852951
        max_q: 4.414183139801025
        mean_q: 4.234747886657715
        mean_td_error: -0.22203752398490906
        min_q: 4.08508825302124
    num_steps_sampled: 1345536
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1314,2693.45,1345536,50.1448,55.2763,46.4802,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-51-56
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.631622797127406
  episode_reward_mean: 49.685400797074045
  episode_reward_min: 46.48023807448176
  episodes_this_iter: 8
  episodes_total: 13480
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1348608
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005974168423563242
        max_q: 3.4429774284362793
        mean_q: 3.3748178482055664
        mean_td_error: -0.07983377575874329
        min_q: 3.300184726715088
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00556968292221427
        max_q: 4.380059242248535
        mean_q: 4.267270088195801
        mean_td_error: -0.10408025979995728
        min_q: 4.189306259155273
    num_steps_sampled: 1348608
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1317,2699.81,1348608,49.6854,55.6316,46.4802,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-52-03
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.69819483795176
  episode_reward_mean: 50.44883406832175
  episode_reward_min: 47.7002581169399
  episodes_this_iter: 8
  episodes_total: 13512
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1351680
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00573864346370101
        max_q: 3.5901811122894287
        mean_q: 3.4456324577331543
        mean_td_error: -0.08649495244026184
        min_q: 3.3459367752075195
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006154653616249561
        max_q: 4.460591793060303
        mean_q: 4.381373882293701
        mean_td_error: -0.08468034863471985
        min_q: 4.317709445953369
    num_steps_sampled: 1351680
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1320,2706.31,1351680,50.4488,55.6982,47.7003,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-52-10
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.69819483795176
  episode_reward_mean: 50.598613336703835
  episode_reward_min: 46.93847580978903
  episodes_this_iter: 8
  episodes_total: 13544
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1354752
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.000985003192909062
        max_q: 3.733696460723877
        mean_q: 3.665329933166504
        mean_td_error: 0.0007857158780097961
        min_q: 3.533036231994629
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0011018230579793453
        max_q: 4.250279903411865
        mean_q: 4.189303398132324
        mean_td_error: 0.000787094235420227
        min_q: 4.093791961669922
    num_steps_sampled: 1354752
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1323,2712.84,1354752,50.5986,55.6982,46.9385,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-52-17
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.69819483795176
  episode_reward_mean: 50.70544607911769
  episode_reward_min: 46.93847580978903
  episodes_this_iter: 8
  episodes_total: 13576
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1357824
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0030534209217876196
        max_q: 3.947098731994629
        mean_q: 3.902968168258667
        mean_td_error: -0.044732481241226196
        min_q: 3.831263542175293
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003946004435420036
        max_q: 4.320192337036133
        mean_q: 4.226983070373535
        mean_td_error: -0.058559417724609375
        min_q: 4.034234046936035
    num_steps_sampled: 1357824
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1326,2719.74,1357824,50.7054,55.6982,46.9385,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-52-24
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.047391063002664
  episode_reward_mean: 51.41190282333354
  episode_reward_min: 46.93847580978903
  episodes_this_iter: 16
  episodes_total: 13608
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1360896
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0027119857259094715
        max_q: 3.876190185546875
        mean_q: 3.7809925079345703
        mean_td_error: -0.03502470254898071
        min_q: 3.687281370162964
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007978702895343304
        max_q: 4.188378810882568
        mean_q: 3.6595306396484375
        mean_td_error: -0.05166994780302048
        min_q: 3.1589512825012207
    num_steps_sampled: 1360896
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1329,2726.77,1360896,51.4119,58.0474,46.9385,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-52-31
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.047391063002664
  episode_reward_mean: 52.12289089708322
  episode_reward_min: 46.93847580978903
  episodes_this_iter: 8
  episodes_total: 13632
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1363968
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002708820393308997
        max_q: 4.055807590484619
        mean_q: 3.9837119579315186
        mean_td_error: -0.032487593591213226
        min_q: 3.8794193267822266
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.018438855186104774
        max_q: 2.8063392639160156
        mean_q: 2.664898157119751
        mean_td_error: -0.3365741968154907
        min_q: 2.436795234680176
    num_steps_sampled: 1363968
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1332,2733.87,1363968,52.1229,58.0474,46.9385,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-52-38
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.047391063002664
  episode_reward_mean: 52.93824057408924
  episode_reward_min: 47.38607512567272
  episodes_this_iter: 8
  episodes_total: 13664
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1367040
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004187299869954586
        max_q: 4.07034158706665
        mean_q: 3.9885294437408447
        mean_td_error: -0.06180385500192642
        min_q: 3.8948869705200195
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004891176242381334
        max_q: 2.747835159301758
        mean_q: 2.6385467052459717
        mean_td_error: -0.09120089560747147
        min_q: 2.5492098331451416
    num_steps_sampled: 1367040
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1335,2740.64,1367040,52.9382,58.0474,47.3861,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-52-46
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.047391063002664
  episode_reward_mean: 52.60628549201339
  episode_reward_min: 47.83117293060332
  episodes_this_iter: 8
  episodes_total: 13696
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1370112
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001984745729714632
        max_q: 4.145044326782227
        mean_q: 4.052134037017822
        mean_td_error: -0.005379386246204376
        min_q: 3.911100387573242
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003324841847643256
        max_q: 3.2313923835754395
        mean_q: 3.0815038681030273
        mean_td_error: 0.04952256381511688
        min_q: 2.7684876918792725
    num_steps_sampled: 1370112
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1338,2748.09,1370112,52.6063,58.0474,47.8312,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-52-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.667567516504064
  episode_reward_mean: 51.22047650649666
  episode_reward_min: 47.83117293060332
  episodes_this_iter: 16
  episodes_total: 13720
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1372160
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0060859378427267075
        max_q: 4.181080341339111
        mean_q: 4.026124477386475
        mean_td_error: -0.08218543231487274
        min_q: 3.940704822540283
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002871851669624448
        max_q: 3.475999355316162
        mean_q: 3.3023927211761475
        mean_td_error: -0.024986632168293
        min_q: 3.2336835861206055
    num_steps_sampled: 1372160
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1340,2753.52,1372160,51.2205,57.6676,47.8312,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-52-57
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.014628156215586
  episode_reward_mean: 50.96033732296376
  episode_reward_min: 47.83117293060332
  episodes_this_iter: 8
  episodes_total: 13736
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1374208
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0066472003236413
        max_q: 4.040981292724609
        mean_q: 3.80686616897583
        mean_td_error: -0.08964113146066666
        min_q: 3.635878086090088
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0035794549621641636
        max_q: 3.603222131729126
        mean_q: 3.3384788036346436
        mean_td_error: -0.019323721528053284
        min_q: 3.1269309520721436
    num_steps_sampled: 1374208
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1342,2758.92,1374208,50.9603,55.0146,47.8312,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-53-03
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.563481983966604
  episode_reward_mean: 51.016692869710624
  episode_reward_min: 47.83117293060332
  episodes_this_iter: 8
  episodes_total: 13760
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1376256
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004339833278208971
        max_q: 4.1762471199035645
        mean_q: 3.890550136566162
        mean_td_error: -0.009774908423423767
        min_q: 3.6002283096313477
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00156903057359159
        max_q: 3.191713809967041
        mean_q: 2.9920434951782227
        mean_td_error: -0.011076055467128754
        min_q: 2.8535284996032715
    num_steps_sampled: 1376256
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1344,2764.44,1376256,51.0167,54.5635,47.8312,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-53-09
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.563481983966604
  episode_reward_mean: 51.2360029381699
  episode_reward_min: 47.83117293060332
  episodes_this_iter: 8
  episodes_total: 13776
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1378304
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011113956570625305
        max_q: 3.3702855110168457
        mean_q: 3.218289375305176
        mean_td_error: -0.14695441722869873
        min_q: 3.023756980895996
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003166879527270794
        max_q: 3.2437822818756104
        mean_q: 3.1323890686035156
        mean_td_error: -0.04351111501455307
        min_q: 3.0353572368621826
    num_steps_sampled: 1378304
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1346,2769.91,1378304,51.236,54.5635,47.8312,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-53-14
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.03215803942607
  episode_reward_mean: 52.28598854031128
  episode_reward_min: 47.919309169484166
  episodes_this_iter: 8
  episodes_total: 13800
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1380352
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002084512962028384
        max_q: 3.4455721378326416
        mean_q: 3.295243501663208
        mean_td_error: 0.002128995954990387
        min_q: 3.092233657836914
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0012985406210646033
        max_q: 3.4810948371887207
        mean_q: 3.3701016902923584
        mean_td_error: 0.019912034273147583
        min_q: 3.280853509902954
    num_steps_sampled: 1380352
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1348,2775.28,1380352,52.286,59.0322,47.9193,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-53-20
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.03215803942607
  episode_reward_mean: 52.70328695371453
  episode_reward_min: 47.70284934731998
  episodes_this_iter: 16
  episodes_total: 13824
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1382400
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012872559949755669
        max_q: 3.504267930984497
        mean_q: 3.2602157592773438
        mean_td_error: -0.18071375787258148
        min_q: 3.1018600463867188
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001242421567440033
        max_q: 3.802802324295044
        mean_q: 3.701673984527588
        mean_td_error: 0.0005816742777824402
        min_q: 3.5305774211883545
    num_steps_sampled: 1382400
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1350,2780.5,1382400,52.7033,59.0322,47.7028,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-53-25
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.03215803942607
  episode_reward_mean: 52.51638846379461
  episode_reward_min: 47.30161392293117
  episodes_this_iter: 8
  episodes_total: 13840
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1384448
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014627774246037006
        max_q: 3.4797239303588867
        mean_q: 3.1819422245025635
        mean_td_error: -0.19897550344467163
        min_q: 2.9197511672973633
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0040228948928415775
        max_q: 4.0177130699157715
        mean_q: 3.9726455211639404
        mean_td_error: 0.08395581692457199
        min_q: 3.8166239261627197
    num_steps_sampled: 1384448
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1352,2785.43,1384448,52.5164,59.0322,47.3016,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-53-32
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.03215803942607
  episode_reward_mean: 52.67191834135494
  episode_reward_min: 47.30161392293117
  episodes_this_iter: 8
  episodes_total: 13872
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1387520
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007361275609582663
        max_q: 3.057178020477295
        mean_q: 2.845653772354126
        mean_td_error: 0.09426393359899521
        min_q: 2.6902148723602295
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0010534863686189055
        max_q: 4.204233169555664
        mean_q: 4.079870223999023
        mean_td_error: -0.020868703722953796
        min_q: 3.9412784576416016
    num_steps_sampled: 1387520
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1355,2792.67,1387520,52.6719,59.0322,47.3016,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-53-39
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.90618943723218
  episode_reward_mean: 51.424117786994515
  episode_reward_min: 46.609444320195166
  episodes_this_iter: 16
  episodes_total: 13904
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1390592
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007220969069749117
        max_q: 3.161768913269043
        mean_q: 3.0231375694274902
        mean_td_error: -0.09899607300758362
        min_q: 2.840506076812744
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0027875935193151236
        max_q: 4.180792331695557
        mean_q: 3.992579460144043
        mean_td_error: -0.050342537462711334
        min_q: 3.8933799266815186
    num_steps_sampled: 1390592
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1358,2799.43,1390592,51.4241,55.9062,46.6094,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-53-46
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.815053265027004
  episode_reward_mean: 51.72323111401338
  episode_reward_min: 46.609444320195166
  episodes_this_iter: 16
  episodes_total: 13936
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1393664
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.019374141469597816
        max_q: 3.3958144187927246
        mean_q: 3.024651288986206
        mean_td_error: -0.2549717426300049
        min_q: 2.8207082748413086
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008167277090251446
        max_q: 4.008110523223877
        mean_q: 3.9382526874542236
        mean_td_error: -0.14703816175460815
        min_q: 3.89180850982666
    num_steps_sampled: 1393664
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1361,2806.23,1393664,51.7232,55.8151,46.6094,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-53-53
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.71101051488034
  episode_reward_mean: 52.38801235719616
  episode_reward_min: 46.609444320195166
  episodes_this_iter: 8
  episodes_total: 13960
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1396736
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007601127959787846
        max_q: 3.11403226852417
        mean_q: 2.918872594833374
        mean_td_error: -0.09183397889137268
        min_q: 2.6812758445739746
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0032326302025467157
        max_q: 4.034651756286621
        mean_q: 3.885315418243408
        mean_td_error: -0.06465262919664383
        min_q: 3.8372116088867188
    num_steps_sampled: 1396736
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1364,2813.05,1396736,52.388,59.711,46.6094,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-54-01
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.71101051488034
  episode_reward_mean: 52.18960619753816
  episode_reward_min: 45.777002733784414
  episodes_this_iter: 8
  episodes_total: 13992
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1399808
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.016282085329294205
        max_q: 3.060209035873413
        mean_q: 2.9173107147216797
        mean_td_error: -0.23201662302017212
        min_q: 2.811448335647583
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009771413169801235
        max_q: 3.9772791862487793
        mean_q: 3.758890151977539
        mean_td_error: 0.18973158299922943
        min_q: 3.3979275226593018
    num_steps_sampled: 1399808
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1367,2819.83,1399808,52.1896,59.711,45.777,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-54-08
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.71101051488034
  episode_reward_mean: 52.21121924698451
  episode_reward_min: 45.777002733784414
  episodes_this_iter: 8
  episodes_total: 14024
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1402880
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02617248147726059
        max_q: 3.2735440731048584
        mean_q: 2.8449325561523438
        mean_td_error: -0.35494452714920044
        min_q: 2.6966381072998047
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008853031322360039
        max_q: 3.982828378677368
        mean_q: 3.8376669883728027
        mean_td_error: -0.14122378826141357
        min_q: 3.665203332901001
    num_steps_sampled: 1402880
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1370,2826.58,1402880,52.2112,59.711,45.777,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-54-14
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.26154597993474
  episode_reward_mean: 51.01242361349863
  episode_reward_min: 45.777002733784414
  episodes_this_iter: 8
  episodes_total: 14056
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1405952
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03618008643388748
        max_q: 3.4125680923461914
        mean_q: 2.7040855884552
        mean_td_error: -0.284206748008728
        min_q: 1.944812297821045
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0045581115409731865
        max_q: 3.2802352905273438
        mean_q: 3.1296842098236084
        mean_td_error: -0.05689135193824768
        min_q: 3.0125746726989746
    num_steps_sampled: 1405952
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1373,2833.09,1405952,51.0124,57.2615,45.777,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-54-21
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.159063337180015
  episode_reward_mean: 50.6434117594277
  episode_reward_min: 45.777002733784414
  episodes_this_iter: 8
  episodes_total: 14088
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1409024
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.020852088928222656
        max_q: 1.4104375839233398
        mean_q: 0.8954718708992004
        mean_td_error: -0.1778913140296936
        min_q: 0.46798017621040344
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0114297429099679
        max_q: 3.2454323768615723
        mean_q: 3.0373263359069824
        mean_td_error: -0.18687953054904938
        min_q: 2.7767767906188965
    num_steps_sampled: 1409024
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1376,2839.63,1409024,50.6434,54.1591,45.777,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-54-28
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.68211953856103
  episode_reward_mean: 52.71046381300052
  episode_reward_min: 46.23077526528355
  episodes_this_iter: 16
  episodes_total: 14120
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1412096
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.04405371844768524
        max_q: 1.0518372058868408
        mean_q: 0.7477308511734009
        mean_td_error: -0.3968908190727234
        min_q: 0.3608354330062866
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005635339301079512
        max_q: 3.199542760848999
        mean_q: 3.0325589179992676
        mean_td_error: -0.08749771118164062
        min_q: 2.93621826171875
    num_steps_sampled: 1412096
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1379,2846.04,1412096,52.7105,58.6821,46.2308,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-54-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.10775296678279
  episode_reward_mean: 53.41616667587021
  episode_reward_min: 46.23077526528355
  episodes_this_iter: 8
  episodes_total: 14144
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1415168
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03452327474951744
        max_q: 1.3435789346694946
        mean_q: 1.0679618120193481
        mean_td_error: -0.31708166003227234
        min_q: 0.9473726749420166
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.029876617714762688
        max_q: 3.132097005844116
        mean_q: 2.9299585819244385
        mean_td_error: -0.45087575912475586
        min_q: 2.7597696781158447
    num_steps_sampled: 1415168
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1382,2852.3,1415168,53.4162,59.1078,46.2308,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-54-41
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.10775296678279
  episode_reward_mean: 52.94843588542366
  episode_reward_min: 46.23077526528355
  episodes_this_iter: 8
  episodes_total: 14176
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1418240
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006858621723949909
        max_q: 2.404283285140991
        mean_q: 2.3131227493286133
        mean_td_error: 0.06387388706207275
        min_q: 2.1862237453460693
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01493002101778984
        max_q: 2.725573778152466
        mean_q: 2.589775562286377
        mean_td_error: -0.2387395054101944
        min_q: 2.439476490020752
    num_steps_sampled: 1418240
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1385,2858.94,1418240,52.9484,59.1078,46.2308,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-54-48
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.10775296678279
  episode_reward_mean: 52.4066873621684
  episode_reward_min: 48.63740373414158
  episodes_this_iter: 8
  episodes_total: 14208
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1421312
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003118782537057996
        max_q: 3.0707857608795166
        mean_q: 2.968902826309204
        mean_td_error: -0.0057919323444366455
        min_q: 2.8810644149780273
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014588365331292152
        max_q: 2.7575972080230713
        mean_q: 2.538466453552246
        mean_td_error: -0.23085498809814453
        min_q: 2.443390369415283
    num_steps_sampled: 1421312
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1388,2866.02,1421312,52.4067,59.1078,48.6374,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-54-56
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.81295352392032
  episode_reward_mean: 50.87838295378817
  episode_reward_min: 47.83761892856009
  episodes_this_iter: 8
  episodes_total: 14240
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1424384
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005042172968387604
        max_q: 3.5779500007629395
        mean_q: 3.4967381954193115
        mean_td_error: -0.049685120582580566
        min_q: 3.419848680496216
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03452812880277634
        max_q: 3.0608832836151123
        mean_q: 2.594944953918457
        mean_td_error: -0.440902441740036
        min_q: 2.2986583709716797
    num_steps_sampled: 1424384
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1391,2873.11,1424384,50.8784,53.813,47.8376,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-55-03
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.81295352392032
  episode_reward_mean: 51.205013245921926
  episode_reward_min: 47.83761892856009
  episodes_this_iter: 8
  episodes_total: 14272
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1427456
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010129121132194996
        max_q: 3.897753953933716
        mean_q: 3.8386807441711426
        mean_td_error: 0.10194747149944305
        min_q: 3.7786567211151123
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010473497211933136
        max_q: 2.9016809463500977
        mean_q: 2.664874792098999
        mean_td_error: -0.12452062964439392
        min_q: 2.30539608001709
    num_steps_sampled: 1427456
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1394,2880.17,1427456,51.205,53.813,47.8376,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-55-09
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.81295352392032
  episode_reward_mean: 51.00620362156818
  episode_reward_min: 46.94034951829206
  episodes_this_iter: 8
  episodes_total: 14288
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1429504
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006232075858861208
        max_q: 4.076587200164795
        mean_q: 3.9677295684814453
        mean_td_error: 0.06881015002727509
        min_q: 3.904010772705078
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0026548041496425867
        max_q: 3.1276378631591797
        mean_q: 3.0133564472198486
        mean_td_error: 0.029447369277477264
        min_q: 2.8875014781951904
    num_steps_sampled: 1429504
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1396,2885.57,1429504,51.0062,53.813,46.9403,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-55-14
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.64354284942896
  episode_reward_mean: 50.8602570370134
  episode_reward_min: 46.94034951829206
  episodes_this_iter: 8
  episodes_total: 14312
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1431552
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0065067606046795845
        max_q: 4.151200294494629
        mean_q: 4.0732598304748535
        mean_td_error: -0.06477215141057968
        min_q: 3.930549383163452
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0184785109013319
        max_q: 2.9436542987823486
        mean_q: 2.7840821743011475
        mean_td_error: -0.24860435724258423
        min_q: 2.6221351623535156
    num_steps_sampled: 1431552
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1398,2890.58,1431552,50.8603,54.6435,46.9403,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-55-21
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.64354284942896
  episode_reward_mean: 51.290101781771234
  episode_reward_min: 46.94034951829206
  episodes_this_iter: 8
  episodes_total: 14344
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1434624
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003626125631853938
        max_q: 4.012960433959961
        mean_q: 3.946897268295288
        mean_td_error: -0.036086224019527435
        min_q: 3.881469249725342
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015620567835867405
        max_q: 2.765296697616577
        mean_q: 2.458482265472412
        mean_td_error: -0.18942409753799438
        min_q: 2.234424591064453
    num_steps_sampled: 1434624
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1401,2897.82,1434624,51.2901,54.6435,46.9403,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-55-26
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.64354284942896
  episode_reward_mean: 51.46931812880326
  episode_reward_min: 46.94034951829206
  episodes_this_iter: 8
  episodes_total: 14360
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1436672
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001912151463329792
        max_q: 4.092616081237793
        mean_q: 4.017266750335693
        mean_td_error: -0.009420499205589294
        min_q: 3.9283030033111572
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005938457325100899
        max_q: 2.670698881149292
        mean_q: 2.4983913898468018
        mean_td_error: -0.056142307817935944
        min_q: 2.2600338459014893
    num_steps_sampled: 1436672
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1403,2902.77,1436672,51.4693,54.6435,46.9403,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-55-32
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.64354284942896
  episode_reward_mean: 50.86334337900852
  episode_reward_min: 45.50516693696523
  episodes_this_iter: 8
  episodes_total: 14384
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1438720
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0063153887167572975
        max_q: 4.18759822845459
        mean_q: 4.101722717285156
        mean_td_error: -0.05925972759723663
        min_q: 4.0182952880859375
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.023459292948246002
        max_q: 2.963466167449951
        mean_q: 2.707051992416382
        mean_td_error: -0.21788427233695984
        min_q: 2.4911046028137207
    num_steps_sampled: 1438720
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1405,2907.81,1438720,50.8633,54.6435,45.5052,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-55-37
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.64354284942896
  episode_reward_mean: 50.57486472718112
  episode_reward_min: 45.50516693696523
  episodes_this_iter: 8
  episodes_total: 14400
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1440768
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0050073182210326195
        max_q: 4.281556606292725
        mean_q: 4.211997032165527
        mean_td_error: -0.07353635132312775
        min_q: 4.12760066986084
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014914208091795444
        max_q: 2.954338550567627
        mean_q: 2.7461228370666504
        mean_td_error: -0.1521752029657364
        min_q: 2.6290934085845947
    num_steps_sampled: 1440768
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1407,2912.74,1440768,50.5749,54.6435,45.5052,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-55-44
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.72226156010297
  episode_reward_mean: 49.405432996352914
  episode_reward_min: 45.50516693696523
  episodes_this_iter: 8
  episodes_total: 14432
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1443840
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002019369276240468
        max_q: 4.491835594177246
        mean_q: 4.424533367156982
        mean_td_error: 0.014132291078567505
        min_q: 4.354266166687012
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.028319014236330986
        max_q: 2.698089361190796
        mean_q: 2.456901788711548
        mean_td_error: -0.30201539397239685
        min_q: 2.1489806175231934
    num_steps_sampled: 1443840
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1410,2919.66,1443840,49.4054,53.7223,45.5052,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-55-51
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 51.50941780258738
  episode_reward_mean: 48.23577499553186
  episode_reward_min: 45.50516693696523
  episodes_this_iter: 8
  episodes_total: 14464
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1446912
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005162792280316353
        max_q: 4.622415542602539
        mean_q: 4.489195346832275
        mean_td_error: -0.06992849707603455
        min_q: 4.3939409255981445
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0074455635622143745
        max_q: 2.905381202697754
        mean_q: 2.769022226333618
        mean_td_error: -0.08686011284589767
        min_q: 2.5567502975463867
    num_steps_sampled: 1446912
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1413,2926.2,1446912,48.2358,51.5094,45.5052,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-55-57
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.682408370403934
  episode_reward_mean: 49.978646906782394
  episode_reward_min: 45.71909628761542
  episodes_this_iter: 8
  episodes_total: 14496
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1449984
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0067389123141765594
        max_q: 4.449552536010742
        mean_q: 4.224515914916992
        mean_td_error: -0.05345138907432556
        min_q: 4.095890998840332
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003158325795084238
        max_q: 2.893038034439087
        mean_q: 2.7489430904388428
        mean_td_error: -0.021959245204925537
        min_q: 2.549372673034668
    num_steps_sampled: 1449984
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1416,2932.42,1449984,49.9786,56.6824,45.7191,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-56-03
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.682408370403934
  episode_reward_mean: 50.56652104298465
  episode_reward_min: 44.29846565023309
  episodes_this_iter: 8
  episodes_total: 14528
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1453056
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006919137667864561
        max_q: 4.559362411499023
        mean_q: 4.478606700897217
        mean_td_error: -0.05700145661830902
        min_q: 4.41725492477417
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010570456273853779
        max_q: 2.976316452026367
        mean_q: 2.838787317276001
        mean_td_error: -0.124417245388031
        min_q: 2.6911351680755615
    num_steps_sampled: 1453056
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1419,2938.42,1453056,50.5665,56.6824,44.2985,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-56-09
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.682408370403934
  episode_reward_mean: 51.02059289627985
  episode_reward_min: 44.29846565023309
  episodes_this_iter: 16
  episodes_total: 14560
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1456128
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02203213982284069
        max_q: 4.577991485595703
        mean_q: 4.254184722900391
        mean_td_error: -0.21243953704833984
        min_q: 4.067242622375488
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0017469447338953614
        max_q: 3.264484405517578
        mean_q: 3.2000980377197266
        mean_td_error: 0.0021048039197921753
        min_q: 3.086651563644409
    num_steps_sampled: 1456128
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1422,2944.28,1456128,51.0206,56.6824,44.2985,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-56-16
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.025546812315284
  episode_reward_mean: 51.13627946254006
  episode_reward_min: 44.29846565023309
  episodes_this_iter: 16
  episodes_total: 14592
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1459200
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005659202113747597
        max_q: 4.527214050292969
        mean_q: 4.281110763549805
        mean_td_error: 0.06699948012828827
        min_q: 4.211345195770264
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.022535279393196106
        max_q: 2.985305070877075
        mean_q: 2.7654507160186768
        mean_td_error: -0.248016357421875
        min_q: 2.6720213890075684
    num_steps_sampled: 1459200
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1425,2950.15,1459200,51.1363,56.0255,44.2985,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-56-21
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.025546812315284
  episode_reward_mean: 50.68188661059671
  episode_reward_min: 44.32844037778512
  episodes_this_iter: 8
  episodes_total: 14616
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1462272
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008092903532087803
        max_q: 4.3002729415893555
        mean_q: 4.134617328643799
        mean_td_error: -0.08065788447856903
        min_q: 4.006990909576416
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006646467838436365
        max_q: 2.9326484203338623
        mean_q: 2.80183744430542
        mean_td_error: -0.06852931529283524
        min_q: 2.6023521423339844
    num_steps_sampled: 1462272
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1428,2955.83,1462272,50.6819,56.0255,44.3284,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-56-27
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.025546812315284
  episode_reward_mean: 50.80684711094161
  episode_reward_min: 44.52210284503766
  episodes_this_iter: 8
  episodes_total: 14648
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1465344
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009240721352398396
        max_q: 4.2066731452941895
        mean_q: 4.078404426574707
        mean_td_error: -0.10979947447776794
        min_q: 3.894890308380127
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009842715226113796
        max_q: 2.8544445037841797
        mean_q: 2.6515281200408936
        mean_td_error: -0.10232595354318619
        min_q: 2.3622074127197266
    num_steps_sampled: 1465344
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1431,2961.32,1465344,50.8068,56.0255,44.5221,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-56-33
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.025546812315284
  episode_reward_mean: 49.781639717105364
  episode_reward_min: 44.52210284503766
  episodes_this_iter: 8
  episodes_total: 14680
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1468416
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01740887202322483
        max_q: 4.289104461669922
        mean_q: 4.106350421905518
        mean_td_error: -0.20092010498046875
        min_q: 3.923661470413208
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.018279483541846275
        max_q: 2.437598705291748
        mean_q: 2.179835557937622
        mean_td_error: -0.22465792298316956
        min_q: 1.756829023361206
    num_steps_sampled: 1468416
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1434,2967,1468416,49.7816,56.0255,44.5221,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-56-39
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.951219779656284
  episode_reward_mean: 50.05685811289476
  episode_reward_min: 44.52210284503766
  episodes_this_iter: 8
  episodes_total: 14712
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1471488
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00571388378739357
        max_q: 4.107230186462402
        mean_q: 3.940199613571167
        mean_td_error: -0.06088275462388992
        min_q: 3.8351151943206787
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005126064643263817
        max_q: 2.3272368907928467
        mean_q: 1.9669829607009888
        mean_td_error: 0.015384498983621597
        min_q: 1.7432647943496704
    num_steps_sampled: 1471488
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1437,2972.85,1471488,50.0569,53.9512,44.5221,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-56-46
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.11001648531599
  episode_reward_mean: 51.26904008606097
  episode_reward_min: 45.60874536731745
  episodes_this_iter: 16
  episodes_total: 14744
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1474560
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010981165803968906
        max_q: 3.9315600395202637
        mean_q: 3.805502414703369
        mean_td_error: -0.13494423031806946
        min_q: 3.7290170192718506
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005627973470836878
        max_q: 1.7918205261230469
        mean_q: 1.6877713203430176
        mean_td_error: -0.06030070409178734
        min_q: 1.3513455390930176
    num_steps_sampled: 1474560
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1440,2979.16,1474560,51.269,57.11,45.6087,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-56-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.11001648531599
  episode_reward_mean: 52.74195036011677
  episode_reward_min: 48.363510724310416
  episodes_this_iter: 16
  episodes_total: 14776
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1477632
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005194025579839945
        max_q: 4.0443243980407715
        mean_q: 3.953878402709961
        mean_td_error: 0.06588386744260788
        min_q: 3.71848201751709
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011892428621649742
        max_q: 0.783592700958252
        mean_q: 0.5562394857406616
        mean_td_error: -0.10548081248998642
        min_q: 0.0030556917190551758
    num_steps_sampled: 1477632
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1443,2985.47,1477632,52.742,57.11,48.3635,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-56-59
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.11001648531599
  episode_reward_mean: 51.87244374575017
  episode_reward_min: 45.43439156608635
  episodes_this_iter: 8
  episodes_total: 14800
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1480704
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012870438396930695
        max_q: 4.081851005554199
        mean_q: 3.9892144203186035
        mean_td_error: 0.17060382664203644
        min_q: 3.8838562965393066
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01391228474676609
        max_q: 0.7901158928871155
        mean_q: 0.7102175951004028
        mean_td_error: 0.14712269604206085
        min_q: 0.6488785743713379
    num_steps_sampled: 1480704
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1446,2991.78,1480704,51.8724,57.11,45.4344,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-57-05
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.11001648531599
  episode_reward_mean: 50.910257148747625
  episode_reward_min: 45.43439156608635
  episodes_this_iter: 8
  episodes_total: 14832
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1483776
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007854395546019077
        max_q: 3.961176633834839
        mean_q: 3.751328229904175
        mean_td_error: -0.08647625148296356
        min_q: 3.6396446228027344
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0055073900148272514
        max_q: 2.0859997272491455
        mean_q: 1.8960767984390259
        mean_td_error: 0.035281725227832794
        min_q: 1.7699601650238037
    num_steps_sampled: 1483776
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1449,2998.1,1483776,50.9103,57.11,45.4344,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-57-12
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 52.735225735886125
  episode_reward_mean: 50.11147036648016
  episode_reward_min: 45.43439156608635
  episodes_this_iter: 8
  episodes_total: 14864
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1486848
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02170122228562832
        max_q: 3.6688642501831055
        mean_q: 3.554995059967041
        mean_td_error: -0.2622440457344055
        min_q: 3.3625125885009766
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01860366202890873
        max_q: 2.235222339630127
        mean_q: 2.077316999435425
        mean_td_error: -0.19045817852020264
        min_q: 1.9254618883132935
    num_steps_sampled: 1486848
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1452,3004.16,1486848,50.1115,52.7352,45.4344,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-57-18
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.32248820148828
  episode_reward_mean: 51.077705768408244
  episode_reward_min: 46.092739139666854
  episodes_this_iter: 8
  episodes_total: 14896
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1489920
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0069486224092543125
        max_q: 4.01384973526001
        mean_q: 3.417682409286499
        mean_td_error: -0.015907064080238342
        min_q: 3.1622533798217773
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0038014966994524
        max_q: 2.4885029792785645
        mean_q: 2.332143783569336
        mean_td_error: 0.01934828609228134
        min_q: 2.1675949096679688
    num_steps_sampled: 1489920
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1455,3010.51,1489920,51.0777,57.3225,46.0927,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-57-25
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.32248820148828
  episode_reward_mean: 51.69510849617423
  episode_reward_min: 47.33048804419651
  episodes_this_iter: 16
  episodes_total: 14928
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1492992
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00659942626953125
        max_q: 3.0864920616149902
        mean_q: 2.8142008781433105
        mean_td_error: 0.09285221248865128
        min_q: 2.601956844329834
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014952205121517181
        max_q: 2.6422982215881348
        mean_q: 2.456113576889038
        mean_td_error: -0.16617539525032043
        min_q: 2.334622859954834
    num_steps_sampled: 1492992
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1458,3017.05,1492992,51.6951,57.3225,47.3305,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-57-32
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.13971074270875
  episode_reward_mean: 52.2524486974498
  episode_reward_min: 47.33048804419651
  episodes_this_iter: 16
  episodes_total: 14960
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1496064
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010341976769268513
        max_q: 2.551262617111206
        mean_q: 2.432340145111084
        mean_td_error: -0.14998893439769745
        min_q: 2.357515811920166
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004672119859606028
        max_q: 2.7720277309417725
        mean_q: 2.6541764736175537
        mean_td_error: 0.060586683452129364
        min_q: 2.459273338317871
    num_steps_sampled: 1496064
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1461,3024.09,1496064,52.2524,59.1397,47.3305,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-57-38
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.13971074270875
  episode_reward_mean: 52.30747365142156
  episode_reward_min: 47.33048804419651
  episodes_this_iter: 8
  episodes_total: 14976
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1498112
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0012965095229446888
        max_q: 2.952253580093384
        mean_q: 2.8659214973449707
        mean_td_error: -0.005128949880599976
        min_q: 2.810762643814087
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0037276139482855797
        max_q: 3.355839252471924
        mean_q: 3.253460645675659
        mean_td_error: 0.0559673011302948
        min_q: 3.1588761806488037
    num_steps_sampled: 1498112
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1463,3029.71,1498112,52.3075,59.1397,47.3305,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-57-44
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.832275849165335
  episode_reward_mean: 53.37062963375038
  episode_reward_min: 48.586739140453055
  episodes_this_iter: 16
  episodes_total: 15000
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1500160
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03677976876497269
        max_q: 2.973696231842041
        mean_q: 2.7472147941589355
        mean_td_error: -0.502620279788971
        min_q: 2.6165666580200195
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004710564389824867
        max_q: 3.6472105979919434
        mean_q: 3.504805326461792
        mean_td_error: -0.040957920253276825
        min_q: 3.3210649490356445
    num_steps_sampled: 1500160
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1465,3035.52,1500160,53.3706,59.8323,48.5867,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-57-50
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.832275849165335
  episode_reward_mean: 53.94970457334118
  episode_reward_min: 49.064731155909726
  episodes_this_iter: 8
  episodes_total: 15016
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1502208
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0032843605149537325
        max_q: 2.8067612648010254
        mean_q: 2.654628276824951
        mean_td_error: -0.013159245252609253
        min_q: 2.5218801498413086
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00243344041518867
        max_q: 3.8067829608917236
        mean_q: 3.5819637775421143
        mean_td_error: -0.006031952798366547
        min_q: 3.430246591567993
    num_steps_sampled: 1502208
    num_steps_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1467,3041.26,1502208,53.9497,59.8323,49.0647,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-57-56
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.832275849165335
  episode_reward_mean: 54.27119060258995
  episode_reward_min: 49.064731155909726
  episodes_this_iter: 8
  episodes_total: 15040
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1504256
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015568575821816921
        max_q: 3.1127281188964844
        mean_q: 2.9887800216674805
        mean_td_error: -0.18378928303718567
        min_q: 2.8482983112335205
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02449536882340908
        max_q: 3.571031093597412
        mean_q: 3.358872890472412
        mean_td_error: -0.3205375075340271
        min_q: 3.194040298461914
    num_steps_sampled: 1504256
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1469,3046.99,1504256,54.2712,59.8323,49.0647,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-58-02
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.832275849165335
  episode_reward_mean: 55.106192169657916
  episode_reward_min: 49.064731155909726
  episodes_this_iter: 8
  episodes_total: 15056
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1506304
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006520790513604879
        max_q: 3.4007668495178223
        mean_q: 3.2867212295532227
        mean_td_error: -0.06405129283666611
        min_q: 3.1979432106018066
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.022229047492146492
        max_q: 3.815133571624756
        mean_q: 3.5425171852111816
        mean_td_error: -0.2827240526676178
        min_q: 3.4424710273742676
    num_steps_sampled: 1506304
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1471,3052.63,1506304,55.1062,59.8323,49.0647,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-58-07
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.832275849165335
  episode_reward_mean: 55.40838965673956
  episode_reward_min: 49.064731155909726
  episodes_this_iter: 8
  episodes_total: 15080
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1508352
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011937389150261879
        max_q: 3.69912052154541
        mean_q: 3.574457883834839
        mean_td_error: -0.14715713262557983
        min_q: 3.4621081352233887
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002678370103240013
        max_q: 3.7584049701690674
        mean_q: 3.618356227874756
        mean_td_error: 0.023782216012477875
        min_q: 3.447396755218506
    num_steps_sampled: 1508352
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1473,3058.01,1508352,55.4084,59.8323,49.0647,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-58-13
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 61.84987989862367
  episode_reward_mean: 56.63198943126586
  episode_reward_min: 52.93510839133184
  episodes_this_iter: 16
  episodes_total: 15104
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1510400
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006581505760550499
        max_q: 3.9357662200927734
        mean_q: 3.6407058238983154
        mean_td_error: -0.038172900676727295
        min_q: 3.257713556289673
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007503868080675602
        max_q: 3.6505677700042725
        mean_q: 3.5327436923980713
        mean_td_error: -0.14159061014652252
        min_q: 3.398407220840454
    num_steps_sampled: 1510400
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1475,3063.07,1510400,56.632,61.8499,52.9351,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-58-18
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 61.84987989862367
  episode_reward_mean: 57.16114269120992
  episode_reward_min: 53.53037911989729
  episodes_this_iter: 8
  episodes_total: 15120
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1512448
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006999932695180178
        max_q: 3.1916563510894775
        mean_q: 2.9794394969940186
        mean_td_error: -0.07148382067680359
        min_q: 2.8383591175079346
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0022651799954473972
        max_q: 3.914727210998535
        mean_q: 3.8279080390930176
        mean_td_error: 0.037864550948143005
        min_q: 3.752140998840332
    num_steps_sampled: 1512448
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1477,3067.98,1512448,57.1611,61.8499,53.5304,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-58-25
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 61.88123483887105
  episode_reward_mean: 57.26654112100635
  episode_reward_min: 53.08903096354228
  episodes_this_iter: 8
  episodes_total: 15152
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1515520
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0052544232457876205
        max_q: 3.1127724647521973
        mean_q: 2.9796040058135986
        mean_td_error: -0.056774310767650604
        min_q: 2.793588876724243
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015272858552634716
        max_q: 3.848667621612549
        mean_q: 3.698237180709839
        mean_td_error: -0.23594364523887634
        min_q: 3.629948854446411
    num_steps_sampled: 1515520
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1480,3075.09,1515520,57.2665,61.8812,53.089,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-58-32
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 61.88123483887105
  episode_reward_mean: 58.210596692413795
  episode_reward_min: 53.08903096354228
  episodes_this_iter: 16
  episodes_total: 15184
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1518592
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.029205232858657837
        max_q: 2.9706263542175293
        mean_q: 2.7034876346588135
        mean_td_error: -0.3395211696624756
        min_q: 2.5128285884857178
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005031175911426544
        max_q: 4.254408836364746
        mean_q: 4.141943454742432
        mean_td_error: -0.0870368629693985
        min_q: 4.039485931396484
    num_steps_sampled: 1518592
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1483,3081.78,1518592,58.2106,61.8812,53.089,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-58-39
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 61.88123483887105
  episode_reward_mean: 56.64959119054291
  episode_reward_min: 50.486590937829284
  episodes_this_iter: 16
  episodes_total: 15216
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1521664
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012464011088013649
        max_q: 3.3575141429901123
        mean_q: 3.0463361740112305
        mean_td_error: -0.16445747017860413
        min_q: 2.855851888656616
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0036462985444813967
        max_q: 4.250543594360352
        mean_q: 4.1591057777404785
        mean_td_error: -0.05998203158378601
        min_q: 4.054367542266846
    num_steps_sampled: 1521664
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1486,3088.31,1521664,56.6496,61.8812,50.4866,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-58-46
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 61.88123483887105
  episode_reward_mean: 56.49965326849054
  episode_reward_min: 50.486590937829284
  episodes_this_iter: 8
  episodes_total: 15240
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1524736
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0017683685291558504
        max_q: 3.2223918437957764
        mean_q: 3.1184375286102295
        mean_td_error: 0.013541340827941895
        min_q: 2.984534502029419
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0012333400081843138
        max_q: 4.439737796783447
        mean_q: 4.364783763885498
        mean_td_error: -0.01712283492088318
        min_q: 4.256534099578857
    num_steps_sampled: 1524736
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1489,3095.26,1524736,56.4997,61.8812,50.4866,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-58-51
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.049371910686936
  episode_reward_mean: 55.5247081863801
  episode_reward_min: 50.486590937829284
  episodes_this_iter: 8
  episodes_total: 15264
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1526784
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00871909037232399
        max_q: 3.3496155738830566
        mean_q: 3.2041373252868652
        mean_td_error: -0.10163488239049911
        min_q: 3.148667812347412
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004677897784858942
        max_q: 4.60114860534668
        mean_q: 4.450591087341309
        mean_td_error: -0.07630361616611481
        min_q: 4.339507579803467
    num_steps_sampled: 1526784
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1491,3100.26,1526784,55.5247,60.0494,50.4866,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-58-56
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.59537314203267
  episode_reward_mean: 54.7371567272223
  episode_reward_min: 50.486590937829284
  episodes_this_iter: 16
  episodes_total: 15288
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1528832
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002096588257700205
        max_q: 3.624955177307129
        mean_q: 3.560673236846924
        mean_td_error: -0.0107240229845047
        min_q: 3.4424943923950195
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004478810355067253
        max_q: 4.61550235748291
        mean_q: 4.548271179199219
        mean_td_error: 0.08051595091819763
        min_q: 4.514913082122803
    num_steps_sampled: 1528832
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1493,3105.31,1528832,54.7372,59.5954,50.4866,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-59-02
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.59537314203267
  episode_reward_mean: 55.12069776009466
  episode_reward_min: 50.583334201280536
  episodes_this_iter: 8
  episodes_total: 15304
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1530880
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0031579257920384407
        max_q: 3.9940123558044434
        mean_q: 3.7917070388793945
        mean_td_error: 0.029993213713169098
        min_q: 3.6916375160217285
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003415412735193968
        max_q: 4.7065606117248535
        mean_q: 4.639013290405273
        mean_td_error: -0.05928315222263336
        min_q: 4.55088472366333
    num_steps_sampled: 1530880
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1495,3110.54,1530880,55.1207,59.5954,50.5833,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-59-07
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.59537314203267
  episode_reward_mean: 55.50082278380598
  episode_reward_min: 51.07029619440905
  episodes_this_iter: 16
  episodes_total: 15328
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1532928
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005423516966402531
        max_q: 4.00223445892334
        mean_q: 3.7637739181518555
        mean_td_error: -0.03532973676919937
        min_q: 3.5868008136749268
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0013289876515045762
        max_q: 4.798696517944336
        mean_q: 4.73284912109375
        mean_td_error: 0.020080819725990295
        min_q: 4.655635833740234
    num_steps_sampled: 1532928
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1497,3115.61,1532928,55.5008,59.5954,51.0703,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-59-14
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.58483751178826
  episode_reward_mean: 56.18611207244148
  episode_reward_min: 54.05942079657954
  episodes_this_iter: 16
  episodes_total: 15360
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1536000
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004839422181248665
        max_q: 3.921224355697632
        mean_q: 3.777710199356079
        mean_td_error: -0.062225162982940674
        min_q: 3.6750290393829346
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.017442122101783752
        max_q: 4.395443439483643
        mean_q: 4.256594657897949
        mean_td_error: -0.3459162712097168
        min_q: 4.154048919677734
    num_steps_sampled: 1536000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1500,3122.69,1536000,56.1861,59.5848,54.0594,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-59-22
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.6150884992272
  episode_reward_mean: 56.142724107309235
  episode_reward_min: 52.893659450976834
  episodes_this_iter: 8
  episodes_total: 15384
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1539072
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0017139556584879756
        max_q: 3.939969301223755
        mean_q: 3.8722944259643555
        mean_td_error: -0.028845272958278656
        min_q: 3.789987325668335
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0015541737666353583
        max_q: 4.424360275268555
        mean_q: 4.346404552459717
        mean_td_error: 0.026237711310386658
        min_q: 4.284394264221191
    num_steps_sampled: 1539072
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1503,3129.75,1539072,56.1427,58.6151,52.8937,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-59-29
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.6150884992272
  episode_reward_mean: 55.76866169294329
  episode_reward_min: 52.893659450976834
  episodes_this_iter: 8
  episodes_total: 15416
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1542144
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012117433361709118
        max_q: 4.291423320770264
        mean_q: 4.0433549880981445
        mean_td_error: -0.17770275473594666
        min_q: 3.842141628265381
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002353779273107648
        max_q: 4.583901405334473
        mean_q: 4.51116943359375
        mean_td_error: -0.05013051629066467
        min_q: 4.420657634735107
    num_steps_sampled: 1542144
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1506,3136.61,1542144,55.7687,58.6151,52.8937,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-59-36
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.6150884992272
  episode_reward_mean: 54.549627877665465
  episode_reward_min: 49.88672718547299
  episodes_this_iter: 8
  episodes_total: 15448
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1545216
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.04418153315782547
        max_q: 3.998569965362549
        mean_q: 3.5621581077575684
        mean_td_error: -0.6156949400901794
        min_q: 3.2159271240234375
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00722002238035202
        max_q: 4.953180313110352
        mean_q: 4.497392177581787
        mean_td_error: -0.12275293469429016
        min_q: 4.334517955780029
    num_steps_sampled: 1545216
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1509,3143.39,1545216,54.5496,58.6151,49.8867,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-59-43
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.48757298655532
  episode_reward_mean: 53.19641085807907
  episode_reward_min: 47.89323481587548
  episodes_this_iter: 8
  episodes_total: 15480
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1548288
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010821028612554073
        max_q: 3.5222151279449463
        mean_q: 3.324018955230713
        mean_td_error: -0.16520702838897705
        min_q: 3.202146291732788
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01114520151168108
        max_q: 4.686474323272705
        mean_q: 4.335827827453613
        mean_td_error: -0.1979048252105713
        min_q: 4.097639083862305
    num_steps_sampled: 1548288
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1512,3150.17,1548288,53.1964,57.4876,47.8932,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-59-50
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.73386590814651
  episode_reward_mean: 53.53026859879723
  episode_reward_min: 47.89323481587548
  episodes_this_iter: 16
  episodes_total: 15512
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1551360
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013582377694547176
        max_q: 3.442157745361328
        mean_q: 3.2560272216796875
        mean_td_error: -0.20649921894073486
        min_q: 3.0644614696502686
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004520981572568417
        max_q: 4.110846996307373
        mean_q: 3.98041033744812
        mean_td_error: -0.08607367426156998
        min_q: 3.888688087463379
    num_steps_sampled: 1551360
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1515,3156.97,1551360,53.5303,58.7339,47.8932,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_13-59-57
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.73386590814651
  episode_reward_mean: 52.50762033171197
  episode_reward_min: 47.185015792023634
  episodes_this_iter: 16
  episodes_total: 15544
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1554432
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01626131497323513
        max_q: 2.541635274887085
        mean_q: 1.5379642248153687
        mean_td_error: 0.2694568336009979
        min_q: 0.9637930989265442
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005267200525850058
        max_q: 3.9517364501953125
        mean_q: 3.5945956707000732
        mean_td_error: -0.04013606160879135
        min_q: 3.3491461277008057
    num_steps_sampled: 1554432
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1518,3163.78,1554432,52.5076,58.7339,47.185,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-00-04
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.73386590814651
  episode_reward_mean: 53.48401282977752
  episode_reward_min: 47.185015792023634
  episodes_this_iter: 8
  episodes_total: 15568
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1557504
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012680568732321262
        max_q: 2.5722460746765137
        mean_q: 2.39506459236145
        mean_td_error: 0.20356954634189606
        min_q: 2.1055612564086914
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.020695170387625694
        max_q: 3.108624219894409
        mean_q: 2.935945987701416
        mean_td_error: -0.49088478088378906
        min_q: 2.8011295795440674
    num_steps_sampled: 1557504
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1521,3170.65,1557504,53.484,58.7339,47.185,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-00-11
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.73386590814651
  episode_reward_mean: 53.33056574238847
  episode_reward_min: 47.185015792023634
  episodes_this_iter: 8
  episodes_total: 15600
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1560576
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0025441499892622232
        max_q: 3.3901758193969727
        mean_q: 3.2501156330108643
        mean_td_error: -0.025349102914333344
        min_q: 3.1342790126800537
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002732980065047741
        max_q: 3.372145891189575
        mean_q: 3.082378625869751
        mean_td_error: -0.010765552520751953
        min_q: 2.9442293643951416
    num_steps_sampled: 1560576
    num_steps_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1524,3177.72,1560576,53.3306,58.7339,47.185,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-00-19
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.363389003147475
  episode_reward_mean: 53.49792840082848
  episode_reward_min: 47.185015792023634
  episodes_this_iter: 8
  episodes_total: 15632
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1563648
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0028217174112796783
        max_q: 3.876669406890869
        mean_q: 3.8343324661254883
        mean_td_error: 0.05185525119304657
        min_q: 3.481013298034668
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007599019445478916
        max_q: 3.274775505065918
        mean_q: 3.16245698928833
        mean_td_error: -0.15631181001663208
        min_q: 3.0969505310058594
    num_steps_sampled: 1563648
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1527,3184.82,1563648,53.4979,58.3634,47.185,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-00-24
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.363389003147475
  episode_reward_mean: 53.65480875692216
  episode_reward_min: 48.45083582779608
  episodes_this_iter: 16
  episodes_total: 15656
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1565696
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0017482356633991003
        max_q: 4.303421974182129
        mean_q: 4.2453389167785645
        mean_td_error: 0.02794136106967926
        min_q: 4.155337333679199
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0019326484762132168
        max_q: 3.4240386486053467
        mean_q: 3.230861186981201
        mean_td_error: -0.03239603340625763
        min_q: 3.1047282218933105
    num_steps_sampled: 1565696
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1529,3189.8,1565696,53.6548,58.3634,48.4508,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-00-29
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.363389003147475
  episode_reward_mean: 52.484167775825455
  episode_reward_min: 46.47481903853046
  episodes_this_iter: 8
  episodes_total: 15672
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1567744
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0008447548607364297
        max_q: 4.515760898590088
        mean_q: 4.477626800537109
        mean_td_error: -0.0024155378341674805
        min_q: 4.338784217834473
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0074257198721170425
        max_q: 3.349334716796875
        mean_q: 3.2786781787872314
        mean_td_error: -0.12973017990589142
        min_q: 3.220956325531006
    num_steps_sampled: 1567744
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1531,3195.01,1567744,52.4842,58.3634,46.4748,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-00-35
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.36483953626254
  episode_reward_mean: 52.47436153778053
  episode_reward_min: 46.47481903853046
  episodes_this_iter: 16
  episodes_total: 15696
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1569792
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0012620561756193638
        max_q: 4.6867356300354
        mean_q: 4.627774715423584
        mean_td_error: 0.025045931339263916
        min_q: 4.528067588806152
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0018528761574998498
        max_q: 3.583794116973877
        mean_q: 3.47421932220459
        mean_td_error: -0.025275543332099915
        min_q: 3.400047540664673
    num_steps_sampled: 1569792
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1533,3200.23,1569792,52.4744,58.3648,46.4748,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-00-40
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.36483953626254
  episode_reward_mean: 52.066755633021785
  episode_reward_min: 46.47481903853046
  episodes_this_iter: 8
  episodes_total: 15712
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1571840
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0031465967185795307
        max_q: 4.7380781173706055
        mean_q: 4.672173023223877
        mean_td_error: -0.0634562075138092
        min_q: 4.593662261962891
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0020399431232362986
        max_q: 3.719104528427124
        mean_q: 3.633394241333008
        mean_td_error: 0.031032994389533997
        min_q: 3.5433220863342285
    num_steps_sampled: 1571840
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1535,3205.43,1571840,52.0668,58.3648,46.4748,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-00-46
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.36483953626254
  episode_reward_mean: 52.079589901965434
  episode_reward_min: 46.47481903853046
  episodes_this_iter: 8
  episodes_total: 15736
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1573888
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0010300452122464776
        max_q: 4.7163567543029785
        mean_q: 4.628215312957764
        mean_td_error: -0.005077540874481201
        min_q: 4.542858123779297
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004299068357795477
        max_q: 3.9268882274627686
        mean_q: 3.853734254837036
        mean_td_error: 0.08022210747003555
        min_q: 3.661558151245117
    num_steps_sampled: 1573888
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1537,3210.94,1573888,52.0796,58.3648,46.4748,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-00-51
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.36483953626254
  episode_reward_mean: 51.78205367035627
  episode_reward_min: 46.47481903853046
  episodes_this_iter: 8
  episodes_total: 15752
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1575936
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0010557648492977023
        max_q: 4.822617530822754
        mean_q: 4.7960405349731445
        mean_td_error: 0.008423805236816406
        min_q: 4.739153861999512
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001570237916894257
        max_q: 4.199375152587891
        mean_q: 4.082232475280762
        mean_td_error: -0.023463234305381775
        min_q: 4.0112409591674805
    num_steps_sampled: 1575936
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1539,3216.43,1575936,51.7821,58.3648,46.4748,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-00-57
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.36483953626254
  episode_reward_mean: 52.686382557416465
  episode_reward_min: 48.40458258949053
  episodes_this_iter: 8
  episodes_total: 15776
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1577984
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0018713772296905518
        max_q: 4.877082347869873
        mean_q: 4.8317084312438965
        mean_td_error: -0.03128872811794281
        min_q: 4.7352800369262695
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03887203708291054
        max_q: 3.784759283065796
        mean_q: 3.346799850463867
        mean_td_error: -0.6969528794288635
        min_q: 3.1173312664031982
    num_steps_sampled: 1577984
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1541,3221.77,1577984,52.6864,58.3648,48.4046,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-01-02
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.918085669515875
  episode_reward_mean: 52.354668160647115
  episode_reward_min: 48.40458258949053
  episodes_this_iter: 16
  episodes_total: 15800
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1580032
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001841061981394887
        max_q: 4.9788713455200195
        mean_q: 4.909756183624268
        mean_td_error: -0.03695881366729736
        min_q: 4.765830039978027
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0017499402165412903
        max_q: 3.623892307281494
        mean_q: 3.5225958824157715
        mean_td_error: -0.023055486381053925
        min_q: 3.4614059925079346
    num_steps_sampled: 1580032
    num_steps_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1543,3227.06,1580032,52.3547,56.9181,48.4046,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-01-08
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.02972327231714
  episode_reward_mean: 52.71309616805152
  episode_reward_min: 48.40458258949053
  episodes_this_iter: 8
  episodes_total: 15816
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1582080
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006286256480962038
        max_q: 4.837131023406982
        mean_q: 4.7940850257873535
        mean_td_error: -0.11177779734134674
        min_q: 4.684788227081299
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.019926685839891434
        max_q: 3.5744874477386475
        mean_q: 3.4146783351898193
        mean_td_error: -0.34389305114746094
        min_q: 3.2929511070251465
    num_steps_sampled: 1582080
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1545,3232.43,1582080,52.7131,58.0297,48.4046,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-01-14
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.02972327231714
  episode_reward_mean: 52.74992496910688
  episode_reward_min: 48.40458258949053
  episodes_this_iter: 16
  episodes_total: 15840
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1584128
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0029149174224585295
        max_q: 5.091137886047363
        mean_q: 4.931431770324707
        mean_td_error: -0.015069395303726196
        min_q: 4.806441307067871
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.05933816358447075
        max_q: 4.0184102058410645
        mean_q: 3.6745433807373047
        mean_td_error: -0.7108966112136841
        min_q: 3.195094108581543
    num_steps_sampled: 1584128
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1547,3237.9,1584128,52.7499,58.0297,48.4046,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-01-20
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.31650845700011
  episode_reward_mean: 53.307417125786166
  episode_reward_min: 48.586477330041596
  episodes_this_iter: 8
  episodes_total: 15856
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1586176
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0016525760293006897
        max_q: 4.914680480957031
        mean_q: 4.846558570861816
        mean_td_error: 0.021800190210342407
        min_q: 4.6931891441345215
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03431370481848717
        max_q: 3.1266045570373535
        mean_q: 2.5866973400115967
        mean_td_error: -0.37287136912345886
        min_q: 2.41434383392334
    num_steps_sampled: 1586176
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1549,3243.82,1586176,53.3074,58.3165,48.5865,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-01-26
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.31650845700011
  episode_reward_mean: 53.42393838778592
  episode_reward_min: 48.63903877307676
  episodes_this_iter: 8
  episodes_total: 15880
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1588224
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0016035671578720212
        max_q: 4.813540458679199
        mean_q: 4.66807222366333
        mean_td_error: 0.004700139164924622
        min_q: 4.538045406341553
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.025485103949904442
        max_q: 2.440450668334961
        mean_q: 2.047816514968872
        mean_td_error: -0.23351556062698364
        min_q: 1.661217212677002
    num_steps_sampled: 1588224
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1551,3249.79,1588224,53.4239,58.3165,48.639,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-01-33
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.31650845700011
  episode_reward_mean: 53.19065985235132
  episode_reward_min: 48.63903877307676
  episodes_this_iter: 8
  episodes_total: 15896
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1590272
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009399713017046452
        max_q: 4.776874542236328
        mean_q: 4.625626564025879
        mean_td_error: -0.16329024732112885
        min_q: 4.5438408851623535
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.04779723286628723
        max_q: 2.14725399017334
        mean_q: 1.7436586618423462
        mean_td_error: -0.5387146472930908
        min_q: 1.5269746780395508
    num_steps_sampled: 1590272
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1553,3256.13,1590272,53.1907,58.3165,48.639,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-01-39
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.31650845700011
  episode_reward_mean: 53.19137912706785
  episode_reward_min: 48.63903877307676
  episodes_this_iter: 8
  episodes_total: 15920
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1592320
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004118430893868208
        max_q: 4.408318519592285
        mean_q: 4.322843074798584
        mean_td_error: -0.07494772970676422
        min_q: 4.2708964347839355
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.025374477729201317
        max_q: 1.793062686920166
        mean_q: 1.4158995151519775
        mean_td_error: -0.2996508479118347
        min_q: 1.1583318710327148
    num_steps_sampled: 1592320
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1555,3262.39,1592320,53.1914,58.3165,48.639,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-01-45
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.31650845700011
  episode_reward_mean: 53.08693526271386
  episode_reward_min: 48.63591411412163
  episodes_this_iter: 8
  episodes_total: 15936
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1594368
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.000668295833747834
        max_q: 4.554934978485107
        mean_q: 4.476265907287598
        mean_td_error: 0.0069873034954071045
        min_q: 4.3935346603393555
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02538607455790043
        max_q: 1.3305933475494385
        mean_q: 1.0681947469711304
        mean_td_error: -0.3054669499397278
        min_q: 0.8765540719032288
    num_steps_sampled: 1594368
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1557,3268.33,1594368,53.0869,58.3165,48.6359,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-01-51
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.16346569090793
  episode_reward_mean: 53.0738875500548
  episode_reward_min: 48.63591411412163
  episodes_this_iter: 8
  episodes_total: 15960
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1596416
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0026084608398377895
        max_q: 4.54360818862915
        mean_q: 4.394016742706299
        mean_td_error: 0.03380104899406433
        min_q: 4.269128799438477
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003893420333042741
        max_q: 1.4696120023727417
        mean_q: 1.3429794311523438
        mean_td_error: -0.029741868376731873
        min_q: 1.2268227338790894
    num_steps_sampled: 1596416
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1559,3273.92,1596416,53.0739,57.1635,48.6359,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-01-57
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.16346569090793
  episode_reward_mean: 52.09200852233302
  episode_reward_min: 46.09922388160804
  episodes_this_iter: 16
  episodes_total: 15984
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1598464
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0023516768123954535
        max_q: 4.334819793701172
        mean_q: 4.236682891845703
        mean_td_error: 0.038551852107048035
        min_q: 4.04642915725708
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010367016308009624
        max_q: 1.9954643249511719
        mean_q: 1.8211795091629028
        mean_td_error: -0.1230006217956543
        min_q: 1.7444251775741577
    num_steps_sampled: 1598464
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1561,3279.35,1598464,52.092,57.1635,46.0992,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-02-02
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.16346569090793
  episode_reward_mean: 51.88388326989372
  episode_reward_min: 46.09922388160804
  episodes_this_iter: 8
  episodes_total: 16000
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1600512
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003729461692273617
        max_q: 4.490784168243408
        mean_q: 4.25666618347168
        mean_td_error: 0.04420952498912811
        min_q: 4.0256476402282715
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006640182808041573
        max_q: 2.4870800971984863
        mean_q: 2.357218027114868
        mean_td_error: 0.08424152433872223
        min_q: 2.2504332065582275
    num_steps_sampled: 1600512
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1563,3284.42,1600512,51.8839,57.1635,46.0992,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-02-07
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.16346569090793
  episode_reward_mean: 51.74879149054307
  episode_reward_min: 46.09922388160804
  episodes_this_iter: 16
  episodes_total: 16024
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1602560
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0074555641040205956
        max_q: 4.244996547698975
        mean_q: 4.107255935668945
        mean_td_error: 0.13728831708431244
        min_q: 3.924042224884033
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004114275332540274
        max_q: 2.7612648010253906
        mean_q: 2.632958173751831
        mean_td_error: -0.04583514481782913
        min_q: 2.4829347133636475
    num_steps_sampled: 1602560
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1565,3289.51,1602560,51.7488,57.1635,46.0992,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-02-14
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.16346569090793
  episode_reward_mean: 52.12891344439753
  episode_reward_min: 46.09922388160804
  episodes_this_iter: 16
  episodes_total: 16056
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1605632
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00927241425961256
        max_q: 3.52990460395813
        mean_q: 3.4096267223358154
        mean_td_error: -0.17037367820739746
        min_q: 3.1678740978240967
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003670409321784973
        max_q: 3.1748430728912354
        mean_q: 3.0391910076141357
        mean_td_error: -0.03761017322540283
        min_q: 2.9680256843566895
    num_steps_sampled: 1605632
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1568,3296.42,1605632,52.1289,57.1635,46.0992,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-02-21
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.60895549057872
  episode_reward_mean: 52.90557758236381
  episode_reward_min: 46.09922388160804
  episodes_this_iter: 8
  episodes_total: 16080
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1608704
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003257489763200283
        max_q: 3.549388885498047
        mean_q: 3.393580198287964
        mean_td_error: -0.02476656436920166
        min_q: 3.2629382610321045
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0035997291561216116
        max_q: 3.081998586654663
        mean_q: 3.027031660079956
        mean_td_error: -0.03239984065294266
        min_q: 2.958094358444214
    num_steps_sampled: 1608704
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1571,3303.3,1608704,52.9056,58.609,46.0992,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-02-29
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.60895549057872
  episode_reward_mean: 53.96879242802493
  episode_reward_min: 50.323868579286376
  episodes_this_iter: 8
  episodes_total: 16112
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1611776
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.06771774590015411
        max_q: 2.208467483520508
        mean_q: 1.3715404272079468
        mean_td_error: -0.911125659942627
        min_q: 0.743844211101532
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005133918020874262
        max_q: 3.2347192764282227
        mean_q: 3.153688907623291
        mean_td_error: -0.05642227828502655
        min_q: 3.041672468185425
    num_steps_sampled: 1611776
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1574,3310.16,1611776,53.9688,58.609,50.3239,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-02-35
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.60895549057872
  episode_reward_mean: 53.92723763017829
  episode_reward_min: 50.13094429250346
  episodes_this_iter: 8
  episodes_total: 16144
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1614848
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01453917846083641
        max_q: 1.0651434659957886
        mean_q: 0.8952385187149048
        mean_td_error: 0.16151760518550873
        min_q: 0.73075932264328
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004981351085007191
        max_q: 3.598214626312256
        mean_q: 3.5068163871765137
        mean_td_error: -0.07347133010625839
        min_q: 3.4158594608306885
    num_steps_sampled: 1614848
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1577,3316.72,1614848,53.9272,58.609,50.1309,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-02-42
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.60895549057872
  episode_reward_mean: 53.34425519480022
  episode_reward_min: 46.592484151740955
  episodes_this_iter: 8
  episodes_total: 16176
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1617920
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007896829396486282
        max_q: 1.638116717338562
        mean_q: 1.5044641494750977
        mean_td_error: -0.08361698687076569
        min_q: 1.2980012893676758
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0013391489628702402
        max_q: 3.9977426528930664
        mean_q: 3.8954906463623047
        mean_td_error: 0.003507383167743683
        min_q: 3.8235855102539062
    num_steps_sampled: 1617920
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1580,3323.56,1617920,53.3443,58.609,46.5925,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-02-50
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.145025771639425
  episode_reward_mean: 53.037453621081305
  episode_reward_min: 46.592484151740955
  episodes_this_iter: 16
  episodes_total: 16208
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1620992
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.018897974863648415
        max_q: 1.8870763778686523
        mean_q: 1.631879448890686
        mean_td_error: -0.20317909121513367
        min_q: 1.500747561454773
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004560939967632294
        max_q: 4.203033924102783
        mean_q: 4.135286808013916
        mean_td_error: 0.05700990557670593
        min_q: 4.029829502105713
    num_steps_sampled: 1620992
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1583,3330.7,1620992,53.0375,56.145,46.5925,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-02-57
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.145025771639425
  episode_reward_mean: 52.3636571298385
  episode_reward_min: 46.592484151740955
  episodes_this_iter: 16
  episodes_total: 16240
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1624064
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006393740884959698
        max_q: 2.3592803478240967
        mean_q: 2.2499217987060547
        mean_td_error: -0.06475269049406052
        min_q: 2.184959650039673
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0012277027126401663
        max_q: 4.282851696014404
        mean_q: 4.219789981842041
        mean_td_error: -0.021174520254135132
        min_q: 4.099055767059326
    num_steps_sampled: 1624064
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1586,3337.82,1624064,52.3637,56.145,46.5925,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-03-02
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.145025771639425
  episode_reward_mean: 52.208500594109694
  episode_reward_min: 46.592484151740955
  episodes_this_iter: 8
  episodes_total: 16256
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1626112
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008248069323599339
        max_q: 2.3998138904571533
        mean_q: 2.183366537094116
        mean_td_error: -0.0664244070649147
        min_q: 2.1022145748138428
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0008784647798165679
        max_q: 4.317355632781982
        mean_q: 4.241978168487549
        mean_td_error: 0.01606731116771698
        min_q: 4.1245856285095215
    num_steps_sampled: 1626112
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1588,3342.77,1626112,52.2085,56.145,46.5925,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-03-08
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.86666896491439
  episode_reward_mean: 52.7949494209816
  episode_reward_min: 47.35161501677328
  episodes_this_iter: 16
  episodes_total: 16280
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1628160
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005932760890573263
        max_q: 2.741413116455078
        mean_q: 2.5663652420043945
        mean_td_error: -0.04907281696796417
        min_q: 2.4279510974884033
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0011787269031628966
        max_q: 4.376264572143555
        mean_q: 4.304611682891846
        mean_td_error: -0.012539640069007874
        min_q: 4.20908784866333
    num_steps_sampled: 1628160
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1590,3347.8,1628160,52.7949,55.8667,47.3516,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-03-13
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.262073630749654
  episode_reward_mean: 52.92099993319293
  episode_reward_min: 47.35161501677328
  episodes_this_iter: 8
  episodes_total: 16296
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1630208
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005348738748580217
        max_q: 2.800137996673584
        mean_q: 2.687793493270874
        mean_td_error: -0.04760894179344177
        min_q: 2.61257266998291
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005014483351260424
        max_q: 4.347163677215576
        mean_q: 4.2295427322387695
        mean_td_error: -0.09519748389720917
        min_q: 4.067104339599609
    num_steps_sampled: 1630208
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1592,3353.26,1630208,52.921,56.2621,47.3516,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-03-19
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.262073630749654
  episode_reward_mean: 52.55381909213796
  episode_reward_min: 47.35161501677328
  episodes_this_iter: 8
  episodes_total: 16320
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1632256
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0069728754460811615
        max_q: 2.871920585632324
        mean_q: 2.7999675273895264
        mean_td_error: -0.05947704613208771
        min_q: 2.7278454303741455
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003620065748691559
        max_q: 4.22570276260376
        mean_q: 4.166218280792236
        mean_td_error: -0.06669475138187408
        min_q: 4.023922443389893
    num_steps_sampled: 1632256
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1594,3358.62,1632256,52.5538,56.2621,47.3516,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-03-24
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.262073630749654
  episode_reward_mean: 53.0471047709627
  episode_reward_min: 47.36884124521388
  episodes_this_iter: 8
  episodes_total: 16336
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1634304
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0030702450312674046
        max_q: 2.9394216537475586
        mean_q: 2.7576096057891846
        mean_td_error: -0.006315551698207855
        min_q: 2.529787302017212
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0026668556965887547
        max_q: 4.186514854431152
        mean_q: 4.129443645477295
        mean_td_error: 0.04874210059642792
        min_q: 3.998263359069824
    num_steps_sampled: 1634304
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1596,3363.61,1634304,53.0471,56.2621,47.3688,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-03-29
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.262073630749654
  episode_reward_mean: 53.34229336181906
  episode_reward_min: 50.10626515814556
  episodes_this_iter: 8
  episodes_total: 16360
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1636352
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.023913126438856125
        max_q: 2.9437434673309326
        mean_q: 2.6975603103637695
        mean_td_error: -0.243240624666214
        min_q: 2.5511038303375244
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007093832362443209
        max_q: 4.217075347900391
        mean_q: 4.139910697937012
        mean_td_error: -0.1346556842327118
        min_q: 4.06862735748291
    num_steps_sampled: 1636352
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1598,3368.62,1636352,53.3423,56.2621,50.1063,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-03-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.262073630749654
  episode_reward_mean: 52.79407398472367
  episode_reward_min: 50.10626515814556
  episodes_this_iter: 16
  episodes_total: 16384
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1638400
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008351249620318413
        max_q: 2.466125726699829
        mean_q: 2.338822603225708
        mean_td_error: -0.07369831949472427
        min_q: 2.1912853717803955
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0073241498321294785
        max_q: 4.179229259490967
        mean_q: 4.0703020095825195
        mean_td_error: -0.1388355791568756
        min_q: 3.9628777503967285
    num_steps_sampled: 1638400
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1600,3373.64,1638400,52.7941,56.2621,50.1063,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-03-40
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.09152875552206
  episode_reward_mean: 52.20649731595822
  episode_reward_min: 45.98896665302882
  episodes_this_iter: 8
  episodes_total: 16400
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1640448
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009676163084805012
        max_q: 2.737703800201416
        mean_q: 2.54007887840271
        mean_td_error: 0.14579090476036072
        min_q: 2.3961193561553955
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007734088692814112
        max_q: 4.276015758514404
        mean_q: 4.039302349090576
        mean_td_error: -0.14208555221557617
        min_q: 3.9279000759124756
    num_steps_sampled: 1640448
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1602,3378.71,1640448,52.2065,55.0915,45.989,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-03-45
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.09152875552206
  episode_reward_mean: 51.23035064839516
  episode_reward_min: 45.98896665302882
  episodes_this_iter: 16
  episodes_total: 16424
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1642496
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003923316486179829
        max_q: 2.701322078704834
        mean_q: 2.5991921424865723
        mean_td_error: -0.05181701481342316
        min_q: 2.4934921264648438
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0069524324499070644
        max_q: 3.9530043601989746
        mean_q: 3.846376419067383
        mean_td_error: -0.12521755695343018
        min_q: 3.715925455093384
    num_steps_sampled: 1642496
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1604,3383.78,1642496,51.2304,55.0915,45.989,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-03-50
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.935286221743695
  episode_reward_mean: 51.04339146581349
  episode_reward_min: 45.98896665302882
  episodes_this_iter: 8
  episodes_total: 16440
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1644544
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0015026392647996545
        max_q: 3.0497331619262695
        mean_q: 2.9574339389801025
        mean_td_error: -0.015542976558208466
        min_q: 2.8712399005889893
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006044378969818354
        max_q: 3.947310209274292
        mean_q: 3.712655544281006
        mean_td_error: -0.07861053943634033
        min_q: 3.5556273460388184
    num_steps_sampled: 1644544
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1606,3388.75,1644544,51.0434,55.9353,45.989,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-03-55
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.935286221743695
  episode_reward_mean: 50.32285756423428
  episode_reward_min: 45.98896665302882
  episodes_this_iter: 16
  episodes_total: 16464
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1646592
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003035915084183216
        max_q: 3.4982714653015137
        mean_q: 3.3118228912353516
        mean_td_error: 0.028921037912368774
        min_q: 3.1471729278564453
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008374723605811596
        max_q: 3.810764789581299
        mean_q: 3.761897563934326
        mean_td_error: -0.15964314341545105
        min_q: 3.7134430408477783
    num_steps_sampled: 1646592
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1608,3394,1646592,50.3229,55.9353,45.989,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-04-01
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.935286221743695
  episode_reward_mean: 49.77857286753497
  episode_reward_min: 45.98896665302882
  episodes_this_iter: 8
  episodes_total: 16480
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1648640
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0018031058134511113
        max_q: 3.607490301132202
        mean_q: 3.5295369625091553
        mean_td_error: -0.019215993583202362
        min_q: 3.455470323562622
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0044654859229922295
        max_q: 4.037459850311279
        mean_q: 3.7826833724975586
        mean_td_error: -0.07640785723924637
        min_q: 3.6097216606140137
    num_steps_sampled: 1648640
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1610,3399,1648640,49.7786,55.9353,45.989,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-04-06
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.935286221743695
  episode_reward_mean: 49.21660968508822
  episode_reward_min: 46.266675004105466
  episodes_this_iter: 8
  episodes_total: 16504
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1650688
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005009725224226713
        max_q: 3.843196392059326
        mean_q: 3.791767120361328
        mean_td_error: -0.06862108409404755
        min_q: 3.7141387462615967
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001321387942880392
        max_q: 4.022721767425537
        mean_q: 3.9220407009124756
        mean_td_error: -0.005306407809257507
        min_q: 3.8509581089019775
    num_steps_sampled: 1650688
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1612,3404,1650688,49.2166,55.9353,46.2667,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-04-11
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.935286221743695
  episode_reward_mean: 49.61204268937558
  episode_reward_min: 46.266675004105466
  episodes_this_iter: 8
  episodes_total: 16520
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1652736
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001143668545410037
        max_q: 4.137399673461914
        mean_q: 4.086918830871582
        mean_td_error: 0.003955423831939697
        min_q: 4.000150680541992
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007200298365205526
        max_q: 3.8744802474975586
        mean_q: 3.6434974670410156
        mean_td_error: -0.133609801530838
        min_q: 3.3812615871429443
    num_steps_sampled: 1652736
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1614,3409.03,1652736,49.612,55.9353,46.2667,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-04-16
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.43080518558929
  episode_reward_mean: 48.92658220449233
  episode_reward_min: 46.266675004105466
  episodes_this_iter: 8
  episodes_total: 16544
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1654784
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0013595119817182422
        max_q: 4.241216659545898
        mean_q: 4.137218475341797
        mean_td_error: -0.007270544767379761
        min_q: 4.085029125213623
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006392701994627714
        max_q: 3.580544948577881
        mean_q: 3.400688409805298
        mean_td_error: -0.11477009952068329
        min_q: 3.2539825439453125
    num_steps_sampled: 1654784
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1616,3413.92,1654784,48.9266,53.4308,46.2667,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-04-23
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.58660582403638
  episode_reward_mean: 49.48671080736909
  episode_reward_min: 46.687495454885436
  episodes_this_iter: 8
  episodes_total: 16576
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1657856
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0010492493165656924
        max_q: 4.177622318267822
        mean_q: 4.099980354309082
        mean_td_error: -0.0082487091422081
        min_q: 3.9756836891174316
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.019389603286981583
        max_q: 2.8449106216430664
        mean_q: 2.656815528869629
        mean_td_error: -0.32552891969680786
        min_q: 2.5020790100097656
    num_steps_sampled: 1657856
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1619,3421.02,1657856,49.4867,56.5866,46.6875,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-04-31
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.58660582403638
  episode_reward_mean: 49.74538729730595
  episode_reward_min: 46.687495454885436
  episodes_this_iter: 16
  episodes_total: 16608
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1660928
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004038423765450716
        max_q: 4.342136859893799
        mean_q: 4.151515483856201
        mean_td_error: -0.042013272643089294
        min_q: 4.030301094055176
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0073626297526061535
        max_q: 2.895591974258423
        mean_q: 2.444877862930298
        mean_td_error: -0.10955376923084259
        min_q: 2.286024570465088
    num_steps_sampled: 1660928
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1622,3428.11,1660928,49.7454,56.5866,46.6875,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-04-38
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.58660582403638
  episode_reward_mean: 48.62168044490941
  episode_reward_min: 42.40842885047445
  episodes_this_iter: 16
  episodes_total: 16640
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1664000
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001958478009328246
        max_q: 4.327610492706299
        mean_q: 4.273043632507324
        mean_td_error: 0.019463226199150085
        min_q: 4.1242547035217285
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010812905617058277
        max_q: 2.4213778972625732
        mean_q: 2.288607120513916
        mean_td_error: -0.1955070197582245
        min_q: 2.168062686920166
    num_steps_sampled: 1664000
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1625,3434.98,1664000,48.6217,56.5866,42.4084,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-04-45
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.58660582403638
  episode_reward_mean: 48.799777776166046
  episode_reward_min: 42.40842885047445
  episodes_this_iter: 8
  episodes_total: 16664
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1667072
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007097476162016392
        max_q: 4.1649885177612305
        mean_q: 4.047180652618408
        mean_td_error: -0.08973810821771622
        min_q: 3.9806675910949707
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0016831194516271353
        max_q: 2.759455680847168
        mean_q: 2.625119924545288
        mean_td_error: 0.025077305734157562
        min_q: 2.518171548843384
    num_steps_sampled: 1667072
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1628,3442.24,1667072,48.7998,56.5866,42.4084,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-04-51
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.05313137375343
  episode_reward_mean: 48.530319321828436
  episode_reward_min: 42.40842885047445
  episodes_this_iter: 8
  episodes_total: 16688
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1669120
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010106130503118038
        max_q: 4.134286403656006
        mean_q: 3.9939074516296387
        mean_td_error: -0.16804257035255432
        min_q: 3.9265198707580566
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0058294786140322685
        max_q: 2.9656808376312256
        mean_q: 2.8624565601348877
        mean_td_error: -0.10681743919849396
        min_q: 2.8299214839935303
    num_steps_sampled: 1669120
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1630,3447.43,1669120,48.5303,54.0531,42.4084,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-04-56
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.05313137375343
  episode_reward_mean: 48.41305057600135
  episode_reward_min: 42.40842885047445
  episodes_this_iter: 8
  episodes_total: 16704
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1671168
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0022162911482155323
        max_q: 3.8043735027313232
        mean_q: 3.6328999996185303
        mean_td_error: -0.01924784481525421
        min_q: 3.448004961013794
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004513551015406847
        max_q: 3.141136646270752
        mean_q: 3.062106132507324
        mean_td_error: 0.07425369322299957
        min_q: 2.905916213989258
    num_steps_sampled: 1671168
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1632,3452.7,1671168,48.4131,54.0531,42.4084,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-05-02
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.05313137375343
  episode_reward_mean: 49.31885379971469
  episode_reward_min: 42.40842885047445
  episodes_this_iter: 8
  episodes_total: 16728
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1673216
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006213815417140722
        max_q: 3.690581798553467
        mean_q: 3.609738349914551
        mean_td_error: -0.08504446595907211
        min_q: 3.552015542984009
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008924376219511032
        max_q: 3.1496262550354004
        mean_q: 3.0202841758728027
        mean_td_error: -0.171464204788208
        min_q: 2.94052791595459
    num_steps_sampled: 1673216
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1634,3458.23,1673216,49.3189,54.0531,42.4084,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-05-08
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.05313137375343
  episode_reward_mean: 49.203073206175084
  episode_reward_min: 45.26579820185617
  episodes_this_iter: 16
  episodes_total: 16752
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1675264
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003996907267719507
        max_q: 3.67223858833313
        mean_q: 3.5543220043182373
        mean_td_error: -0.048087865114212036
        min_q: 3.4560039043426514
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004349026829004288
        max_q: 3.340310573577881
        mean_q: 3.226341962814331
        mean_td_error: -0.0733996108174324
        min_q: 3.118581771850586
    num_steps_sampled: 1675264
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1636,3464.37,1675264,49.2031,54.0531,45.2658,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-05-14
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.05313137375343
  episode_reward_mean: 49.586610896679815
  episode_reward_min: 45.26579820185617
  episodes_this_iter: 8
  episodes_total: 16768
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1677312
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003319029463455081
        max_q: 3.680421829223633
        mean_q: 3.6161863803863525
        mean_td_error: -0.04504925012588501
        min_q: 3.5270023345947266
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0027604515198618174
        max_q: 3.469470977783203
        mean_q: 3.2989261150360107
        mean_td_error: -0.036523327231407166
        min_q: 3.1007351875305176
    num_steps_sampled: 1677312
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1638,3470.19,1677312,49.5866,54.0531,45.2658,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-05-20
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.06030401848292
  episode_reward_mean: 50.510764724459776
  episode_reward_min: 45.26579820185617
  episodes_this_iter: 16
  episodes_total: 16792
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1679360
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0025628104340285063
        max_q: 3.476646661758423
        mean_q: 3.315941095352173
        mean_td_error: -0.0280875563621521
        min_q: 3.168229818344116
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003567169886082411
        max_q: 3.4300732612609863
        mean_q: 3.3152623176574707
        mean_td_error: -0.033677004277706146
        min_q: 3.2096524238586426
    num_steps_sampled: 1679360
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1640,3475.85,1679360,50.5108,58.0603,45.2658,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-05-26
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.06030401848292
  episode_reward_mean: 50.53271467936643
  episode_reward_min: 45.99179479913794
  episodes_this_iter: 8
  episodes_total: 16808
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1681408
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.018474359065294266
        max_q: 3.2283589839935303
        mean_q: 3.1331584453582764
        mean_td_error: -0.2825120687484741
        min_q: 3.063178300857544
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007956392131745815
        max_q: 3.076380968093872
        mean_q: 2.8667421340942383
        mean_td_error: -0.06609451770782471
        min_q: 2.668287515640259
    num_steps_sampled: 1681408
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1642,3481.35,1681408,50.5327,58.0603,45.9918,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-05-32
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.06030401848292
  episode_reward_mean: 50.91508861635177
  episode_reward_min: 45.99637275175652
  episodes_this_iter: 8
  episodes_total: 16832
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1683456
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005514682736247778
        max_q: 3.381286859512329
        mean_q: 3.258882761001587
        mean_td_error: -0.08820121735334396
        min_q: 3.0204005241394043
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.018124578520655632
        max_q: 2.9973669052124023
        mean_q: 2.7166054248809814
        mean_td_error: -0.19344967603683472
        min_q: 2.5964064598083496
    num_steps_sampled: 1683456
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1644,3486.91,1683456,50.9151,58.0603,45.9964,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-05-37
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.06030401848292
  episode_reward_mean: 50.85023411378834
  episode_reward_min: 46.83690012442528
  episodes_this_iter: 8
  episodes_total: 16848
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1685504
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.025994593277573586
        max_q: 3.0912375450134277
        mean_q: 2.7279014587402344
        mean_td_error: -0.3955826759338379
        min_q: 2.4375205039978027
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03513341397047043
        max_q: 3.6500425338745117
        mean_q: 3.093820333480835
        mean_td_error: 0.33721548318862915
        min_q: 2.552705764770508
    num_steps_sampled: 1685504
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1646,3492.42,1685504,50.8502,58.0603,46.8369,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-05-43
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.06030401848292
  episode_reward_mean: 50.95202070313776
  episode_reward_min: 46.83690012442528
  episodes_this_iter: 8
  episodes_total: 16872
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1687552
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006900536362081766
        max_q: 2.820141315460205
        mean_q: 2.677298069000244
        mean_td_error: -0.10583461821079254
        min_q: 2.4908909797668457
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.053808946162462234
        max_q: 1.9945120811462402
        mean_q: 1.627227783203125
        mean_td_error: -0.5563789010047913
        min_q: 1.3083293437957764
    num_steps_sampled: 1687552
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1648,3497.64,1687552,50.952,58.0603,46.8369,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-05-48
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.90554024075011
  episode_reward_mean: 50.81417947152768
  episode_reward_min: 46.8983445639296
  episodes_this_iter: 16
  episodes_total: 16896
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1689600
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004020036663860083
        max_q: 3.0305259227752686
        mean_q: 2.8383970260620117
        mean_td_error: -0.06132343411445618
        min_q: 2.534440040588379
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.025441166013479233
        max_q: 2.2527637481689453
        mean_q: 1.047423243522644
        mean_td_error: 0.27184349298477173
        min_q: 0.3512912392616272
    num_steps_sampled: 1689600
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1650,3502.7,1689600,50.8142,53.9055,46.8983,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-05-53
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.90554024075011
  episode_reward_mean: 50.57350783906314
  episode_reward_min: 46.236617441565414
  episodes_this_iter: 8
  episodes_total: 16912
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1691648
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007339975796639919
        max_q: 2.8681552410125732
        mean_q: 2.7717137336730957
        mean_td_error: 0.17632398009300232
        min_q: 2.5398848056793213
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.04054264724254608
        max_q: 1.5281192064285278
        mean_q: 1.288120150566101
        mean_td_error: 0.4267973303794861
        min_q: 0.994989275932312
    num_steps_sampled: 1691648
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1652,3507.64,1691648,50.5735,53.9055,46.2366,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-06-00
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.072690603779826
  episode_reward_mean: 51.26867929397285
  episode_reward_min: 46.236617441565414
  episodes_this_iter: 8
  episodes_total: 16944
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1694720
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00805537961423397
        max_q: 3.3344953060150146
        mean_q: 3.2238595485687256
        mean_td_error: -0.12918972969055176
        min_q: 3.1086692810058594
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006153337191790342
        max_q: 1.8940787315368652
        mean_q: 1.7976365089416504
        mean_td_error: -0.05283183231949806
        min_q: 1.7337297201156616
    num_steps_sampled: 1694720
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1655,3514.81,1694720,51.2687,56.0727,46.2366,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-06-08
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.072690603779826
  episode_reward_mean: 51.34507445993584
  episode_reward_min: 46.236617441565414
  episodes_this_iter: 16
  episodes_total: 16976
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1697792
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005542290862649679
        max_q: 3.281991481781006
        mean_q: 3.166269302368164
        mean_td_error: -0.0981900617480278
        min_q: 3.050058364868164
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004745230078697205
        max_q: 2.9402353763580322
        mean_q: 2.6376380920410156
        mean_td_error: 0.04345978796482086
        min_q: 2.5414068698883057
    num_steps_sampled: 1697792
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1658,3521.95,1697792,51.3451,56.0727,46.2366,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-06-15
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.32642762165696
  episode_reward_mean: 51.79672598544747
  episode_reward_min: 46.57777583970236
  episodes_this_iter: 16
  episodes_total: 17008
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1700864
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001467380439862609
        max_q: 3.2742063999176025
        mean_q: 3.1998913288116455
        mean_td_error: -0.0023445412516593933
        min_q: 3.1119227409362793
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005112171173095703
        max_q: 3.102170944213867
        mean_q: 2.9993698596954346
        mean_td_error: 0.05275934934616089
        min_q: 2.8989105224609375
    num_steps_sampled: 1700864
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1661,3529.12,1700864,51.7967,56.3264,46.5778,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-06-23
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.32642762165696
  episode_reward_mean: 51.929103216466835
  episode_reward_min: 49.6031294101857
  episodes_this_iter: 8
  episodes_total: 17032
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1703936
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0034364808816462755
        max_q: 3.4295732975006104
        mean_q: 3.2496206760406494
        mean_td_error: -0.04467731714248657
        min_q: 3.04559326171875
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.023895585909485817
        max_q: 3.3450334072113037
        mean_q: 3.154604434967041
        mean_td_error: -0.24034473299980164
        min_q: 2.9060723781585693
    num_steps_sampled: 1703936
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1664,3536.08,1703936,51.9291,56.3264,49.6031,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-06-30
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.48683741223051
  episode_reward_mean: 52.76727387533882
  episode_reward_min: 49.6031294101857
  episodes_this_iter: 8
  episodes_total: 17064
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1707008
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0030671111308038235
        max_q: 3.2772533893585205
        mean_q: 3.2057595252990723
        mean_td_error: -0.05633842200040817
        min_q: 3.161770820617676
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01015555951744318
        max_q: 3.2875208854675293
        mean_q: 3.1415185928344727
        mean_td_error: -0.09648823738098145
        min_q: 2.8808741569519043
    num_steps_sampled: 1707008
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1667,3542.96,1707008,52.7673,57.4868,49.6031,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-06-37
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.48683741223051
  episode_reward_mean: 53.95971666166939
  episode_reward_min: 50.421190004598934
  episodes_this_iter: 8
  episodes_total: 17096
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1710080
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007609655614942312
        max_q: 3.457379102706909
        mean_q: 3.345137596130371
        mean_td_error: -0.12915733456611633
        min_q: 3.2805075645446777
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0127185620367527
        max_q: 3.4105379581451416
        mean_q: 3.2851600646972656
        mean_td_error: 0.13641154766082764
        min_q: 3.2134897708892822
    num_steps_sampled: 1710080
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1670,3549.84,1710080,53.9597,57.4868,50.4212,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-06-44
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.199137348800676
  episode_reward_mean: 55.59639756447714
  episode_reward_min: 50.421190004598934
  episodes_this_iter: 8
  episodes_total: 17128
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1713152
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010355311445891857
        max_q: 3.620777130126953
        mean_q: 3.3125810623168945
        mean_td_error: -0.187421977519989
        min_q: 3.184117317199707
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0021008048206567764
        max_q: 3.6918857097625732
        mean_q: 3.635032892227173
        mean_td_error: 0.01380949467420578
        min_q: 3.5271706581115723
    num_steps_sampled: 1713152
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1673,3556.81,1713152,55.5964,59.1991,50.4212,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-06-49
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.57153135444366
  episode_reward_mean: 56.728483980354625
  episode_reward_min: 51.57681459663134
  episodes_this_iter: 16
  episodes_total: 17152
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1715200
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0040865708142519
        max_q: 3.4712510108947754
        mean_q: 3.2497901916503906
        mean_td_error: 0.059727251529693604
        min_q: 3.140181541442871
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004208939615637064
        max_q: 4.038298606872559
        mean_q: 3.9512951374053955
        mean_td_error: 0.03626028448343277
        min_q: 3.8890459537506104
    num_steps_sampled: 1715200
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1675,3561.69,1715200,56.7285,60.5715,51.5768,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-06-54
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.57153135444366
  episode_reward_mean: 56.800727351371364
  episode_reward_min: 53.291320864032336
  episodes_this_iter: 8
  episodes_total: 17168
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1717248
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004254295490682125
        max_q: 3.35304594039917
        mean_q: 3.208669424057007
        mean_td_error: 0.05737747251987457
        min_q: 3.113649845123291
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012765198945999146
        max_q: 4.144261360168457
        mean_q: 4.060240268707275
        mean_td_error: -0.12473725527524948
        min_q: 3.967601776123047
    num_steps_sampled: 1717248
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1677,3566.73,1717248,56.8007,60.5715,53.2913,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-07-00
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.57153135444366
  episode_reward_mean: 57.439265205679575
  episode_reward_min: 53.291320864032336
  episodes_this_iter: 16
  episodes_total: 17192
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1719296
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006477441173046827
        max_q: 3.4197099208831787
        mean_q: 3.263946056365967
        mean_td_error: -0.12041984498500824
        min_q: 3.18774151802063
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003941692411899567
        max_q: 4.182888031005859
        mean_q: 4.1097259521484375
        mean_td_error: -0.023677892982959747
        min_q: 4.045340538024902
    num_steps_sampled: 1719296
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1679,3572.36,1719296,57.4393,60.5715,53.2913,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-07-06
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.57153135444366
  episode_reward_mean: 57.473923154010954
  episode_reward_min: 53.2012674706248
  episodes_this_iter: 8
  episodes_total: 17208
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1721344
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003367285244166851
        max_q: 3.3363535404205322
        mean_q: 3.2570557594299316
        mean_td_error: -0.05429340898990631
        min_q: 3.160007953643799
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002798451343551278
        max_q: 4.121018409729004
        mean_q: 3.9321446418762207
        mean_td_error: -0.016801126301288605
        min_q: 3.786372661590576
    num_steps_sampled: 1721344
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1681,3577.85,1721344,57.4739,60.5715,53.2013,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-07-13
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.57153135444366
  episode_reward_mean: 57.551620005918366
  episode_reward_min: 53.2012674706248
  episodes_this_iter: 16
  episodes_total: 17232
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1723392
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.031183401122689247
        max_q: 3.0742247104644775
        mean_q: 2.9235103130340576
        mean_td_error: -0.5259425044059753
        min_q: 2.7664313316345215
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0034947574604302645
        max_q: 4.215881824493408
        mean_q: 4.069746971130371
        mean_td_error: 0.04374236613512039
        min_q: 3.9564244747161865
    num_steps_sampled: 1723392
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1683,3584.38,1723392,57.5516,60.5715,53.2013,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-07-20
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.57153135444366
  episode_reward_mean: 57.16752382510719
  episode_reward_min: 53.2012674706248
  episodes_this_iter: 8
  episodes_total: 17248
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1725440
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008043158799409866
        max_q: 2.996065855026245
        mean_q: 2.8181371688842773
        mean_td_error: -0.1279938668012619
        min_q: 2.6238677501678467
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0032292644027620554
        max_q: 4.344461917877197
        mean_q: 4.277348518371582
        mean_td_error: -0.042716726660728455
        min_q: 4.158543586730957
    num_steps_sampled: 1725440
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1685,3591.74,1725440,57.1675,60.5715,53.2013,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-07-28
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.55610111972216
  episode_reward_mean: 57.12340288265439
  episode_reward_min: 53.2012674706248
  episodes_this_iter: 8
  episodes_total: 17272
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1727488
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00939644780009985
        max_q: 3.1277241706848145
        mean_q: 3.0618209838867188
        mean_td_error: -0.17823708057403564
        min_q: 3.007343292236328
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0030339525546878576
        max_q: 4.585977554321289
        mean_q: 4.511852741241455
        mean_td_error: -0.045818030834198
        min_q: 4.4336395263671875
    num_steps_sampled: 1727488
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1687,3598.85,1727488,57.1234,60.5561,53.2013,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-07-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.55610111972216
  episode_reward_mean: 56.73497435630254
  episode_reward_min: 53.2012674706248
  episodes_this_iter: 8
  episodes_total: 17288
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1729536
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004250239580869675
        max_q: 3.12137770652771
        mean_q: 2.974076986312866
        mean_td_error: -0.07239565253257751
        min_q: 2.86273193359375
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0028106621466577053
        max_q: 4.791501522064209
        mean_q: 4.704092979431152
        mean_td_error: 0.00812336802482605
        min_q: 4.615159034729004
    num_steps_sampled: 1729536
    num_steps_trained: 17

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1689,3605.46,1729536,56.735,60.5561,53.2013,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-07-41
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.464915643789574
  episode_reward_mean: 56.87281942713091
  episode_reward_min: 54.657354096888085
  episodes_this_iter: 8
  episodes_total: 17312
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1731584
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005782288499176502
        max_q: 3.2989933490753174
        mean_q: 3.199110269546509
        mean_td_error: -0.10051114112138748
        min_q: 3.0955984592437744
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004081187304109335
        max_q: 4.951549530029297
        mean_q: 4.90233039855957
        mean_td_error: -0.06235255300998688
        min_q: 4.773947715759277
    num_steps_sampled: 1731584
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1691,3611.44,1731584,56.8728,60.4649,54.6574,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-07-46
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.464915643789574
  episode_reward_mean: 56.049085404080714
  episode_reward_min: 52.93613871587453
  episodes_this_iter: 16
  episodes_total: 17336
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1733632
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011808277107775211
        max_q: 3.2872672080993652
        mean_q: 3.1335670948028564
        mean_td_error: -0.21322286128997803
        min_q: 2.994709014892578
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0010399945313110948
        max_q: 5.047487258911133
        mean_q: 4.984135150909424
        mean_td_error: -0.001975014805793762
        min_q: 4.946312427520752
    num_steps_sampled: 1733632
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1693,3617.13,1733632,56.0491,60.4649,52.9361,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-07-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.35380727170472
  episode_reward_mean: 56.219894507511256
  episode_reward_min: 52.93613871587453
  episodes_this_iter: 8
  episodes_total: 17352
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1735680
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015191531740128994
        max_q: 3.407470464706421
        mean_q: 3.028980255126953
        mean_td_error: -0.1896667182445526
        min_q: 2.8556911945343018
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002035770332440734
        max_q: 5.119287967681885
        mean_q: 5.051711082458496
        mean_td_error: 0.020028650760650635
        min_q: 4.980737209320068
    num_steps_sampled: 1735680
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1695,3622.52,1735680,56.2199,60.3538,52.9361,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-07-57
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.35380727170472
  episode_reward_mean: 55.78900494453218
  episode_reward_min: 52.568991905825854
  episodes_this_iter: 16
  episodes_total: 17376
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1737728
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.045593664050102234
        max_q: 3.063004732131958
        mean_q: 2.118232250213623
        mean_td_error: -0.506708562374115
        min_q: 1.7477574348449707
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0019853049889206886
        max_q: 5.35435152053833
        mean_q: 5.159301280975342
        mean_td_error: 0.006531074643135071
        min_q: 5.080050945281982
    num_steps_sampled: 1737728
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1697,3627.74,1737728,55.789,60.3538,52.569,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-08-03
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.35380727170472
  episode_reward_mean: 55.312968431191
  episode_reward_min: 47.519275697013136
  episodes_this_iter: 8
  episodes_total: 17392
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1739776
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0035518486984074116
        max_q: 2.559156894683838
        mean_q: 2.339766025543213
        mean_td_error: 0.008138932287693024
        min_q: 2.1668052673339844
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0072828237898647785
        max_q: 5.324044704437256
        mean_q: 5.231148719787598
        mean_td_error: 0.09291259944438934
        min_q: 5.1660356521606445
    num_steps_sampled: 1739776
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1699,3632.94,1739776,55.313,60.3538,47.5193,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-08-08
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.35380727170472
  episode_reward_mean: 54.59296957833134
  episode_reward_min: 47.519275697013136
  episodes_this_iter: 8
  episodes_total: 17416
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1741824
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.05501740053296089
        max_q: 2.1787378787994385
        mean_q: 1.9128473997116089
        mean_td_error: -0.6395946741104126
        min_q: 1.6046229600906372
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0024872682988643646
        max_q: 5.326125144958496
        mean_q: 5.162017822265625
        mean_td_error: 0.03925429284572601
        min_q: 5.0741424560546875
    num_steps_sampled: 1741824
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1701,3638.04,1741824,54.593,60.3538,47.5193,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-08-13
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.35380727170472
  episode_reward_mean: 53.97873282911227
  episode_reward_min: 47.519275697013136
  episodes_this_iter: 8
  episodes_total: 17432
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1743872
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.04746958613395691
        max_q: 2.533845901489258
        mean_q: 2.294086217880249
        mean_td_error: -0.5617350935935974
        min_q: 2.0411927700042725
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0019830912351608276
        max_q: 5.221856117248535
        mean_q: 5.157449722290039
        mean_td_error: -0.012749433517456055
        min_q: 5.12021017074585
    num_steps_sampled: 1743872
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1703,3643.07,1743872,53.9787,60.3538,47.5193,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-08-19
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.395371884068325
  episode_reward_mean: 54.075541118541196
  episode_reward_min: 47.519275697013136
  episodes_this_iter: 8
  episodes_total: 17456
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1745920
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.037539850920438766
        max_q: 2.466273307800293
        mean_q: 2.2072367668151855
        mean_td_error: -0.3153012990951538
        min_q: 2.062903881072998
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0017903574043884873
        max_q: 5.292784690856934
        mean_q: 5.227952480316162
        mean_td_error: -0.022241145372390747
        min_q: 5.158061504364014
    num_steps_sampled: 1745920
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1705,3648.09,1745920,54.0755,59.3954,47.5193,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-08-24
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.395371884068325
  episode_reward_mean: 53.95009736832868
  episode_reward_min: 47.519275697013136
  episodes_this_iter: 8
  episodes_total: 17472
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1747968
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010398178361356258
        max_q: 2.662801742553711
        mean_q: 2.289196491241455
        mean_td_error: -0.0913635864853859
        min_q: 1.9731624126434326
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001858812989667058
        max_q: 5.472238540649414
        mean_q: 5.283452987670898
        mean_td_error: -0.015665724873542786
        min_q: 5.145931720733643
    num_steps_sampled: 1747968
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1707,3653.43,1747968,53.9501,59.3954,47.5193,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-08-30
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.866182811251946
  episode_reward_mean: 54.32567538249344
  episode_reward_min: 49.00847579007379
  episodes_this_iter: 8
  episodes_total: 17496
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1750016
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0735752284526825
        max_q: 2.106473445892334
        mean_q: 1.8121857643127441
        mean_td_error: -0.6469413042068481
        min_q: 1.5269408226013184
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01660567708313465
        max_q: 5.613685607910156
        mean_q: 4.73078727722168
        mean_td_error: -0.21225833892822266
        min_q: 4.444632053375244
    num_steps_sampled: 1750016
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1709,3659.32,1750016,54.3257,58.8662,49.0085,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-08-37
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.67074778611388
  episode_reward_mean: 54.871434059123075
  episode_reward_min: 49.00847579007379
  episodes_this_iter: 16
  episodes_total: 17520
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1752064
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015035498887300491
        max_q: 1.9180744886398315
        mean_q: 1.6635446548461914
        mean_td_error: -0.1260286420583725
        min_q: 1.4971709251403809
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02279706671833992
        max_q: 5.27180290222168
        mean_q: 5.002030372619629
        mean_td_error: 0.38563230633735657
        min_q: 4.823525428771973
    num_steps_sampled: 1752064
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1711,3665.93,1752064,54.8714,60.6707,49.0085,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-08-45
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.67074778611388
  episode_reward_mean: 55.376488394008156
  episode_reward_min: 49.00847579007379
  episodes_this_iter: 8
  episodes_total: 17536
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1754112
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013358964584767818
        max_q: 1.9596445560455322
        mean_q: 1.8227039575576782
        mean_td_error: -0.11906197667121887
        min_q: 1.6639516353607178
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004272091668099165
        max_q: 4.8021464347839355
        mean_q: 4.673590660095215
        mean_td_error: -0.07505002617835999
        min_q: 4.582702159881592
    num_steps_sampled: 1754112
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1713,3673.54,1754112,55.3765,60.6707,49.0085,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-08-53
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.67074778611388
  episode_reward_mean: 54.87706985647864
  episode_reward_min: 49.00847579007379
  episodes_this_iter: 16
  episodes_total: 17560
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1756160
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013394814915955067
        max_q: 2.706076145172119
        mean_q: 2.539076328277588
        mean_td_error: -0.1108938604593277
        min_q: 2.397674322128296
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0032453450839966536
        max_q: 4.797457218170166
        mean_q: 4.720117568969727
        mean_td_error: -0.04198293387889862
        min_q: 4.578990936279297
    num_steps_sampled: 1756160
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1715,3681.4,1756160,54.8771,60.6707,49.0085,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-09-01
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.67074778611388
  episode_reward_mean: 55.15249267419247
  episode_reward_min: 50.47375450092921
  episodes_this_iter: 8
  episodes_total: 17576
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1758208
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.042388979345560074
        max_q: 2.509978771209717
        mean_q: 2.3519630432128906
        mean_td_error: -0.3667215406894684
        min_q: 2.1443445682525635
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006791429594159126
        max_q: 4.828305721282959
        mean_q: 4.652760028839111
        mean_td_error: -0.11524751782417297
        min_q: 4.539610862731934
    num_steps_sampled: 1758208
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1717,3689.02,1758208,55.1525,60.6707,50.4738,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-09-08
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.67074778611388
  episode_reward_mean: 53.88437829011348
  episode_reward_min: 47.59660604834076
  episodes_this_iter: 8
  episodes_total: 17600
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1760256
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006266822572797537
        max_q: 2.8502957820892334
        mean_q: 2.740490674972534
        mean_td_error: -0.0437442883849144
        min_q: 2.6230385303497314
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008914154022932053
        max_q: 4.604135036468506
        mean_q: 4.416733264923096
        mean_td_error: -0.14354395866394043
        min_q: 4.290380477905273
    num_steps_sampled: 1760256
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1719,3695.92,1760256,53.8844,60.6707,47.5966,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-09-15
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.65449568672984
  episode_reward_mean: 53.43413396097803
  episode_reward_min: 47.59660604834076
  episodes_this_iter: 8
  episodes_total: 17616
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1762304
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.021097997203469276
        max_q: 2.857670307159424
        mean_q: 2.723719596862793
        mean_td_error: -0.18187376856803894
        min_q: 2.535569667816162
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007373177912086248
        max_q: 4.488717555999756
        mean_q: 4.385558128356934
        mean_td_error: -0.1260959506034851
        min_q: 4.284796714782715
    num_steps_sampled: 1762304
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1721,3702.65,1762304,53.4341,56.6545,47.5966,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-09-21
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.65449568672984
  episode_reward_mean: 53.112884447934164
  episode_reward_min: 47.59660604834076
  episodes_this_iter: 8
  episodes_total: 17640
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1764352
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014340237714350224
        max_q: 2.958630323410034
        mean_q: 2.757239580154419
        mean_td_error: -0.09212412685155869
        min_q: 2.6706621646881104
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003282340243458748
        max_q: 4.652886390686035
        mean_q: 4.456735610961914
        mean_td_error: 0.045652419328689575
        min_q: 4.354127883911133
    num_steps_sampled: 1764352
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1723,3708.88,1764352,53.1129,56.6545,47.5966,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-09-28
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.65449568672984
  episode_reward_mean: 52.86139654958755
  episode_reward_min: 47.59660604834076
  episodes_this_iter: 16
  episodes_total: 17664
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1766400
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013814096339046955
        max_q: 3.3944947719573975
        mean_q: 3.2916760444641113
        mean_td_error: 0.19369181990623474
        min_q: 3.198460340499878
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002199122216552496
        max_q: 4.2745184898376465
        mean_q: 4.190112590789795
        mean_td_error: -0.026758000254631042
        min_q: 4.119560241699219
    num_steps_sampled: 1766400
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1725,3714.71,1766400,52.8614,56.6545,47.5966,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-09-33
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.10232155634009
  episode_reward_mean: 52.7765097579682
  episode_reward_min: 47.59660604834076
  episodes_this_iter: 8
  episodes_total: 17680
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1768448
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005014766938984394
        max_q: 3.800156593322754
        mean_q: 3.719454288482666
        mean_td_error: 0.06337609887123108
        min_q: 3.664015293121338
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011274612508714199
        max_q: 4.1320881843566895
        mean_q: 3.950188398361206
        mean_td_error: -0.16293129324913025
        min_q: 3.8636436462402344
    num_steps_sampled: 1768448
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1727,3720.23,1768448,52.7765,55.1023,47.5966,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-09-39
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.765377055085565
  episode_reward_mean: 54.08480913499695
  episode_reward_min: 50.659787056594396
  episodes_this_iter: 16
  episodes_total: 17704
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1770496
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0008782708900980651
        max_q: 4.19126558303833
        mean_q: 4.086587905883789
        mean_td_error: 0.0059892162680625916
        min_q: 3.9853129386901855
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014718370512127876
        max_q: 4.045375823974609
        mean_q: 3.905517101287842
        mean_td_error: -0.2190231829881668
        min_q: 3.777330160140991
    num_steps_sampled: 1770496
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1729,3725.75,1770496,54.0848,58.7654,50.6598,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-09-44
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.765377055085565
  episode_reward_mean: 54.05346884825797
  episode_reward_min: 51.029662739622
  episodes_this_iter: 8
  episodes_total: 17720
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1772544
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0009094687993638217
        max_q: 4.38180685043335
        mean_q: 4.339516639709473
        mean_td_error: -0.011818528175354004
        min_q: 4.271854400634766
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005741691682487726
        max_q: 4.004329681396484
        mean_q: 3.9371085166931152
        mean_td_error: 0.09563423693180084
        min_q: 3.7224550247192383
    num_steps_sampled: 1772544
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1731,3731.03,1772544,54.0535,58.7654,51.0297,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-09-50
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.765377055085565
  episode_reward_mean: 53.4766081141958
  episode_reward_min: 47.45277436746818
  episodes_this_iter: 16
  episodes_total: 17744
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1774592
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0005909688188694417
        max_q: 4.725823879241943
        mean_q: 4.668416500091553
        mean_td_error: 0.004411906003952026
        min_q: 4.589352607727051
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014530213549733162
        max_q: 4.084175109863281
        mean_q: 3.8970530033111572
        mean_td_error: -0.13784807920455933
        min_q: 3.8039534091949463
    num_steps_sampled: 1774592
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1733,3736.31,1774592,53.4766,58.7654,47.4528,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-09-55
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.765377055085565
  episode_reward_mean: 53.52169515588914
  episode_reward_min: 47.45277436746818
  episodes_this_iter: 8
  episodes_total: 17760
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1776640
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0028073005378246307
        max_q: 5.063679218292236
        mean_q: 5.028135776519775
        mean_td_error: 0.05443529784679413
        min_q: 4.920887470245361
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007563774473965168
        max_q: 4.152122497558594
        mean_q: 3.8476741313934326
        mean_td_error: 0.07703089714050293
        min_q: 3.537409782409668
    num_steps_sampled: 1776640
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1735,3741.61,1776640,53.5217,58.7654,47.4528,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-10-01
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.765377055085565
  episode_reward_mean: 53.22982658169889
  episode_reward_min: 47.45277436746818
  episodes_this_iter: 8
  episodes_total: 17784
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1778688
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002345752203837037
        max_q: 5.362311840057373
        mean_q: 5.32137393951416
        mean_td_error: 0.045779481530189514
        min_q: 5.27809476852417
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03399375453591347
        max_q: 4.396448135375977
        mean_q: 3.579545021057129
        mean_td_error: -0.33969926834106445
        min_q: 3.2459521293640137
    num_steps_sampled: 1778688
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1737,3746.9,1778688,53.2298,58.7654,47.4528,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-10-07
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.765377055085565
  episode_reward_mean: 52.52809138566081
  episode_reward_min: 47.45277436746818
  episodes_this_iter: 8
  episodes_total: 17800
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1780736
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0006012454978190362
        max_q: 5.511811256408691
        mean_q: 5.442462921142578
        mean_td_error: -0.010219365358352661
        min_q: 5.3404059410095215
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.023651963099837303
        max_q: 3.2070674896240234
        mean_q: 2.4606969356536865
        mean_td_error: -0.13930916786193848
        min_q: 1.5054638385772705
    num_steps_sampled: 1780736
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1739,3752.35,1780736,52.5281,58.7654,47.4528,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-10-12
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.202669023138604
  episode_reward_mean: 52.120245819593094
  episode_reward_min: 47.45277436746818
  episodes_this_iter: 8
  episodes_total: 17824
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1782784
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0011802356457337737
        max_q: 5.446933746337891
        mean_q: 5.396485328674316
        mean_td_error: 0.002459898591041565
        min_q: 5.267796993255615
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008037214167416096
        max_q: 2.4804294109344482
        mean_q: 2.0473906993865967
        mean_td_error: 0.03964465484023094
        min_q: 1.8104803562164307
    num_steps_sampled: 1782784
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1741,3757.84,1782784,52.1202,55.2027,47.4528,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-10-18
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.33638236804016
  episode_reward_mean: 52.23378630637415
  episode_reward_min: 47.95177368729668
  episodes_this_iter: 16
  episodes_total: 17848
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1784832
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0006694909534417093
        max_q: 5.4428486824035645
        mean_q: 5.395166397094727
        mean_td_error: -0.0013824999332427979
        min_q: 5.283749103546143
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0026533000636845827
        max_q: 2.2613115310668945
        mean_q: 2.1867926120758057
        mean_td_error: -0.025152109563350677
        min_q: 2.0523271560668945
    num_steps_sampled: 1784832
    num_step

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1743,3763.4,1784832,52.2338,56.3364,47.9518,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-10-24
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.33638236804016
  episode_reward_mean: 52.29238966052217
  episode_reward_min: 47.95177368729668
  episodes_this_iter: 8
  episodes_total: 17864
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1786880
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003103202674537897
        max_q: 5.431815147399902
        mean_q: 5.340912818908691
        mean_td_error: 0.06229980289936066
        min_q: 5.168943881988525
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003069203579798341
        max_q: 2.8123886585235596
        mean_q: 2.764803886413574
        mean_td_error: 0.030049219727516174
        min_q: 2.6405043601989746
    num_steps_sampled: 1786880
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1745,3768.9,1786880,52.2924,56.3364,47.9518,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-10-30
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.33638236804016
  episode_reward_mean: 51.89416972543905
  episode_reward_min: 47.95177368729668
  episodes_this_iter: 16
  episodes_total: 17888
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1788928
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0016354310791939497
        max_q: 5.271097660064697
        mean_q: 5.197373867034912
        mean_td_error: -0.025580331683158875
        min_q: 5.153467655181885
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0014969749609008431
        max_q: 3.5750112533569336
        mean_q: 3.4717414379119873
        mean_td_error: 0.03268510848283768
        min_q: 3.291933536529541
    num_steps_sampled: 1788928
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1747,3774.61,1788928,51.8942,56.3364,47.9518,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-10-36
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.33638236804016
  episode_reward_mean: 51.45805621276326
  episode_reward_min: 47.85965933306164
  episodes_this_iter: 8
  episodes_total: 17904
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1790976
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001113657490350306
        max_q: 5.282607555389404
        mean_q: 5.234530925750732
        mean_td_error: 0.01622168719768524
        min_q: 5.1392292976379395
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0014042763505131006
        max_q: 4.116225719451904
        mean_q: 4.049073219299316
        mean_td_error: 0.02425030618906021
        min_q: 3.864898443222046
    num_steps_sampled: 1790976
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1749,3780.62,1790976,51.4581,56.3364,47.8597,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-10-43
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.33638236804016
  episode_reward_mean: 50.91874469253192
  episode_reward_min: 47.85965933306164
  episodes_this_iter: 8
  episodes_total: 17928
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1793024
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002398280892521143
        max_q: 5.400223731994629
        mean_q: 5.1881184577941895
        mean_td_error: -0.010762050747871399
        min_q: 5.008182048797607
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002173179993405938
        max_q: 4.504930019378662
        mean_q: 4.45283317565918
        mean_td_error: 0.05712002515792847
        min_q: 4.286189556121826
    num_steps_sampled: 1793024
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1751,3787.23,1793024,50.9187,56.3364,47.8597,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-10-50
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.56952552288448
  episode_reward_mean: 50.740447823763404
  episode_reward_min: 47.85965933306164
  episodes_this_iter: 8
  episodes_total: 17944
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1795072
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002261324319988489
        max_q: 5.287337303161621
        mean_q: 5.215198993682861
        mean_td_error: 0.0473247766494751
        min_q: 5.168544292449951
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0006113820127211511
        max_q: 4.758993148803711
        mean_q: 4.715004920959473
        mean_td_error: 0.015493214130401611
        min_q: 4.5904998779296875
    num_steps_sampled: 1795072
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1753,3794.11,1795072,50.7404,54.5695,47.8597,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-10-57
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.06152086299568
  episode_reward_mean: 50.60843740276069
  episode_reward_min: 47.85965933306164
  episodes_this_iter: 8
  episodes_total: 17968
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1797120
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0029125846922397614
        max_q: 5.167358875274658
        mean_q: 5.120001316070557
        mean_td_error: -0.05213974416255951
        min_q: 5.087266445159912
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0008773790905252099
        max_q: 4.881285667419434
        mean_q: 4.86177921295166
        mean_td_error: -0.018457412719726562
        min_q: 4.7881646156311035
    num_steps_sampled: 1797120
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1755,3800.98,1797120,50.6084,54.0615,47.8597,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-11-04
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.06152086299568
  episode_reward_mean: 50.643423738470254
  episode_reward_min: 47.85965933306164
  episodes_this_iter: 8
  episodes_total: 17984
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1799168
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001175465527921915
        max_q: 5.1596808433532715
        mean_q: 4.997161865234375
        mean_td_error: -0.006648391485214233
        min_q: 4.923472881317139
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0007107920246198773
        max_q: 5.0101518630981445
        mean_q: 4.978565216064453
        mean_td_error: -0.009617313742637634
        min_q: 4.875553131103516
    num_steps_sampled: 1799168
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1757,3807.66,1799168,50.6434,54.0615,47.8597,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-11-10
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.46576527835481
  episode_reward_mean: 51.34425831500978
  episode_reward_min: 48.8066550817993
  episodes_this_iter: 8
  episodes_total: 18008
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1801216
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012908431701362133
        max_q: 5.202437877655029
        mean_q: 4.7012553215026855
        mean_td_error: -0.2047378420829773
        min_q: 4.501302242279053
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0005370269063860178
        max_q: 5.134185791015625
        mean_q: 5.082333087921143
        mean_td_error: -0.0018784254789352417
        min_q: 4.986324787139893
    num_steps_sampled: 1801216
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1759,3814.06,1801216,51.3443,54.4658,48.8067,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-11-17
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.46576527835481
  episode_reward_mean: 51.52183940884246
  episode_reward_min: 48.73649868521828
  episodes_this_iter: 16
  episodes_total: 18032
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1803264
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0029021529480814934
        max_q: 5.076676845550537
        mean_q: 4.944113731384277
        mean_td_error: -0.01739630103111267
        min_q: 4.8509979248046875
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0008686598157510161
        max_q: 5.1331706047058105
        mean_q: 5.0933074951171875
        mean_td_error: -0.02173939347267151
        min_q: 4.9770073890686035
    num_steps_sampled: 1803264
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1761,3820.26,1803264,51.5218,54.4658,48.7365,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-11-23
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.46576527835481
  episode_reward_mean: 51.11138491935981
  episode_reward_min: 48.73649868521828
  episodes_this_iter: 8
  episodes_total: 18048
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1805312
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00093799305614084
        max_q: 5.127504825592041
        mean_q: 5.059830188751221
        mean_td_error: 0.010472401976585388
        min_q: 4.95367431640625
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0012972300173714757
        max_q: 5.158466339111328
        mean_q: 5.118043422698975
        mean_td_error: -0.02433660626411438
        min_q: 5.027165412902832
    num_steps_sampled: 1805312
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1763,3826.42,1805312,51.1114,54.4658,48.7365,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-11-29
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.46576527835481
  episode_reward_mean: 50.6532780536943
  episode_reward_min: 47.523361946312455
  episodes_this_iter: 16
  episodes_total: 18072
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1807360
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0015219037886708975
        max_q: 5.092923641204834
        mean_q: 5.0513505935668945
        mean_td_error: 0.024502500891685486
        min_q: 5.00002908706665
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0010949383722618222
        max_q: 5.13052225112915
        mean_q: 5.073735237121582
        mean_td_error: -0.013134554028511047
        min_q: 5.0033063888549805
    num_steps_sampled: 1807360
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1765,3832.21,1807360,50.6533,54.4658,47.5234,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-11-36
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.46576527835481
  episode_reward_mean: 50.02779860513689
  episode_reward_min: 47.523361946312455
  episodes_this_iter: 8
  episodes_total: 18088
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1809408
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0011118159163743258
        max_q: 4.961899757385254
        mean_q: 4.930629730224609
        mean_td_error: 0.01443663239479065
        min_q: 4.815952301025391
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0006834187661297619
        max_q: 5.090691566467285
        mean_q: 5.017651557922363
        mean_td_error: 0.007150769233703613
        min_q: 4.931878089904785
    num_steps_sampled: 1809408
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1767,3838.42,1809408,50.0278,54.4658,47.5234,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-11-43
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 52.6991502027215
  episode_reward_mean: 49.539933761894154
  episode_reward_min: 47.523361946312455
  episodes_this_iter: 8
  episodes_total: 18112
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1811456
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0005830589798279107
        max_q: 5.010761737823486
        mean_q: 4.963169097900391
        mean_td_error: -0.0020287781953811646
        min_q: 4.853484630584717
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001097215455956757
        max_q: 5.023997783660889
        mean_q: 4.978337287902832
        mean_td_error: 0.02764454483985901
        min_q: 4.875920295715332
    num_steps_sampled: 1811456
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1769,3845.5,1811456,49.5399,52.6992,47.5234,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-11-51
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 51.7596877982454
  episode_reward_mean: 49.61581341864985
  episode_reward_min: 47.523361946312455
  episodes_this_iter: 8
  episodes_total: 18128
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1813504
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0032460030633956194
        max_q: 4.930107593536377
        mean_q: 4.84501314163208
        mean_td_error: -0.05839383602142334
        min_q: 4.793247699737549
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0007688786135986447
        max_q: 4.972949028015137
        mean_q: 4.921171188354492
        mean_td_error: 0.009728938341140747
        min_q: 4.859336853027344
    num_steps_sampled: 1813504
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1771,3853.45,1813504,49.6158,51.7597,47.5234,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-12-00
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 51.7596877982454
  episode_reward_mean: 48.866213814925544
  episode_reward_min: 45.365438564804826
  episodes_this_iter: 8
  episodes_total: 18152
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1815552
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007692729588598013
        max_q: 4.8899641036987305
        mean_q: 4.825362682342529
        mean_td_error: -0.12352368235588074
        min_q: 4.767703533172607
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0008328424300998449
        max_q: 4.943185806274414
        mean_q: 4.911526679992676
        mean_td_error: 0.005127802491188049
        min_q: 4.864747524261475
    num_steps_sampled: 1815552
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1773,3861.83,1815552,48.8662,51.7597,45.3654,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-12-05
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 51.7596877982454
  episode_reward_mean: 48.88157793406033
  episode_reward_min: 45.365438564804826
  episodes_this_iter: 8
  episodes_total: 18160
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1816576
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0023169543128460646
        max_q: 4.797110557556152
        mean_q: 4.766312599182129
        mean_td_error: -0.04385444521903992
        min_q: 4.698135852813721
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0009596796589903533
        max_q: 4.877501010894775
        mean_q: 4.839301109313965
        mean_td_error: -0.016633808612823486
        min_q: 4.763022422790527
    num_steps_sampled: 1816576
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1774,3866.77,1816576,48.8816,51.7597,45.3654,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-12-15
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 51.7596877982454
  episode_reward_mean: 48.77796112511474
  episode_reward_min: 45.365438564804826
  episodes_this_iter: 8
  episodes_total: 18184
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1818624
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0059677185490727425
        max_q: 4.928948879241943
        mean_q: 4.877540111541748
        mean_td_error: -0.11443477869033813
        min_q: 4.76348876953125
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0013478340115398169
        max_q: 4.941832542419434
        mean_q: 4.828297138214111
        mean_td_error: -0.008019879460334778
        min_q: 4.7640061378479
    num_steps_sampled: 1818624
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1776,3876.78,1818624,48.778,51.7597,45.3654,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-12-21
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 51.7596877982454
  episode_reward_mean: 48.36398431417243
  episode_reward_min: 44.909690040624184
  episodes_this_iter: 8
  episodes_total: 18192
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1819648
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003976315725594759
        max_q: 4.806996822357178
        mean_q: 4.69554328918457
        mean_td_error: -0.057289689779281616
        min_q: 4.5266032218933105
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0005071379127912223
        max_q: 4.879993915557861
        mean_q: 4.841418266296387
        mean_td_error: 0.00025548040866851807
        min_q: 4.818558692932129
    num_steps_sampled: 1819648
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1777,3882.21,1819648,48.364,51.7597,44.9097,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-12-26
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 51.7596877982454
  episode_reward_mean: 48.06204600994848
  episode_reward_min: 44.909690040624184
  episodes_this_iter: 8
  episodes_total: 18200
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1820672
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0025493400171399117
        max_q: 4.931400775909424
        mean_q: 4.688263416290283
        mean_td_error: 0.03564836084842682
        min_q: 4.488481521606445
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0021914856042712927
        max_q: 4.852787017822266
        mean_q: 4.813412666320801
        mean_td_error: -0.04769885540008545
        min_q: 4.764578819274902
    num_steps_sampled: 1820672
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1778,3887.35,1820672,48.062,51.7597,44.9097,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-12-32
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 50.36449853729894
  episode_reward_mean: 47.492707678757235
  episode_reward_min: 44.909690040624184
  episodes_this_iter: 16
  episodes_total: 18216
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1821696
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0021198957692831755
        max_q: 4.829780578613281
        mean_q: 4.61027193069458
        mean_td_error: -0.009538844227790833
        min_q: 4.452569007873535
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0009229178540408611
        max_q: 4.874777317047119
        mean_q: 4.808672904968262
        mean_td_error: 0.0066356658935546875
        min_q: 4.747872352600098
    num_steps_sampled: 1821696
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1779,3892.46,1821696,47.4927,50.3645,44.9097,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-12-37
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 50.16357425254454
  episode_reward_mean: 47.199137719567986
  episode_reward_min: 44.909690040624184
  episodes_this_iter: 8
  episodes_total: 18224
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1822720
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010243896394968033
        max_q: 4.7223405838012695
        mean_q: 4.696387767791748
        mean_td_error: 0.1827794462442398
        min_q: 4.595426559448242
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0011679906165227294
        max_q: 4.923435211181641
        mean_q: 4.845603942871094
        mean_td_error: -0.018917828798294067
        min_q: 4.800279140472412
    num_steps_sampled: 1822720
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1780,3897.38,1822720,47.1991,50.1636,44.9097,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-12-42
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 49.57059053854707
  episode_reward_mean: 47.26964940872206
  episode_reward_min: 44.909690040624184
  episodes_this_iter: 8
  episodes_total: 18232
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1823744
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.000837389612570405
        max_q: 4.680064678192139
        mean_q: 4.5709075927734375
        mean_td_error: 0.014503270387649536
        min_q: 4.522573947906494
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0006409254274331033
        max_q: 4.924233436584473
        mean_q: 4.88401985168457
        mean_td_error: -0.005995020270347595
        min_q: 4.794837951660156
    num_steps_sampled: 1823744
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1781,3902.27,1823744,47.2696,49.5706,44.9097,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-12-50
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 49.57059053854707
  episode_reward_mean: 47.491345805212184
  episode_reward_min: 44.909690040624184
  episodes_this_iter: 16
  episodes_total: 18256
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1825792
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008255057036876678
        max_q: 4.484212398529053
        mean_q: 4.267622470855713
        mean_td_error: 0.16045664250850677
        min_q: 4.096415996551514
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0013244429137557745
        max_q: 4.950703144073486
        mean_q: 4.907081127166748
        mean_td_error: -0.020433589816093445
        min_q: 4.868024826049805
    num_steps_sampled: 1825792
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1783,3910.57,1825792,47.4913,49.5706,44.9097,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-12-59
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 50.77100631816266
  episode_reward_mean: 47.687548341933514
  episode_reward_min: 44.909690040624184
  episodes_this_iter: 8
  episodes_total: 18272
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1827840
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005936472211033106
        max_q: 4.125953674316406
        mean_q: 4.060277462005615
        mean_td_error: -0.1158314049243927
        min_q: 3.958920955657959
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0021793257910758257
        max_q: 4.944349765777588
        mean_q: 4.9162750244140625
        mean_td_error: 0.04672911763191223
        min_q: 4.792108535766602
    num_steps_sampled: 1827840
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1785,3918.47,1827840,47.6875,50.771,44.9097,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-13-06
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 50.77100631816266
  episode_reward_mean: 47.786497675797364
  episode_reward_min: 45.387541196533995
  episodes_this_iter: 8
  episodes_total: 18296
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1829888
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002812594873830676
        max_q: 4.052231311798096
        mean_q: 4.004326343536377
        mean_td_error: -0.05283387750387192
        min_q: 3.9811604022979736
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0021803556010127068
        max_q: 4.8945465087890625
        mean_q: 4.804261207580566
        mean_td_error: -0.03462083637714386
        min_q: 4.7657318115234375
    num_steps_sampled: 1829888
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1787,3925.67,1829888,47.7865,50.771,45.3875,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-13-14
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 50.77100631816266
  episode_reward_mean: 47.80117728758959
  episode_reward_min: 45.387541196533995
  episodes_this_iter: 8
  episodes_total: 18312
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1831936
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0015065560583025217
        max_q: 4.135972499847412
        mean_q: 4.041579723358154
        mean_td_error: -0.02424296736717224
        min_q: 3.9937570095062256
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0010460859630256891
        max_q: 4.808123588562012
        mean_q: 4.77431058883667
        mean_td_error: 0.018256843090057373
        min_q: 4.734739303588867
    num_steps_sampled: 1831936
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1789,3933.03,1831936,47.8012,50.771,45.3875,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-13-21
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.76214940543663
  episode_reward_mean: 48.55289705547753
  episode_reward_min: 45.387541196533995
  episodes_this_iter: 8
  episodes_total: 18336
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1833984
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0012111508985981345
        max_q: 4.147646427154541
        mean_q: 4.109425067901611
        mean_td_error: -0.015318013727664948
        min_q: 3.98215913772583
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004645296838134527
        max_q: 4.793670654296875
        mean_q: 4.667593955993652
        mean_td_error: -0.08680576086044312
        min_q: 4.4945268630981445
    num_steps_sampled: 1833984
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1791,3939.99,1833984,48.5529,53.7621,45.3875,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-13-27
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.76214940543663
  episode_reward_mean: 48.72228425450574
  episode_reward_min: 45.387541196533995
  episodes_this_iter: 16
  episodes_total: 18360
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1836032
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007453765720129013
        max_q: 4.274870872497559
        mean_q: 4.174256324768066
        mean_td_error: 0.10631415992975235
        min_q: 3.8487722873687744
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03694264218211174
        max_q: 5.077465057373047
        mean_q: 3.153740406036377
        mean_td_error: -1.5252636671066284
        min_q: 1.9908690452575684
    num_steps_sampled: 1836032
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1793,3946.23,1836032,48.7223,53.7621,45.3875,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-13-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.33245979322986
  episode_reward_mean: 49.01936734757528
  episode_reward_min: 45.387541196533995
  episodes_this_iter: 8
  episodes_total: 18376
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1838080
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.018029645085334778
        max_q: 4.346024990081787
        mean_q: 4.134588718414307
        mean_td_error: -0.24151740968227386
        min_q: 3.8339390754699707
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02967063896358013
        max_q: 4.701899528503418
        mean_q: 4.233266830444336
        mean_td_error: -0.512391984462738
        min_q: 3.7505764961242676
    num_steps_sampled: 1838080
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1795,3952.16,1838080,49.0194,56.3325,45.3875,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-13-39
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.33245979322986
  episode_reward_mean: 49.654220804883735
  episode_reward_min: 43.77828435212422
  episodes_this_iter: 16
  episodes_total: 18400
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1840128
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03841149061918259
        max_q: 4.209965705871582
        mean_q: 3.5951969623565674
        mean_td_error: -0.5348101854324341
        min_q: 2.906411647796631
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02814853936433792
        max_q: 4.319597244262695
        mean_q: 4.082098007202148
        mean_td_error: -0.5013633370399475
        min_q: 4.031600475311279
    num_steps_sampled: 1840128
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1797,3957.76,1840128,49.6542,56.3325,43.7783,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-13-45
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.33245979322986
  episode_reward_mean: 49.552002445339525
  episode_reward_min: 43.77828435212422
  episodes_this_iter: 8
  episodes_total: 18416
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1842176
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02067204751074314
        max_q: 2.9317924976348877
        mean_q: 2.529756784439087
        mean_td_error: -0.2930816113948822
        min_q: 2.248840093612671
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002164240926504135
        max_q: 4.411170482635498
        mean_q: 4.322536945343018
        mean_td_error: -0.04014003276824951
        min_q: 4.234934329986572
    num_steps_sampled: 1842176
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1799,3963.35,1842176,49.552,56.3325,43.7783,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-13-51
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.33245979322986
  episode_reward_mean: 49.40749065180503
  episode_reward_min: 43.77828435212422
  episodes_this_iter: 8
  episodes_total: 18440
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1844224
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010900856927037239
        max_q: 2.2402851581573486
        mean_q: 2.1053109169006348
        mean_td_error: -0.1398213654756546
        min_q: 1.7792353630065918
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0043556224554777145
        max_q: 4.514984607696533
        mean_q: 4.432219982147217
        mean_td_error: 0.08824387192726135
        min_q: 4.37929105758667
    num_steps_sampled: 1844224
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1801,3968.57,1844224,49.4075,56.3325,43.7783,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-13-56
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.33245979322986
  episode_reward_mean: 49.70251337995709
  episode_reward_min: 43.77828435212422
  episodes_this_iter: 8
  episodes_total: 18456
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1846272
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006158974952995777
        max_q: 2.3027775287628174
        mean_q: 2.091702938079834
        mean_td_error: 0.08266595751047134
        min_q: 1.985580563545227
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00733233243227005
        max_q: 4.50451135635376
        mean_q: 4.4552788734436035
        mean_td_error: -0.1344534456729889
        min_q: 4.368823528289795
    num_steps_sampled: 1846272
    num_steps_trained: 1

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1803,3973.76,1846272,49.7025,56.3325,43.7783,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-14-01
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.19612010329934
  episode_reward_mean: 49.420466598654585
  episode_reward_min: 43.77828435212422
  episodes_this_iter: 8
  episodes_total: 18480
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1848320
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006945854518562555
        max_q: 2.1311895847320557
        mean_q: 1.8977869749069214
        mean_td_error: -0.08701545000076294
        min_q: 1.746511697769165
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003165560308843851
        max_q: 4.464587211608887
        mean_q: 4.426452159881592
        mean_td_error: -0.052824318408966064
        min_q: 4.3217573165893555
    num_steps_sampled: 1848320
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1805,3979.08,1848320,49.4205,54.1961,43.7783,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-14-07
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.19612010329934
  episode_reward_mean: 49.334061345165985
  episode_reward_min: 43.80575815121908
  episodes_this_iter: 8
  episodes_total: 18496
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1850368
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015008395537734032
        max_q: 2.1280815601348877
        mean_q: 2.0312910079956055
        mean_td_error: -0.20861053466796875
        min_q: 1.9176212549209595
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006576895248144865
        max_q: 4.376029014587402
        mean_q: 4.319365978240967
        mean_td_error: -0.10557064414024353
        min_q: 4.207859516143799
    num_steps_sampled: 1850368
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1807,3984.12,1850368,49.3341,54.1961,43.8058,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-14-12
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.19612010329934
  episode_reward_mean: 49.00983672851126
  episode_reward_min: 42.899300347761994
  episodes_this_iter: 8
  episodes_total: 18520
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1852416
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003917175345122814
        max_q: 2.46100115776062
        mean_q: 2.3822057247161865
        mean_td_error: -0.05417833477258682
        min_q: 2.294398546218872
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0028542864602059126
        max_q: 4.383693218231201
        mean_q: 4.335218906402588
        mean_td_error: -0.041460052132606506
        min_q: 4.252959728240967
    num_steps_sampled: 1852416
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1809,3989.05,1852416,49.0098,54.1961,42.8993,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-14-17
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.19612010329934
  episode_reward_mean: 48.869476775673995
  episode_reward_min: 42.899300347761994
  episodes_this_iter: 16
  episodes_total: 18544
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1854464
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0040606423281133175
        max_q: 2.546356439590454
        mean_q: 2.445302963256836
        mean_td_error: -0.05456557124853134
        min_q: 2.30580997467041
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004272052552551031
        max_q: 4.447552680969238
        mean_q: 4.3097429275512695
        mean_td_error: -0.0721927136182785
        min_q: 4.232999324798584
    num_steps_sampled: 1854464
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1811,3994.44,1854464,48.8695,54.1961,42.8993,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-14-23
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.987124334068454
  episode_reward_mean: 48.42007297759712
  episode_reward_min: 42.38230631219637
  episodes_this_iter: 8
  episodes_total: 18560
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1856512
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008081819862127304
        max_q: 2.6586179733276367
        mean_q: 2.36808180809021
        mean_td_error: -0.10749601572751999
        min_q: 2.22767972946167
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007830369286239147
        max_q: 4.45192289352417
        mean_q: 4.353652000427246
        mean_td_error: 0.1342134028673172
        min_q: 4.255164623260498
    num_steps_sampled: 1856512
    num_steps_trained: 18

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1813,4000.11,1856512,48.4201,53.9871,42.3823,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-14-29
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.987124334068454
  episode_reward_mean: 47.84811393696509
  episode_reward_min: 42.38230631219637
  episodes_this_iter: 16
  episodes_total: 18584
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1858560
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005370388738811016
        max_q: 2.6262662410736084
        mean_q: 2.405224323272705
        mean_td_error: -0.06262817978858948
        min_q: 2.2871601581573486
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007398557849228382
        max_q: 4.340671539306641
        mean_q: 4.180730819702148
        mean_td_error: -0.11943978071212769
        min_q: 4.0827460289001465
    num_steps_sampled: 1858560
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1815,4005.68,1858560,47.8481,53.9871,42.3823,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-14-35
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.987124334068454
  episode_reward_mean: 48.02985621124621
  episode_reward_min: 42.38230631219637
  episodes_this_iter: 8
  episodes_total: 18600
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1860608
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00502338632941246
        max_q: 2.519707679748535
        mean_q: 2.3693056106567383
        mean_td_error: -0.05918619781732559
        min_q: 2.1046082973480225
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.017521178349852562
        max_q: 4.35391092300415
        mean_q: 4.256031036376953
        mean_td_error: 0.28949522972106934
        min_q: 4.11547327041626
    num_steps_sampled: 1860608
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1817,4011.38,1860608,48.0299,53.9871,42.3823,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-14-42
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.987124334068454
  episode_reward_mean: 48.291131796045484
  episode_reward_min: 42.38230631219637
  episodes_this_iter: 8
  episodes_total: 18624
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1862656
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007017598953098059
        max_q: 2.304936647415161
        mean_q: 2.011556386947632
        mean_td_error: -0.08960036188364029
        min_q: 1.7795435190200806
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004245527554303408
        max_q: 4.36068868637085
        mean_q: 4.22827672958374
        mean_td_error: 0.05335819721221924
        min_q: 4.069683074951172
    num_steps_sampled: 1862656
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1819,4018.16,1862656,48.2911,53.9871,42.3823,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-14-50
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.094178723852444
  episode_reward_mean: 48.72294174690877
  episode_reward_min: 42.38230631219637
  episodes_this_iter: 8
  episodes_total: 18640
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1864704
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008062399923801422
        max_q: 2.607304573059082
        mean_q: 2.403520107269287
        mean_td_error: -0.11503519862890244
        min_q: 2.3061795234680176
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005736089777201414
        max_q: 4.470093727111816
        mean_q: 4.268147945404053
        mean_td_error: 0.0978153645992279
        min_q: 4.112765312194824
    num_steps_sampled: 1864704
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1821,4025.99,1864704,48.7229,54.0942,42.3823,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-14-58
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.094178723852444
  episode_reward_mean: 49.059821716685434
  episode_reward_min: 44.145753826564004
  episodes_this_iter: 8
  episodes_total: 18664
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1866752
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00244654668495059
        max_q: 2.720540761947632
        mean_q: 2.6429922580718994
        mean_td_error: -0.031148523092269897
        min_q: 2.5812878608703613
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005976320244371891
        max_q: 4.2898969650268555
        mean_q: 4.163233757019043
        mean_td_error: -0.09545467793941498
        min_q: 4.042303085327148
    num_steps_sampled: 1866752
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1823,4033.61,1866752,49.0598,54.0942,44.1458,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-15-05
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.094178723852444
  episode_reward_mean: 49.78196852332693
  episode_reward_min: 45.96680912506536
  episodes_this_iter: 16
  episodes_total: 18688
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1868800
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0032235775142908096
        max_q: 2.8259692192077637
        mean_q: 2.7306036949157715
        mean_td_error: -0.043597154319286346
        min_q: 2.5872836112976074
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004321676678955555
        max_q: 4.201992034912109
        mean_q: 4.050562381744385
        mean_td_error: -0.05436880141496658
        min_q: 3.9117608070373535
    num_steps_sampled: 1868800
    num_steps_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1825,4040.42,1868800,49.782,54.0942,45.9668,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-15-12
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.812780280182075
  episode_reward_mean: 50.353215400172324
  episode_reward_min: 45.96680912506536
  episodes_this_iter: 8
  episodes_total: 18704
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1870848
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006518522743135691
        max_q: 3.0148942470550537
        mean_q: 2.8957836627960205
        mean_td_error: -0.08798623085021973
        min_q: 2.79278826713562
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009835327975451946
        max_q: 4.4237236976623535
        mean_q: 4.076814651489258
        mean_td_error: -0.11319854110479355
        min_q: 3.962982416152954
    num_steps_sampled: 1870848
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1827,4047.17,1870848,50.3532,55.8128,45.9668,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-15-18
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.812780280182075
  episode_reward_mean: 50.4759191099273
  episode_reward_min: 46.6327089532257
  episodes_this_iter: 16
  episodes_total: 18728
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1872896
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002453526947647333
        max_q: 3.202303886413574
        mean_q: 3.071669101715088
        mean_td_error: -0.03466157615184784
        min_q: 2.9373526573181152
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005272089969366789
        max_q: 4.482513904571533
        mean_q: 4.31300163269043
        mean_td_error: 0.04154719412326813
        min_q: 4.020078659057617
    num_steps_sampled: 1872896
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1829,4053.01,1872896,50.4759,55.8128,46.6327,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-15-24
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.74656182872666
  episode_reward_mean: 51.06731211395823
  episode_reward_min: 46.6327089532257
  episodes_this_iter: 8
  episodes_total: 18744
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1874944
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012535648420453072
        max_q: 3.3418917655944824
        mean_q: 3.0880672931671143
        mean_td_error: -0.16353264451026917
        min_q: 2.947421073913574
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.028944911435246468
        max_q: 4.156126499176025
        mean_q: 3.869239091873169
        mean_td_error: -0.36131930351257324
        min_q: 3.725921630859375
    num_steps_sampled: 1874944
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1831,4058.51,1874944,51.0673,56.7466,46.6327,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-15-29
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.74656182872666
  episode_reward_mean: 51.436056932055365
  episode_reward_min: 45.13475272443967
  episodes_this_iter: 16
  episodes_total: 18768
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1876992
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.017547203227877617
        max_q: 2.985187530517578
        mean_q: 2.821052312850952
        mean_td_error: -0.22691941261291504
        min_q: 2.6376161575317383
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.021745897829532623
        max_q: 4.263519287109375
        mean_q: 3.615920305252075
        mean_td_error: -0.20806089043617249
        min_q: 3.281796455383301
    num_steps_sampled: 1876992
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1833,4063.97,1876992,51.4361,56.7466,45.1348,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-15-35
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.62699216401722
  episode_reward_mean: 52.132922470136265
  episode_reward_min: 45.13475272443967
  episodes_this_iter: 8
  episodes_total: 18784
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1879040
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01693553663790226
        max_q: 2.670071840286255
        mean_q: 2.3841264247894287
        mean_td_error: -0.23370280861854553
        min_q: 2.063091278076172
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.031074650585651398
        max_q: 3.1402347087860107
        mean_q: 2.59175705909729
        mean_td_error: -0.37028947472572327
        min_q: 2.22581148147583
    num_steps_sampled: 1879040
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1835,4069.15,1879040,52.1329,57.627,45.1348,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-15-40
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.62699216401722
  episode_reward_mean: 52.85809667252889
  episode_reward_min: 45.13475272443967
  episodes_this_iter: 8
  episodes_total: 18808
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1881088
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.017368121072649956
        max_q: 2.2026901245117188
        mean_q: 2.0246148109436035
        mean_td_error: -0.20861053466796875
        min_q: 1.6022741794586182
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.016141681000590324
        max_q: 2.9610300064086914
        mean_q: 2.773998975753784
        mean_td_error: -0.1768283098936081
        min_q: 2.6014251708984375
    num_steps_sampled: 1881088
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1837,4074.52,1881088,52.8581,57.627,45.1348,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-15-46
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.62699216401722
  episode_reward_mean: 53.63170508084
  episode_reward_min: 45.13475272443967
  episodes_this_iter: 8
  episodes_total: 18824
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1883136
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0016963606467470527
        max_q: 2.42515230178833
        mean_q: 2.2171239852905273
        mean_td_error: 0.013423159718513489
        min_q: 2.1143946647644043
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005029828753322363
        max_q: 3.490530490875244
        mean_q: 3.201140880584717
        mean_td_error: -0.025198325514793396
        min_q: 3.101405143737793
    num_steps_sampled: 1883136
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1839,4080.13,1883136,53.6317,57.627,45.1348,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-15-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.62699216401722
  episode_reward_mean: 53.982945290411514
  episode_reward_min: 45.13475272443967
  episodes_this_iter: 8
  episodes_total: 18848
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1885184
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0025474117137491703
        max_q: 2.851901054382324
        mean_q: 2.6258881092071533
        mean_td_error: 0.013501673936843872
        min_q: 2.53691029548645
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003908043727278709
        max_q: 3.570281505584717
        mean_q: 3.434798002243042
        mean_td_error: -0.04126769304275513
        min_q: 3.3653621673583984
    num_steps_sampled: 1885184
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1841,4085.86,1885184,53.9829,57.627,45.1348,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-15-58
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.391126283093996
  episode_reward_mean: 55.38933347477924
  episode_reward_min: 51.30403581490841
  episodes_this_iter: 16
  episodes_total: 18872
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1887232
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.016705213114619255
        max_q: 3.1510396003723145
        mean_q: 2.494974374771118
        mean_td_error: -0.1946353316307068
        min_q: 2.265465497970581
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002469549188390374
        max_q: 3.8474063873291016
        mean_q: 3.756838798522949
        mean_td_error: -0.011207431554794312
        min_q: 3.65950608253479
    num_steps_sampled: 1887232
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1843,4091.86,1887232,55.3893,60.3911,51.304,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-16-05
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.42593460404991
  episode_reward_mean: 55.66426891794853
  episode_reward_min: 51.30403581490841
  episodes_this_iter: 8
  episodes_total: 18888
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1889280
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00740705244243145
        max_q: 2.6072304248809814
        mean_q: 2.37349534034729
        mean_td_error: -0.10068322718143463
        min_q: 2.129833221435547
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0011622551828622818
        max_q: 4.065943241119385
        mean_q: 3.9489850997924805
        mean_td_error: -0.0001893937587738037
        min_q: 3.871509075164795
    num_steps_sampled: 1889280
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1845,4098.18,1889280,55.6643,60.4259,51.304,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-16-12
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.42593460404991
  episode_reward_mean: 56.19274868294433
  episode_reward_min: 52.48283116641516
  episodes_this_iter: 16
  episodes_total: 18912
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1891328
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004130553919821978
        max_q: 2.7069075107574463
        mean_q: 2.5924181938171387
        mean_td_error: -0.05306236445903778
        min_q: 2.427982807159424
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004100338090211153
        max_q: 4.20499324798584
        mean_q: 4.098532199859619
        mean_td_error: 0.05110948532819748
        min_q: 3.939509391784668
    num_steps_sampled: 1891328
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1847,4104.96,1891328,56.1927,60.4259,52.4828,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-16-19
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.42593460404991
  episode_reward_mean: 56.78881169267528
  episode_reward_min: 52.48283116641516
  episodes_this_iter: 8
  episodes_total: 18928
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1893376
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006545796059072018
        max_q: 3.200471878051758
        mean_q: 3.0251173973083496
        mean_td_error: 0.07115034759044647
        min_q: 2.8914761543273926
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004159921780228615
        max_q: 4.161365509033203
        mean_q: 4.06249475479126
        mean_td_error: 0.04849102348089218
        min_q: 3.9061105251312256
    num_steps_sampled: 1893376
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1849,4111.92,1893376,56.7888,60.4259,52.4828,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-16-28
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.42593460404991
  episode_reward_mean: 57.36562532858778
  episode_reward_min: 52.925706440494615
  episodes_this_iter: 8
  episodes_total: 18952
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1895424
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0047532361932098866
        max_q: 3.243687629699707
        mean_q: 3.053032875061035
        mean_td_error: -0.040615491569042206
        min_q: 2.955054521560669
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00912550836801529
        max_q: 4.112412929534912
        mean_q: 3.8914825916290283
        mean_td_error: -0.11275148391723633
        min_q: 3.65399169921875
    num_steps_sampled: 1895424
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1851,4120.3,1895424,57.3656,60.4259,52.9257,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-16-33
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.42593460404991
  episode_reward_mean: 57.57766103009752
  episode_reward_min: 53.99088944020281
  episodes_this_iter: 8
  episodes_total: 18960
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1896448
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004602963570505381
        max_q: 3.212629556655884
        mean_q: 3.0322775840759277
        mean_td_error: -0.029259219765663147
        min_q: 2.8686583042144775
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008205797523260117
        max_q: 4.0120744705200195
        mean_q: 3.6427998542785645
        mean_td_error: -0.07258573919534683
        min_q: 3.3487565517425537
    num_steps_sampled: 1896448
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1852,4125.21,1896448,57.5777,60.4259,53.9909,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-16-38
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.42593460404991
  episode_reward_mean: 57.55310954752095
  episode_reward_min: 53.99088944020281
  episodes_this_iter: 8
  episodes_total: 18968
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1897472
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.018998948857188225
        max_q: 3.2882256507873535
        mean_q: 3.0900275707244873
        mean_td_error: -0.25159066915512085
        min_q: 2.9289908409118652
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005003160331398249
        max_q: 3.7822155952453613
        mean_q: 3.322270154953003
        mean_td_error: 0.018194779753684998
        min_q: 2.880736827850342
    num_steps_sampled: 1897472
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1853,4130.28,1897472,57.5531,60.4259,53.9909,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-16-44
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.42593460404991
  episode_reward_mean: 57.82261279185829
  episode_reward_min: 54.75093600476601
  episodes_this_iter: 16
  episodes_total: 18984
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1898496
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.021171802654862404
        max_q: 3.084683656692505
        mean_q: 2.9506213665008545
        mean_td_error: -0.2945314049720764
        min_q: 2.7347378730773926
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011633271351456642
        max_q: 3.1476309299468994
        mean_q: 2.9493355751037598
        mean_td_error: -0.13298380374908447
        min_q: 2.774384021759033
    num_steps_sampled: 1898496
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1854,4136.38,1898496,57.8226,60.4259,54.7509,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-16-50
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.34318230128976
  episode_reward_mean: 57.74126761249201
  episode_reward_min: 54.75093600476601
  episodes_this_iter: 8
  episodes_total: 18992
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1899520
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0082706268876791
        max_q: 2.914304733276367
        mean_q: 2.74845290184021
        mean_td_error: -0.09610208868980408
        min_q: 2.6350510120391846
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014118689112365246
        max_q: 2.9665143489837646
        mean_q: 2.737903356552124
        mean_td_error: -0.18097111582756042
        min_q: 2.3650879859924316
    num_steps_sampled: 1899520
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1855,4142.42,1899520,57.7413,59.3432,54.7509,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-16-57
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.34318230128976
  episode_reward_mean: 57.67089136155982
  episode_reward_min: 55.568910334133044
  episodes_this_iter: 8
  episodes_total: 19000
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1900544
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013046005740761757
        max_q: 2.922844886779785
        mean_q: 2.799062728881836
        mean_td_error: -0.16819199919700623
        min_q: 2.7302372455596924
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007219708524644375
        max_q: 3.0411014556884766
        mean_q: 2.6738226413726807
        mean_td_error: -0.0841970443725586
        min_q: 2.452329635620117
    num_steps_sampled: 1900544
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1856,4148.79,1900544,57.6709,59.3432,55.5689,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-17-05
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.34318230128976
  episode_reward_mean: 57.61340062969535
  episode_reward_min: 55.568910334133044
  episodes_this_iter: 8
  episodes_total: 19008
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1901568
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005995960906147957
        max_q: 3.2354421615600586
        mean_q: 3.0745832920074463
        mean_td_error: -0.07222907990217209
        min_q: 2.9246838092803955
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008188704028725624
        max_q: 3.0125794410705566
        mean_q: 2.8014068603515625
        mean_td_error: -0.0985817089676857
        min_q: 2.6409072875976562
    num_steps_sampled: 1901568
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1857,4156.21,1901568,57.6134,59.3432,55.5689,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-17-14
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.05593754378566
  episode_reward_mean: 57.57704053749161
  episode_reward_min: 55.568910334133044
  episodes_this_iter: 16
  episodes_total: 19024
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1902592
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009804117493331432
        max_q: 3.3141236305236816
        mean_q: 3.112529993057251
        mean_td_error: -0.1301593780517578
        min_q: 2.979323387145996
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007040991447865963
        max_q: 3.1643991470336914
        mean_q: 2.9161386489868164
        mean_td_error: -0.08231187611818314
        min_q: 2.7587356567382812
    num_steps_sampled: 1902592
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1858,4165.09,1902592,57.577,59.0559,55.5689,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-17-22
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.83132058151208
  episode_reward_mean: 57.718237684497645
  episode_reward_min: 55.568910334133044
  episodes_this_iter: 8
  episodes_total: 19032
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1903616
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0029744324274361134
        max_q: 3.274148941040039
        mean_q: 3.1724236011505127
        mean_td_error: -0.04085932672023773
        min_q: 2.9151554107666016
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.022521451115608215
        max_q: 2.8710713386535645
        mean_q: 2.6581287384033203
        mean_td_error: -0.27973389625549316
        min_q: 2.5529048442840576
    num_steps_sampled: 1903616
    num_steps_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1859,4172.53,1903616,57.7182,59.8313,55.5689,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-17-30
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.83132058151208
  episode_reward_mean: 57.28902413666775
  episode_reward_min: 54.20471224095935
  episodes_this_iter: 16
  episodes_total: 19056
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1905664
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006997079122811556
        max_q: 2.971179962158203
        mean_q: 2.7696533203125
        mean_td_error: -0.09019166976213455
        min_q: 2.651376962661743
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004402606748044491
        max_q: 3.191908597946167
        mean_q: 3.086400270462036
        mean_td_error: -0.04204193502664566
        min_q: 3.010796308517456
    num_steps_sampled: 1905664
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1861,4180.79,1905664,57.289,59.8313,54.2047,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-17-37
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.83132058151208
  episode_reward_mean: 56.87151884331613
  episode_reward_min: 54.20471224095935
  episodes_this_iter: 8
  episodes_total: 19072
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1907712
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03331746906042099
        max_q: 2.8439955711364746
        mean_q: 2.7038278579711914
        mean_td_error: -0.32896536588668823
        min_q: 2.614391565322876
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005446885246783495
        max_q: 3.5882058143615723
        mean_q: 3.4901161193847656
        mean_td_error: -0.06697244197130203
        min_q: 3.3762335777282715
    num_steps_sampled: 1907712
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1863,4186.9,1907712,56.8715,59.8313,54.2047,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-17-42
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.83132058151208
  episode_reward_mean: 56.50669746791147
  episode_reward_min: 54.20471224095935
  episodes_this_iter: 16
  episodes_total: 19096
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1909760
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.020932229235768318
        max_q: 3.0028042793273926
        mean_q: 2.8321521282196045
        mean_td_error: -0.22641292214393616
        min_q: 2.6438727378845215
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009913011454045773
        max_q: 3.808581829071045
        mean_q: 3.6081433296203613
        mean_td_error: -0.08845193684101105
        min_q: 3.4507334232330322
    num_steps_sampled: 1909760
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1865,4192.16,1909760,56.5067,59.8313,54.2047,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-17-48
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.83132058151208
  episode_reward_mean: 56.352257377082495
  episode_reward_min: 52.281055076441085
  episodes_this_iter: 8
  episodes_total: 19112
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1911808
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0134752681478858
        max_q: 3.0496292114257812
        mean_q: 2.88446044921875
        mean_td_error: -0.12907762825489044
        min_q: 2.6787850856781006
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002648068591952324
        max_q: 4.002499103546143
        mean_q: 3.8048219680786133
        mean_td_error: 0.016005322337150574
        min_q: 3.5738742351531982
    num_steps_sampled: 1911808
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1867,4197.14,1911808,56.3523,59.8313,52.2811,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-17-53
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.592905361201026
  episode_reward_mean: 56.25301588756828
  episode_reward_min: 52.281055076441085
  episodes_this_iter: 8
  episodes_total: 19136
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1913856
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011802331544458866
        max_q: 3.158292293548584
        mean_q: 3.053973436355591
        mean_td_error: -0.1296110302209854
        min_q: 2.944666862487793
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005158646963536739
        max_q: 4.0972137451171875
        mean_q: 3.919097900390625
        mean_td_error: -0.06707502901554108
        min_q: 3.861335277557373
    num_steps_sampled: 1913856
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1869,4202.14,1913856,56.253,60.5929,52.2811,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-18-00
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.592905361201026
  episode_reward_mean: 56.605841444945106
  episode_reward_min: 52.281055076441085
  episodes_this_iter: 16
  episodes_total: 19168
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1916928
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01845669001340866
        max_q: 3.4826271533966064
        mean_q: 3.3613059520721436
        mean_td_error: -0.1778557002544403
        min_q: 3.2112045288085938
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012786012142896652
        max_q: 3.933682441711426
        mean_q: 3.760960340499878
        mean_td_error: -0.17694568634033203
        min_q: 3.6372475624084473
    num_steps_sampled: 1916928
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1872,4208.95,1916928,56.6058,60.5929,52.2811,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-18-07
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.592905361201026
  episode_reward_mean: 56.768143942807434
  episode_reward_min: 52.281055076441085
  episodes_this_iter: 16
  episodes_total: 19200
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1920000
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003567170351743698
        max_q: 3.7409913539886475
        mean_q: 3.56072735786438
        mean_td_error: -0.0029888153076171875
        min_q: 3.386808156967163
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0024292278103530407
        max_q: 4.000888347625732
        mean_q: 3.8969669342041016
        mean_td_error: -0.0290854349732399
        min_q: 3.832305669784546
    num_steps_sampled: 1920000
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1875,4216.07,1920000,56.7681,60.5929,52.2811,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-18-12
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 62.25474541675936
  episode_reward_mean: 57.120660616584466
  episode_reward_min: 53.49301055408547
  episodes_this_iter: 8
  episodes_total: 19216
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1922048
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03965268284082413
        max_q: 3.3498764038085938
        mean_q: 3.0549044609069824
        mean_td_error: -0.5044671297073364
        min_q: 2.8740363121032715
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005898647475987673
        max_q: 4.1023850440979
        mean_q: 3.9964420795440674
        mean_td_error: -0.09003988653421402
        min_q: 3.936490297317505
    num_steps_sampled: 1922048
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1877,4221.04,1922048,57.1207,62.2547,53.493,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-18-18
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 62.25474541675936
  episode_reward_mean: 56.9896828956579
  episode_reward_min: 53.06075178886172
  episodes_this_iter: 16
  episodes_total: 19240
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1924096
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02446797490119934
        max_q: 3.4300553798675537
        mean_q: 2.860034227371216
        mean_td_error: -0.2641543745994568
        min_q: 2.4334044456481934
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002512790495529771
        max_q: 4.0919084548950195
        mean_q: 4.001194953918457
        mean_td_error: 0.003136739134788513
        min_q: 3.8726799488067627
    num_steps_sampled: 1924096
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1879,4226.09,1924096,56.9897,62.2547,53.0608,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-18-23
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 62.25474541675936
  episode_reward_mean: 57.23459177557935
  episode_reward_min: 53.06075178886172
  episodes_this_iter: 8
  episodes_total: 19256
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1926144
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015303252264857292
        max_q: 2.7770893573760986
        mean_q: 2.630417823791504
        mean_td_error: -0.19979912042617798
        min_q: 2.5080132484436035
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002875192556530237
        max_q: 4.247094631195068
        mean_q: 4.095363140106201
        mean_td_error: 0.03237873315811157
        min_q: 3.992894411087036
    num_steps_sampled: 1926144
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1881,4231.32,1926144,57.2346,62.2547,53.0608,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-18-28
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 62.25474541675936
  episode_reward_mean: 56.8524904440208
  episode_reward_min: 52.15217670405251
  episodes_this_iter: 16
  episodes_total: 19280
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1928192
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0012012331280857325
        max_q: 3.052330732345581
        mean_q: 2.9422738552093506
        mean_td_error: 0.01330486685037613
        min_q: 2.8228840827941895
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0192532017827034
        max_q: 4.071652412414551
        mean_q: 3.9115469455718994
        mean_td_error: -0.28271275758743286
        min_q: 3.7806472778320312
    num_steps_sampled: 1928192
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1883,4236.47,1928192,56.8525,62.2547,52.1522,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-18-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 62.25474541675936
  episode_reward_mean: 56.261372595270856
  episode_reward_min: 50.95240659282723
  episodes_this_iter: 8
  episodes_total: 19296
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1930240
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0035966781433671713
        max_q: 3.4756622314453125
        mean_q: 3.2588253021240234
        mean_td_error: 0.023400969803333282
        min_q: 3.0925469398498535
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.028213797137141228
        max_q: 3.9714245796203613
        mean_q: 3.685715913772583
        mean_td_error: -0.41060593724250793
        min_q: 3.5182321071624756
    num_steps_sampled: 1930240
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1885,4241.64,1930240,56.2614,62.2547,50.9524,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-18-39
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 61.259457897822514
  episode_reward_mean: 54.685948422506215
  episode_reward_min: 50.83893573689372
  episodes_this_iter: 8
  episodes_total: 19320
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1932288
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011042310856282711
        max_q: 3.3927571773529053
        mean_q: 3.175105333328247
        mean_td_error: -0.14065194129943848
        min_q: 2.960451602935791
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012794074602425098
        max_q: 3.9620749950408936
        mean_q: 3.6229028701782227
        mean_td_error: -0.17505131661891937
        min_q: 3.327986717224121
    num_steps_sampled: 1932288
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1887,4246.85,1932288,54.6859,61.2595,50.8389,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-18-45
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 61.259457897822514
  episode_reward_mean: 54.37424018233063
  episode_reward_min: 50.83893573689372
  episodes_this_iter: 8
  episodes_total: 19336
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1934336
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012303250841796398
        max_q: 3.4382662773132324
        mean_q: 3.3211984634399414
        mean_td_error: -0.16547495126724243
        min_q: 3.2111871242523193
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.020485250279307365
        max_q: 3.2892210483551025
        mean_q: 2.8880248069763184
        mean_td_error: -0.30853718519210815
        min_q: 2.642146110534668
    num_steps_sampled: 1934336
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1889,4252.63,1934336,54.3742,61.2595,50.8389,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-18-51
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.779482684267975
  episode_reward_mean: 53.89756062183253
  episode_reward_min: 50.83893573689372
  episodes_this_iter: 8
  episodes_total: 19360
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1936384
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009261487983167171
        max_q: 3.6820058822631836
        mean_q: 3.541853904724121
        mean_td_error: -0.08775196224451065
        min_q: 3.4814250469207764
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01748066581785679
        max_q: 2.9682979583740234
        mean_q: 2.5603291988372803
        mean_td_error: -0.23145782947540283
        min_q: 2.06709361076355
    num_steps_sampled: 1936384
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1891,4258.3,1936384,53.8976,57.7795,50.8389,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-18-57
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.162142979861
  episode_reward_mean: 53.24345424275053
  episode_reward_min: 50.83893573689372
  episodes_this_iter: 16
  episodes_total: 19384
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1938432
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015294525772333145
        max_q: 3.8198978900909424
        mean_q: 3.6860268115997314
        mean_td_error: -0.1709357500076294
        min_q: 3.616837978363037
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.026025280356407166
        max_q: 2.5044381618499756
        mean_q: 2.170138359069824
        mean_td_error: -0.3509601354598999
        min_q: 2.000352621078491
    num_steps_sampled: 1938432
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1893,4264.11,1938432,53.2435,56.1621,50.8389,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-19-03
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.162142979861
  episode_reward_mean: 53.415616726542254
  episode_reward_min: 50.83893573689372
  episodes_this_iter: 8
  episodes_total: 19400
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1940480
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0011578011326491833
        max_q: 4.16373348236084
        mean_q: 4.1005096435546875
        mean_td_error: -0.015353448688983917
        min_q: 3.992323875427246
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010972866788506508
        max_q: 2.3521647453308105
        mean_q: 2.138556718826294
        mean_td_error: -0.16833215951919556
        min_q: 1.9598870277404785
    num_steps_sampled: 1940480
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1895,4270.38,1940480,53.4156,56.1621,50.8389,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-19-10
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.162142979861
  episode_reward_mean: 53.388512299068914
  episode_reward_min: 49.251381656498836
  episodes_this_iter: 16
  episodes_total: 19424
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1942528
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0006892490200698376
        max_q: 4.556750297546387
        mean_q: 4.540892601013184
        mean_td_error: 0.0089435875415802
        min_q: 4.516188621520996
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005760401953011751
        max_q: 2.8387436866760254
        mean_q: 2.6344871520996094
        mean_td_error: -0.06693683564662933
        min_q: 2.507495164871216
    num_steps_sampled: 1942528
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1897,4276.66,1942528,53.3885,56.1621,49.2514,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-19-16
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.31244238750804
  episode_reward_mean: 53.34547853714079
  episode_reward_min: 49.251381656498836
  episodes_this_iter: 8
  episodes_total: 19440
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1944576
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00132177141495049
        max_q: 4.8271403312683105
        mean_q: 4.792203426361084
        mean_td_error: 0.01757870614528656
        min_q: 4.743951320648193
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005734156351536512
        max_q: 3.039360761642456
        mean_q: 2.9138169288635254
        mean_td_error: -0.062144920229911804
        min_q: 2.8126628398895264
    num_steps_sampled: 1944576
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1899,4282.94,1944576,53.3455,55.3124,49.2514,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-19-23
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.17805870340284
  episode_reward_mean: 53.35840761382892
  episode_reward_min: 49.251381656498836
  episodes_this_iter: 8
  episodes_total: 19464
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1946624
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0009395737433806062
        max_q: 4.866419792175293
        mean_q: 4.800268173217773
        mean_td_error: -0.0037525296211242676
        min_q: 4.691474437713623
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008947281166911125
        max_q: 2.8868370056152344
        mean_q: 2.745410919189453
        mean_td_error: -0.0958034098148346
        min_q: 2.595449686050415
    num_steps_sampled: 1946624
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1901,4289.35,1946624,53.3584,56.1781,49.2514,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-19-30
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.17805870340284
  episode_reward_mean: 53.53829814210094
  episode_reward_min: 49.251381656498836
  episodes_this_iter: 8
  episodes_total: 19480
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1948672
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003371621947735548
        max_q: 4.950203895568848
        mean_q: 4.900176525115967
        mean_td_error: 0.050370633602142334
        min_q: 4.844358444213867
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01418542955070734
        max_q: 3.308196783065796
        mean_q: 3.1957552433013916
        mean_td_error: -0.1549851894378662
        min_q: 3.0799529552459717
    num_steps_sampled: 1948672
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1903,4295.7,1948672,53.5383,56.1781,49.2514,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-19-37
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.17805870340284
  episode_reward_mean: 53.72404386342514
  episode_reward_min: 49.251381656498836
  episodes_this_iter: 8
  episodes_total: 19504
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1950720
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003082719398662448
        max_q: 5.067291259765625
        mean_q: 4.994874954223633
        mean_td_error: -0.0341775119304657
        min_q: 4.92579460144043
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.039357926696538925
        max_q: 3.145573854446411
        mean_q: 2.9902191162109375
        mean_td_error: -0.4044901132583618
        min_q: 2.869321823120117
    num_steps_sampled: 1950720
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1905,4303.05,1950720,53.724,56.1781,49.2514,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-19-45
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.17805870340284
  episode_reward_mean: 53.72293708478419
  episode_reward_min: 49.251381656498836
  episodes_this_iter: 8
  episodes_total: 19520
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1952768
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0012253639288246632
        max_q: 5.085090160369873
        mean_q: 5.010599136352539
        mean_td_error: 0.007280945777893066
        min_q: 4.953910827636719
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011941563338041306
        max_q: 3.3373117446899414
        mean_q: 3.242382526397705
        mean_td_error: -0.13303130865097046
        min_q: 3.1895384788513184
    num_steps_sampled: 1952768
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1907,4310.25,1952768,53.7229,56.1781,49.2514,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-19-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.17805870340284
  episode_reward_mean: 53.05735402510838
  episode_reward_min: 50.06913518241304
  episodes_this_iter: 8
  episodes_total: 19544
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1954816
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.000615467841271311
        max_q: 5.110633373260498
        mean_q: 5.056963920593262
        mean_td_error: -0.007673308253288269
        min_q: 4.977092742919922
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0070390235632658005
        max_q: 3.4647364616394043
        mean_q: 3.3586246967315674
        mean_td_error: -0.060232535004615784
        min_q: 3.287297248840332
    num_steps_sampled: 1954816
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1909,4317.6,1954816,53.0574,56.1781,50.0691,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-20-00
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.205768887904206
  episode_reward_mean: 52.55639511284837
  episode_reward_min: 48.885092603494016
  episodes_this_iter: 16
  episodes_total: 19568
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1956864
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00874912366271019
        max_q: 4.824747562408447
        mean_q: 4.753232002258301
        mean_td_error: -0.15693029761314392
        min_q: 4.654975891113281
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014927663840353489
        max_q: 3.802555799484253
        mean_q: 3.4176723957061768
        mean_td_error: -0.15926256775856018
        min_q: 3.1570653915405273
    num_steps_sampled: 1956864
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1911,4325.49,1956864,52.5564,58.2058,48.8851,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-20-08
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.29592241830941
  episode_reward_mean: 53.23873545860235
  episode_reward_min: 48.885092603494016
  episodes_this_iter: 8
  episodes_total: 19584
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1958912
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00986904464662075
        max_q: 4.671271324157715
        mean_q: 4.2035698890686035
        mean_td_error: -0.13148140907287598
        min_q: 3.8927154541015625
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03679398074746132
        max_q: 3.3489952087402344
        mean_q: 2.991981267929077
        mean_td_error: -0.34845170378685
        min_q: 2.706723928451538
    num_steps_sampled: 1958912
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1913,4332.43,1958912,53.2387,58.2959,48.8851,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-20-15
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.29592241830941
  episode_reward_mean: 52.89327770023714
  episode_reward_min: 48.885092603494016
  episodes_this_iter: 16
  episodes_total: 19608
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1960960
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003943804185837507
        max_q: 3.7630558013916016
        mean_q: 3.58400297164917
        mean_td_error: -0.005940377712249756
        min_q: 3.361417531967163
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.030537769198417664
        max_q: 2.6493756771087646
        mean_q: 2.4076647758483887
        mean_td_error: -0.33255961537361145
        min_q: 2.12847900390625
    num_steps_sampled: 1960960
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1915,4339.54,1960960,52.8933,58.2959,48.8851,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-20-22
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.29592241830941
  episode_reward_mean: 53.05936412762944
  episode_reward_min: 48.885092603494016
  episodes_this_iter: 8
  episodes_total: 19624
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1963008
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0016887012170627713
        max_q: 3.6319522857666016
        mean_q: 3.5460104942321777
        mean_td_error: -0.008889764547348022
        min_q: 3.45353364944458
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010115283541381359
        max_q: 2.550355911254883
        mean_q: 2.380290985107422
        mean_td_error: -0.09626203775405884
        min_q: 2.214282512664795
    num_steps_sampled: 1963008
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1917,4346.56,1963008,53.0594,58.2959,48.8851,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-20-29
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.29592241830941
  episode_reward_mean: 54.07362033402193
  episode_reward_min: 48.885092603494016
  episodes_this_iter: 8
  episodes_total: 19648
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1965056
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01193081121891737
        max_q: 3.776249885559082
        mean_q: 3.7149622440338135
        mean_td_error: 0.19275817275047302
        min_q: 3.5742716789245605
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015912607312202454
        max_q: 2.612013816833496
        mean_q: 2.451706886291504
        mean_td_error: -0.1785300225019455
        min_q: 2.358699321746826
    num_steps_sampled: 1965056
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1919,4353.14,1965056,54.0736,58.2959,48.8851,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-20-36
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.29592241830941
  episode_reward_mean: 54.05963525889085
  episode_reward_min: 49.79021220632726
  episodes_this_iter: 8
  episodes_total: 19664
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1967104
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0020679906010627747
        max_q: 3.9409902095794678
        mean_q: 3.853006601333618
        mean_td_error: 0.0072294920682907104
        min_q: 3.7590813636779785
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0084677217528224
        max_q: 2.87725567817688
        mean_q: 2.7062766551971436
        mean_td_error: -0.09060045331716537
        min_q: 2.5431292057037354
    num_steps_sampled: 1967104
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1921,4359.44,1967104,54.0596,58.2959,49.7902,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-20-42
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.35724089446317
  episode_reward_mean: 53.00076157938913
  episode_reward_min: 48.24414326809576
  episodes_this_iter: 8
  episodes_total: 19688
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1969152
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011631550267338753
        max_q: 4.023662090301514
        mean_q: 3.657663345336914
        mean_td_error: -0.17269307374954224
        min_q: 3.425887107849121
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013936439529061317
        max_q: 2.985386371612549
        mean_q: 2.8281474113464355
        mean_td_error: -0.15707671642303467
        min_q: 2.7560181617736816
    num_steps_sampled: 1969152
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1923,4365.69,1969152,53.0008,56.3572,48.2441,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-20-49
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.35724089446317
  episode_reward_mean: 52.87526277076316
  episode_reward_min: 48.24414326809576
  episodes_this_iter: 16
  episodes_total: 19712
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1971200
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.016438497230410576
        max_q: 3.8628792762756348
        mean_q: 3.6304168701171875
        mean_td_error: 0.23750680685043335
        min_q: 3.269124984741211
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010159513913094997
        max_q: 3.1746208667755127
        mean_q: 3.0825886726379395
        mean_td_error: -0.1044410690665245
        min_q: 2.9234256744384766
    num_steps_sampled: 1971200
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1925,4372.21,1971200,52.8753,56.3572,48.2441,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-20-57
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.63009746520382
  episode_reward_mean: 53.37892721624123
  episode_reward_min: 48.24414326809576
  episodes_this_iter: 8
  episodes_total: 19728
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1973248
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0071299332194030285
        max_q: 3.6844863891601562
        mean_q: 3.547524929046631
        mean_td_error: -0.1086396723985672
        min_q: 3.3988704681396484
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002322140848264098
        max_q: 3.2732391357421875
        mean_q: 3.1307694911956787
        mean_td_error: -0.009642675518989563
        min_q: 3.008983850479126
    num_steps_sampled: 1973248
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1927,4380.12,1973248,53.3789,56.6301,48.2441,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-21-05
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.63009746520382
  episode_reward_mean: 52.703662479759615
  episode_reward_min: 46.51373253304886
  episodes_this_iter: 16
  episodes_total: 19752
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1975296
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008348005823791027
        max_q: 3.6320888996124268
        mean_q: 3.5020265579223633
        mean_td_error: 0.12088008224964142
        min_q: 3.3677375316619873
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01914915069937706
        max_q: 3.179381847381592
        mean_q: 2.9220104217529297
        mean_td_error: -0.2001880407333374
        min_q: 2.681988477706909
    num_steps_sampled: 1975296
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1929,4388,1975296,52.7037,56.6301,46.5137,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-21-13
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.63009746520382
  episode_reward_mean: 52.76907929481822
  episode_reward_min: 46.51373253304886
  episodes_this_iter: 8
  episodes_total: 19768
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1977344
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004828914068639278
        max_q: 3.4407384395599365
        mean_q: 3.3695082664489746
        mean_td_error: -0.06871878355741501
        min_q: 3.2027394771575928
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01352343987673521
        max_q: 3.0714309215545654
        mean_q: 2.729548692703247
        mean_td_error: -0.12760087847709656
        min_q: 2.4924118518829346
    num_steps_sampled: 1977344
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1931,4395.71,1977344,52.7691,56.6301,46.5137,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-21-22
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.88460354291828
  episode_reward_mean: 53.48013306157536
  episode_reward_min: 46.51373253304886
  episodes_this_iter: 16
  episodes_total: 19792
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1979392
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02773919887840748
        max_q: 3.398139476776123
        mean_q: 3.2137351036071777
        mean_td_error: -0.41371721029281616
        min_q: 2.971676826477051
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003261478152126074
        max_q: 2.6797726154327393
        mean_q: 2.513040542602539
        mean_td_error: 0.007738590240478516
        min_q: 2.3705310821533203
    num_steps_sampled: 1979392
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1933,4403.76,1979392,53.4801,57.8846,46.5137,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-21-30
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.88460354291828
  episode_reward_mean: 53.72859424044762
  episode_reward_min: 46.51373253304886
  episodes_this_iter: 8
  episodes_total: 19808
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1981440
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0044539314694702625
        max_q: 3.3929800987243652
        mean_q: 3.2670485973358154
        mean_td_error: -0.06577587872743607
        min_q: 3.1468639373779297
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006318542640656233
        max_q: 2.9771344661712646
        mean_q: 2.8513474464416504
        mean_td_error: 0.07461841404438019
        min_q: 2.7582991123199463
    num_steps_sampled: 1981440
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1935,4411.71,1981440,53.7286,57.8846,46.5137,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-21-37
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.88460354291828
  episode_reward_mean: 52.75783725771709
  episode_reward_min: 46.51373253304886
  episodes_this_iter: 8
  episodes_total: 19832
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1983488
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0021496249828487635
        max_q: 3.65338134765625
        mean_q: 3.6014814376831055
        mean_td_error: -0.031499363481998444
        min_q: 3.3614418506622314
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011690802872180939
        max_q: 3.2080795764923096
        mean_q: 2.9496102333068848
        mean_td_error: -0.13180820643901825
        min_q: 2.770773410797119
    num_steps_sampled: 1983488
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1937,4418.71,1983488,52.7578,57.8846,46.5137,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-21-44
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.346587789831
  episode_reward_mean: 52.963197404837594
  episode_reward_min: 48.15733024052931
  episodes_this_iter: 8
  episodes_total: 19848
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1985536
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01715961843729019
        max_q: 3.75832200050354
        mean_q: 3.5385942459106445
        mean_td_error: -0.2587180733680725
        min_q: 3.4022762775421143
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008936530910432339
        max_q: 3.23893141746521
        mean_q: 3.13883376121521
        mean_td_error: 0.10408790409564972
        min_q: 3.0726213455200195
    num_steps_sampled: 1985536
    num_steps_trained: 198

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1939,4425.1,1985536,52.9632,58.3466,48.1573,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-21-51
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.346587789831
  episode_reward_mean: 52.083461195731914
  episode_reward_min: 48.15733024052931
  episodes_this_iter: 8
  episodes_total: 19872
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1987584
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004842058289796114
        max_q: 3.7927536964416504
        mean_q: 3.590003490447998
        mean_td_error: -0.07600938528776169
        min_q: 3.4819459915161133
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0025367860216647387
        max_q: 3.3189618587493896
        mean_q: 3.228315591812134
        mean_td_error: 0.01724454015493393
        min_q: 3.0606324672698975
    num_steps_sampled: 1987584
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1941,4431.92,1987584,52.0835,58.3466,48.1573,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-21-58
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.346587789831
  episode_reward_mean: 51.51564041478088
  episode_reward_min: 48.15733024052931
  episodes_this_iter: 16
  episodes_total: 19896
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1989632
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008076716214418411
        max_q: 3.4810848236083984
        mean_q: 3.294213056564331
        mean_td_error: -0.10712522268295288
        min_q: 3.14951491355896
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006691731978207827
        max_q: 3.2605926990509033
        mean_q: 3.105319023132324
        mean_td_error: -0.06465621292591095
        min_q: 3.0379090309143066
    num_steps_sampled: 1989632
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1943,4438.73,1989632,51.5156,58.3466,48.1573,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-22-05
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.346587789831
  episode_reward_mean: 51.29558392823149
  episode_reward_min: 48.15733024052931
  episodes_this_iter: 8
  episodes_total: 19912
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1991680
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0036206550430506468
        max_q: 3.405580520629883
        mean_q: 3.2507781982421875
        mean_td_error: -0.02972760796546936
        min_q: 3.123166561126709
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002032810589298606
        max_q: 3.4512314796447754
        mean_q: 3.3364477157592773
        mean_td_error: -0.007664896547794342
        min_q: 3.2313072681427
    num_steps_sampled: 1991680
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1945,4445.95,1991680,51.2956,58.3466,48.1573,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-22-13
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.346587789831
  episode_reward_mean: 51.08256092423704
  episode_reward_min: 47.25431806875245
  episodes_this_iter: 16
  episodes_total: 19936
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1993728
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006661317776888609
        max_q: 3.5554890632629395
        mean_q: 3.439035177230835
        mean_td_error: -0.0842030942440033
        min_q: 3.3764851093292236
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004602668806910515
        max_q: 3.581699848175049
        mean_q: 3.5029125213623047
        mean_td_error: -0.039050713181495667
        min_q: 3.450453996658325
    num_steps_sampled: 1993728
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1947,4453.22,1993728,51.0826,58.3466,47.2543,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-22-20
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.85748937304069
  episode_reward_mean: 51.317203886677305
  episode_reward_min: 47.25431806875245
  episodes_this_iter: 8
  episodes_total: 19952
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1995776
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.025809520855545998
        max_q: 3.5874128341674805
        mean_q: 3.296121597290039
        mean_td_error: -0.32479846477508545
        min_q: 3.050812005996704
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.018267547711730003
        max_q: 3.754276752471924
        mean_q: 3.627993583679199
        mean_td_error: -0.19192031025886536
        min_q: 3.4991564750671387
    num_steps_sampled: 1995776
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1949,4460.25,1995776,51.3172,55.8575,47.2543,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-22-27
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.85748937304069
  episode_reward_mean: 51.274027718925645
  episode_reward_min: 47.25431806875245
  episodes_this_iter: 8
  episodes_total: 19976
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1997824
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007277984172105789
        max_q: 3.8326053619384766
        mean_q: 3.6142303943634033
        mean_td_error: -0.08281883597373962
        min_q: 3.4603238105773926
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005435071885585785
        max_q: 3.787774085998535
        mean_q: 3.7089414596557617
        mean_td_error: -0.06523868441581726
        min_q: 3.594269275665283
    num_steps_sampled: 1997824
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1951,4467.16,1997824,51.274,55.8575,47.2543,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-22-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.85748937304069
  episode_reward_mean: 50.93800531898634
  episode_reward_min: 47.25431806875245
  episodes_this_iter: 8
  episodes_total: 19992
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 1999872
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005889636930078268
        max_q: 3.871852159500122
        mean_q: 3.690668821334839
        mean_td_error: -0.06044449657201767
        min_q: 3.2874603271484375
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005675552878528833
        max_q: 3.6976964473724365
        mean_q: 3.578847646713257
        mean_td_error: -0.032987356185913086
        min_q: 3.4450814723968506
    num_steps_sampled: 1999872
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1953,4474.08,1999872,50.938,55.8575,47.2543,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-22-41
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.85748937304069
  episode_reward_mean: 51.02105911899019
  episode_reward_min: 47.25431806875245
  episodes_this_iter: 8
  episodes_total: 20016
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2001920
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.06559362262487411
        max_q: 3.0796916484832764
        mean_q: 2.7178635597229004
        mean_td_error: -0.9008932113647461
        min_q: 2.2972586154937744
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006275436840951443
        max_q: 3.759875774383545
        mean_q: 3.5889406204223633
        mean_td_error: -0.039754800498485565
        min_q: 3.4653875827789307
    num_steps_sampled: 2001920
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1955,4480.82,2001920,51.0211,55.8575,47.2543,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-22-48
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.85748937304069
  episode_reward_mean: 51.02455823069552
  episode_reward_min: 48.215507231017284
  episodes_this_iter: 8
  episodes_total: 20032
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2003968
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.020041845738887787
        max_q: 2.468515396118164
        mean_q: 2.246999979019165
        mean_td_error: -0.24931970238685608
        min_q: 2.017528533935547
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006502584088593721
        max_q: 3.807384490966797
        mean_q: 3.745215654373169
        mean_td_error: -0.06487151235342026
        min_q: 3.692368745803833
    num_steps_sampled: 2003968
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1957,4487.45,2003968,51.0246,55.8575,48.2155,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-22-55
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.69944473543897
  episode_reward_mean: 51.05701032577022
  episode_reward_min: 44.73803643423976
  episodes_this_iter: 8
  episodes_total: 20056
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2006016
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03725061193108559
        max_q: 1.908904790878296
        mean_q: 1.5862958431243896
        mean_td_error: -0.4661310315132141
        min_q: 1.302704930305481
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008625333197414875
        max_q: 3.976497173309326
        mean_q: 3.880366563796997
        mean_td_error: -0.07136069238185883
        min_q: 3.7504377365112305
    num_steps_sampled: 2006016
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1959,4493.6,2006016,51.057,59.6994,44.738,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-23-00
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.69944473543897
  episode_reward_mean: 51.071093399447776
  episode_reward_min: 44.73803643423976
  episodes_this_iter: 16
  episodes_total: 20080
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2008064
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006757733412086964
        max_q: 2.1755599975585938
        mean_q: 2.0245113372802734
        mean_td_error: -0.07485264539718628
        min_q: 1.889304518699646
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0031347437761723995
        max_q: 4.1434645652771
        mean_q: 4.033027648925781
        mean_td_error: -0.02475469559431076
        min_q: 3.9227607250213623
    num_steps_sampled: 2008064
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1961,4499.2,2008064,51.0711,59.6994,44.738,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-23-06
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.69944473543897
  episode_reward_mean: 51.4758955630428
  episode_reward_min: 44.73803643423976
  episodes_this_iter: 8
  episodes_total: 20096
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2010112
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02141917124390602
        max_q: 2.210643768310547
        mean_q: 1.887925624847412
        mean_td_error: -0.28756964206695557
        min_q: 1.588998794555664
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00864055659621954
        max_q: 4.268670558929443
        mean_q: 4.021603107452393
        mean_td_error: -0.08568435907363892
        min_q: 3.9092752933502197
    num_steps_sampled: 2010112
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1963,4504.81,2010112,51.4759,59.6994,44.738,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-23-12
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.69944473543897
  episode_reward_mean: 52.01200459177425
  episode_reward_min: 44.73803643423976
  episodes_this_iter: 16
  episodes_total: 20120
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2012160
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.018543753772974014
        max_q: 1.9621185064315796
        mean_q: 1.7705104351043701
        mean_td_error: -0.25447994470596313
        min_q: 1.4929217100143433
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.029256148263812065
        max_q: 4.158017635345459
        mean_q: 4.0128889083862305
        mean_td_error: -0.283820241689682
        min_q: 3.8885858058929443
    num_steps_sampled: 2012160
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1965,4510.41,2012160,52.012,59.6994,44.738,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-23-18
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.69944473543897
  episode_reward_mean: 52.72303494940842
  episode_reward_min: 44.73803643423976
  episodes_this_iter: 8
  episodes_total: 20136
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2014208
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.017111970111727715
        max_q: 2.433897018432617
        mean_q: 2.216135263442993
        mean_td_error: 0.2275189459323883
        min_q: 2.1030635833740234
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003031622152775526
        max_q: 4.201216697692871
        mean_q: 4.020335674285889
        mean_td_error: -0.03221879154443741
        min_q: 3.8647732734680176
    num_steps_sampled: 2014208
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1967,4516.14,2014208,52.723,59.6994,44.738,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-23-23
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.82754103836083
  episode_reward_mean: 52.36284846098697
  episode_reward_min: 46.61693759969002
  episodes_this_iter: 8
  episodes_total: 20160
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2016256
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009325447492301464
        max_q: 2.432356119155884
        mean_q: 2.182490825653076
        mean_td_error: -0.1126357764005661
        min_q: 2.1113698482513428
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010112524963915348
        max_q: 4.256147861480713
        mean_q: 4.141014575958252
        mean_td_error: -0.1150171309709549
        min_q: 3.853416919708252
    num_steps_sampled: 2016256
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1969,4521.45,2016256,52.3628,56.8275,46.6169,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-23-29
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.19973414507153
  episode_reward_mean: 53.46749139133751
  episode_reward_min: 46.61693759969002
  episodes_this_iter: 8
  episodes_total: 20176
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2018304
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008012127131223679
        max_q: 2.5116329193115234
        mean_q: 2.4088234901428223
        mean_td_error: -0.10659625381231308
        min_q: 2.2542331218719482
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.07183673977851868
        max_q: 3.6496098041534424
        mean_q: 3.2601571083068848
        mean_td_error: -0.7842220664024353
        min_q: 2.788479804992676
    num_steps_sampled: 2018304
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1971,4526.78,2018304,53.4675,59.1997,46.6169,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-23-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.19973414507153
  episode_reward_mean: 53.10000857646195
  episode_reward_min: 46.678330425418466
  episodes_this_iter: 8
  episodes_total: 20200
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2020352
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002068013185635209
        max_q: 2.863685131072998
        mean_q: 2.717412233352661
        mean_td_error: -0.015858829021453857
        min_q: 2.5845327377319336
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03356552869081497
        max_q: 3.0383076667785645
        mean_q: 2.6318793296813965
        mean_td_error: -0.38140687346458435
        min_q: 2.4506821632385254
    num_steps_sampled: 2020352
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1973,4532.11,2020352,53.1,59.1997,46.6783,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-23-40
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.19973414507153
  episode_reward_mean: 53.68012956992019
  episode_reward_min: 46.678330425418466
  episodes_this_iter: 16
  episodes_total: 20224
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2022400
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0030281213112175465
        max_q: 3.0570852756500244
        mean_q: 2.9125797748565674
        mean_td_error: 0.04917287081480026
        min_q: 2.8298707008361816
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.016058897599577904
        max_q: 2.729416608810425
        mean_q: 2.5898430347442627
        mean_td_error: -0.1414838582277298
        min_q: 2.5122158527374268
    num_steps_sampled: 2022400
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1975,4537.34,2022400,53.6801,59.1997,46.6783,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-23-45
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.19973414507153
  episode_reward_mean: 53.786419364575195
  episode_reward_min: 46.678330425418466
  episodes_this_iter: 8
  episodes_total: 20240
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2024448
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003104384522885084
        max_q: 3.4680652618408203
        mean_q: 3.381129741668701
        mean_td_error: -0.04647424817085266
        min_q: 3.319761276245117
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.031976085156202316
        max_q: 2.7525625228881836
        mean_q: 2.5830347537994385
        mean_td_error: -0.3300531506538391
        min_q: 2.306478261947632
    num_steps_sampled: 2024448
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1977,4542.69,2024448,53.7864,59.1997,46.6783,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-23-51
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.19973414507153
  episode_reward_mean: 54.369876711970456
  episode_reward_min: 48.27512607928564
  episodes_this_iter: 16
  episodes_total: 20264
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2026496
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0024376639630645514
        max_q: 3.716252326965332
        mean_q: 3.4711899757385254
        mean_td_error: 0.025005079805850983
        min_q: 3.338409900665283
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03941941261291504
        max_q: 2.521676778793335
        mean_q: 2.325716495513916
        mean_td_error: -0.3408920168876648
        min_q: 2.1815025806427
    num_steps_sampled: 2026496
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1979,4548.08,2026496,54.3699,59.1997,48.2751,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-23-57
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.044018687360385
  episode_reward_mean: 54.29722422388338
  episode_reward_min: 48.27512607928564
  episodes_this_iter: 8
  episodes_total: 20280
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2028544
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00340630067512393
        max_q: 3.4677581787109375
        mean_q: 3.3270926475524902
        mean_td_error: 0.04323669523000717
        min_q: 3.2038941383361816
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01239803247153759
        max_q: 2.7079355716705322
        mean_q: 2.477206230163574
        mean_td_error: -0.11267471313476562
        min_q: 2.290212631225586
    num_steps_sampled: 2028544
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1981,4553.93,2028544,54.2972,59.044,48.2751,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-24-04
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.044018687360385
  episode_reward_mean: 54.53587307681239
  episode_reward_min: 51.633300125139165
  episodes_this_iter: 16
  episodes_total: 20304
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2030592
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005148059222847223
        max_q: 3.596973180770874
        mean_q: 3.4531807899475098
        mean_td_error: -0.0699610561132431
        min_q: 3.284040689468384
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007101006340235472
        max_q: 2.5366616249084473
        mean_q: 2.2105259895324707
        mean_td_error: -0.05094069615006447
        min_q: 1.9979881048202515
    num_steps_sampled: 2030592
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1983,4560.54,2030592,54.5359,59.044,51.6333,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-24-11
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.044018687360385
  episode_reward_mean: 53.725302765793465
  episode_reward_min: 49.42433788967966
  episodes_this_iter: 8
  episodes_total: 20320
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2032640
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0056275976821780205
        max_q: 3.5445759296417236
        mean_q: 3.418172597885132
        mean_td_error: -0.08014453202486038
        min_q: 3.2210750579833984
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007661775685846806
        max_q: 2.661834239959717
        mean_q: 2.507594108581543
        mean_td_error: -0.07953165471553802
        min_q: 2.3494527339935303
    num_steps_sampled: 2032640
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1985,4567.52,2032640,53.7253,59.044,49.4243,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-24-19
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.565178139136876
  episode_reward_mean: 53.51587286890388
  episode_reward_min: 49.42433788967966
  episodes_this_iter: 8
  episodes_total: 20344
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2034688
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0014937769155949354
        max_q: 3.8195528984069824
        mean_q: 3.7265796661376953
        mean_td_error: 0.00873393565416336
        min_q: 3.6687986850738525
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007272900082170963
        max_q: 2.8823893070220947
        mean_q: 2.7850327491760254
        mean_td_error: -0.08451363444328308
        min_q: 2.71572208404541
    num_steps_sampled: 2034688
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1987,4574.91,2034688,53.5159,57.5652,49.4243,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-24-26
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.565178139136876
  episode_reward_mean: 53.43694016402529
  episode_reward_min: 49.42433788967966
  episodes_this_iter: 8
  episodes_total: 20360
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2036736
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012875801883637905
        max_q: 3.7853543758392334
        mean_q: 3.721313714981079
        mean_td_error: -0.20550355315208435
        min_q: 3.6706032752990723
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002331690164282918
        max_q: 3.421386480331421
        mean_q: 3.234044075012207
        mean_td_error: 0.01845351606607437
        min_q: 3.068483829498291
    num_steps_sampled: 2036736
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1989,4581.78,2036736,53.4369,57.5652,49.4243,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-24-31
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.737896298989895
  episode_reward_mean: 53.68593351101043
  episode_reward_min: 49.42433788967966
  episodes_this_iter: 8
  episodes_total: 20384
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2038784
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0024511446245014668
        max_q: 3.89850115776062
        mean_q: 3.7582459449768066
        mean_td_error: -0.0004873797297477722
        min_q: 3.6241679191589355
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0029364577494561672
        max_q: 3.4429240226745605
        mean_q: 3.313746929168701
        mean_td_error: -0.01519358903169632
        min_q: 3.1924889087677
    num_steps_sampled: 2038784
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1991,4587.2,2038784,53.6859,58.7379,49.4243,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-24-39
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.737896298989895
  episode_reward_mean: 54.203360578223965
  episode_reward_min: 50.0523694967389
  episodes_this_iter: 8
  episodes_total: 20416
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2041856
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0020099107641726732
        max_q: 4.136943817138672
        mean_q: 4.0104522705078125
        mean_td_error: -0.02497420459985733
        min_q: 3.9172251224517822
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013022351078689098
        max_q: 4.009491443634033
        mean_q: 3.8833730220794678
        mean_td_error: 0.2385900467634201
        min_q: 3.7271997928619385
    num_steps_sampled: 2041856
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1994,4594.06,2041856,54.2034,58.7379,50.0524,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-24-45
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.737896298989895
  episode_reward_mean: 53.95471767312018
  episode_reward_min: 48.86742561510404
  episodes_this_iter: 16
  episodes_total: 20448
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2044928
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0025004642084240913
        max_q: 4.126657009124756
        mean_q: 4.0613226890563965
        mean_td_error: -0.048587262630462646
        min_q: 3.9445390701293945
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002855371916666627
        max_q: 4.554157257080078
        mean_q: 4.442601203918457
        mean_td_error: 0.04810492694377899
        min_q: 4.2883620262146
    num_steps_sampled: 2044928
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,1997,4600.29,2044928,53.9547,58.7379,48.8674,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-24-51
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.20196202081732
  episode_reward_mean: 53.03883275774507
  episode_reward_min: 48.86742561510404
  episodes_this_iter: 16
  episodes_total: 20480
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2048000
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0031317288521677256
        max_q: 4.203771591186523
        mean_q: 4.141307830810547
        mean_td_error: -0.06375236809253693
        min_q: 4.101563930511475
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003409390104934573
        max_q: 4.921447277069092
        mean_q: 4.850951194763184
        mean_td_error: 0.05740910768508911
        min_q: 4.765236854553223
    num_steps_sampled: 2048000
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2000,4605.82,2048000,53.0388,58.202,48.8674,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-24-57
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.20196202081732
  episode_reward_mean: 52.78554735768456
  episode_reward_min: 48.86742561510404
  episodes_this_iter: 8
  episodes_total: 20504
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2051072
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002523340517655015
        max_q: 4.4895405769348145
        mean_q: 4.3736491203308105
        mean_td_error: 0.03934371471405029
        min_q: 4.28065299987793
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001910999184474349
        max_q: 4.974703788757324
        mean_q: 4.928713798522949
        mean_td_error: 0.031844839453697205
        min_q: 4.794199466705322
    num_steps_sampled: 2051072
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2003,4611.45,2051072,52.7855,58.202,48.8674,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-25-02
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.05990963200301
  episode_reward_mean: 52.741396569044774
  episode_reward_min: 48.9622253948618
  episodes_this_iter: 8
  episodes_total: 20536
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2054144
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003752741264179349
        max_q: 4.077390670776367
        mean_q: 3.9705023765563965
        mean_td_error: -0.07467188686132431
        min_q: 3.7008306980133057
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006382085382938385
        max_q: 4.9788994789123535
        mean_q: 4.872791290283203
        mean_td_error: -0.11708630621433258
        min_q: 4.817506313323975
    num_steps_sampled: 2054144
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2006,4616.96,2054144,52.7414,56.0599,48.9622,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-25-08
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.05990963200301
  episode_reward_mean: 52.747608303458854
  episode_reward_min: 49.556907356416055
  episodes_this_iter: 8
  episodes_total: 20568
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2057216
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008524337783455849
        max_q: 3.841203212738037
        mean_q: 3.74348521232605
        mean_td_error: 0.16182337701320648
        min_q: 3.6080095767974854
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0016510447021573782
        max_q: 5.012294292449951
        mean_q: 4.9626030921936035
        mean_td_error: 0.024654418230056763
        min_q: 4.891509532928467
    num_steps_sampled: 2057216
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2009,4622.59,2057216,52.7476,56.0599,49.5569,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-25-15
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.05990963200301
  episode_reward_mean: 52.83400019525672
  episode_reward_min: 49.556907356416055
  episodes_this_iter: 8
  episodes_total: 20600
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2060288
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007093848194926977
        max_q: 3.934022903442383
        mean_q: 3.768188238143921
        mean_td_error: -0.11994417011737823
        min_q: 3.593137741088867
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0026875906623899937
        max_q: 4.985406875610352
        mean_q: 4.949674129486084
        mean_td_error: -0.05391104519367218
        min_q: 4.886432647705078
    num_steps_sampled: 2060288
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2012,4628.82,2060288,52.834,56.0599,49.5569,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-25-21
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.20247148582648
  episode_reward_mean: 51.46923988146913
  episode_reward_min: 46.80530524222082
  episodes_this_iter: 16
  episodes_total: 20632
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2063360
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013472060672938824
        max_q: 3.8107707500457764
        mean_q: 3.600102663040161
        mean_td_error: -0.24446967244148254
        min_q: 3.4760055541992188
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005457970779389143
        max_q: 4.967459201812744
        mean_q: 4.889893531799316
        mean_td_error: -0.09102985262870789
        min_q: 4.789215564727783
    num_steps_sampled: 2063360
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2015,4634.34,2063360,51.4692,56.2025,46.8053,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-25-26
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.78894764811083
  episode_reward_mean: 51.09544835753208
  episode_reward_min: 46.80530524222082
  episodes_this_iter: 16
  episodes_total: 20664
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2066432
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0064620343036949635
        max_q: 3.8676860332489014
        mean_q: 3.7122740745544434
        mean_td_error: -0.10313695669174194
        min_q: 3.6372177600860596
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004158560186624527
        max_q: 4.558895111083984
        mean_q: 4.4703450202941895
        mean_td_error: -0.07724925875663757
        min_q: 4.3567214012146
    num_steps_sampled: 2066432
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2018,4639.74,2066432,51.0954,56.7889,46.8053,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-25-32
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.78894764811083
  episode_reward_mean: 51.44049417449462
  episode_reward_min: 46.80530524222082
  episodes_this_iter: 8
  episodes_total: 20688
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2069504
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0027521690353751183
        max_q: 4.0367631912231445
        mean_q: 3.888427734375
        mean_td_error: 0.05932338535785675
        min_q: 3.8257954120635986
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0025431355461478233
        max_q: 4.407309055328369
        mean_q: 4.3697614669799805
        mean_td_error: -0.027715936303138733
        min_q: 4.305703639984131
    num_steps_sampled: 2069504
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2021,4645.11,2069504,51.4405,56.7889,46.8053,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-25-38
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.78894764811083
  episode_reward_mean: 51.68615179858303
  episode_reward_min: 46.89751494884232
  episodes_this_iter: 8
  episodes_total: 20720
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2072576
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0020135599188506603
        max_q: 4.152203559875488
        mean_q: 4.033297538757324
        mean_td_error: -0.04261583089828491
        min_q: 3.9686641693115234
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005390027537941933
        max_q: 4.587699890136719
        mean_q: 4.224517822265625
        mean_td_error: -0.0484742745757103
        min_q: 3.8639233112335205
    num_steps_sampled: 2072576
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2024,4650.74,2072576,51.6862,56.7889,46.8975,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-25-44
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.78894764811083
  episode_reward_mean: 52.22782192276207
  episode_reward_min: 46.89751494884232
  episodes_this_iter: 8
  episodes_total: 20752
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2075648
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0011232461547479033
        max_q: 4.284507751464844
        mean_q: 4.22628116607666
        mean_td_error: -0.018401041626930237
        min_q: 4.099050521850586
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003452372970059514
        max_q: 4.2771477699279785
        mean_q: 4.115535259246826
        mean_td_error: -0.02855566143989563
        min_q: 3.8459253311157227
    num_steps_sampled: 2075648
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2027,4656.52,2075648,52.2278,56.7889,46.8975,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-25-50
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.24964668840369
  episode_reward_mean: 52.1985403084179
  episode_reward_min: 46.89751494884232
  episodes_this_iter: 8
  episodes_total: 20784
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2078720
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0023882717359811068
        max_q: 4.37632417678833
        mean_q: 4.228829383850098
        mean_td_error: -0.020901456475257874
        min_q: 4.051730632781982
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.034687627106904984
        max_q: 3.917647361755371
        mean_q: 3.5553133487701416
        mean_td_error: -0.4360121190547943
        min_q: 3.2025437355041504
    num_steps_sampled: 2078720
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2030,4662.51,2078720,52.1985,56.2496,46.8975,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-25-56
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.24964668840369
  episode_reward_mean: 53.29946346082504
  episode_reward_min: 50.049341918290594
  episodes_this_iter: 16
  episodes_total: 20816
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2081792
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.06942390650510788
        max_q: 3.7631359100341797
        mean_q: 2.771064519882202
        mean_td_error: -1.7035828828811646
        min_q: 1.9288597106933594
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.036921970546245575
        max_q: 3.0177037715911865
        mean_q: 2.6073999404907227
        mean_td_error: -0.5020983815193176
        min_q: 2.211902141571045
    num_steps_sampled: 2081792
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2033,4668.71,2081792,53.2995,56.2496,50.0493,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-26-02
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.24964668840369
  episode_reward_mean: 53.16982863921272
  episode_reward_min: 50.828462424515614
  episodes_this_iter: 16
  episodes_total: 20848
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2084864
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013352954760193825
        max_q: 3.274869680404663
        mean_q: 2.936901092529297
        mean_td_error: -0.1708398461341858
        min_q: 2.76806640625
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01927175186574459
        max_q: 2.0276103019714355
        mean_q: 1.7401920557022095
        mean_td_error: -0.21765771508216858
        min_q: 1.5355052947998047
    num_steps_sampled: 2084864
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2036,4674.56,2084864,53.1698,56.2496,50.8285,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-26-08
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.24964668840369
  episode_reward_mean: 53.22390467195744
  episode_reward_min: 50.828462424515614
  episodes_this_iter: 8
  episodes_total: 20872
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2087936
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00766750006005168
        max_q: 3.255980968475342
        mean_q: 3.094727039337158
        mean_td_error: -0.10302639752626419
        min_q: 2.972501277923584
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015449224039912224
        max_q: 2.083777666091919
        mean_q: 1.9449759721755981
        mean_td_error: -0.17911297082901
        min_q: 1.8457235097885132
    num_steps_sampled: 2087936
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2039,4680.37,2087936,53.2239,56.2496,50.8285,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-26-14
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.54828653812977
  episode_reward_mean: 53.205915312040304
  episode_reward_min: 50.95041911012558
  episodes_this_iter: 8
  episodes_total: 20904
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2091008
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.04282530024647713
        max_q: 2.980046510696411
        mean_q: 2.723573684692383
        mean_td_error: -0.489790141582489
        min_q: 2.57495379447937
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015263134613633156
        max_q: 2.7436859607696533
        mean_q: 2.588336944580078
        mean_td_error: -0.15904268622398376
        min_q: 2.430893898010254
    num_steps_sampled: 2091008
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2042,4686.5,2091008,53.2059,55.5483,50.9504,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-26-21
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.572456478963254
  episode_reward_mean: 52.511947666088
  episode_reward_min: 48.60171460173091
  episodes_this_iter: 8
  episodes_total: 20936
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2094080
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004238318186253309
        max_q: 2.639474868774414
        mean_q: 2.427001953125
        mean_td_error: 0.03623814135789871
        min_q: 2.2890982627868652
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009337680414319038
        max_q: 3.009150266647339
        mean_q: 2.721975803375244
        mean_td_error: 0.1261158436536789
        min_q: 2.436699628829956
    num_steps_sampled: 2094080
    num_steps_trained: 20930

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2045,4692.68,2094080,52.5119,55.5725,48.6017,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-26-27
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.572456478963254
  episode_reward_mean: 51.744394138204306
  episode_reward_min: 48.60171460173091
  episodes_this_iter: 8
  episodes_total: 20968
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2097152
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00702683674171567
        max_q: 3.3300881385803223
        mean_q: 3.252185821533203
        mean_td_error: 0.12070360779762268
        min_q: 3.0902624130249023
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004954403266310692
        max_q: 2.660645008087158
        mean_q: 2.558739423751831
        mean_td_error: -0.05836842954158783
        min_q: 2.4921953678131104
    num_steps_sampled: 2097152
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2048,4698.79,2097152,51.7444,55.5725,48.6017,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-26-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.572456478963254
  episode_reward_mean: 51.28340091775253
  episode_reward_min: 48.60171460173091
  episodes_this_iter: 8
  episodes_total: 21000
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2100224
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006655884440988302
        max_q: 3.806544065475464
        mean_q: 3.734795093536377
        mean_td_error: 0.11908698827028275
        min_q: 3.63883376121521
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01291599404066801
        max_q: 2.981642007827759
        mean_q: 2.762681007385254
        mean_td_error: -0.15161359310150146
        min_q: 2.530399799346924
    num_steps_sampled: 2100224
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2051,4705.45,2100224,51.2834,55.5725,48.6017,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-26-40
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.76290430389655
  episode_reward_mean: 51.66020566321813
  episode_reward_min: 47.039185902346105
  episodes_this_iter: 16
  episodes_total: 21032
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2103296
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0028781115543097258
        max_q: 4.122428894042969
        mean_q: 4.052637100219727
        mean_td_error: -0.04806721210479736
        min_q: 3.9552197456359863
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013133411295711994
        max_q: 2.97210693359375
        mean_q: 2.783891201019287
        mean_td_error: -0.15020398795604706
        min_q: 2.6783831119537354
    num_steps_sampled: 2103296
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2054,4711.56,2103296,51.6602,55.7629,47.0392,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-26-47
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.76290430389655
  episode_reward_mean: 51.31894026849204
  episode_reward_min: 47.039185902346105
  episodes_this_iter: 8
  episodes_total: 21056
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2106368
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003133118385449052
        max_q: 4.0651350021362305
        mean_q: 3.8983638286590576
        mean_td_error: -0.04264648258686066
        min_q: 3.7140393257141113
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0045217606239020824
        max_q: 3.1002087593078613
        mean_q: 2.9946937561035156
        mean_td_error: -0.05012824386358261
        min_q: 2.9266135692596436
    num_steps_sampled: 2106368
    num_steps_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2057,4717.5,2106368,51.3189,55.7629,47.0392,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-26-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.76290430389655
  episode_reward_mean: 51.137262774664784
  episode_reward_min: 47.039185902346105
  episodes_this_iter: 8
  episodes_total: 21088
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2109440
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0024520871229469776
        max_q: 4.031115531921387
        mean_q: 3.948131799697876
        mean_td_error: -0.027486301958560944
        min_q: 3.846759796142578
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.018948791548609734
        max_q: 3.2506022453308105
        mean_q: 3.1036314964294434
        mean_td_error: -0.24294370412826538
        min_q: 2.9577019214630127
    num_steps_sampled: 2109440
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2060,4723.27,2109440,51.1373,55.7629,47.0392,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-26-58
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.88045346391877
  episode_reward_mean: 49.96553086877012
  episode_reward_min: 45.721025479735175
  episodes_this_iter: 8
  episodes_total: 21120
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2112512
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00408516637980938
        max_q: 4.292484760284424
        mean_q: 4.10280179977417
        mean_td_error: -0.06334535777568817
        min_q: 3.924144983291626
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008141251280903816
        max_q: 3.2454686164855957
        mean_q: 2.987006187438965
        mean_td_error: -0.0859571173787117
        min_q: 2.7597029209136963
    num_steps_sampled: 2112512
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2063,4728.98,2112512,49.9655,53.8805,45.721,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-27-04
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.88045346391877
  episode_reward_mean: 49.705116452954265
  episode_reward_min: 45.66714832369532
  episodes_this_iter: 8
  episodes_total: 21152
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2115584
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0018998426385223866
        max_q: 4.361633777618408
        mean_q: 4.208000183105469
        mean_td_error: -0.007504105567932129
        min_q: 4.097130298614502
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009300011210143566
        max_q: 2.7885003089904785
        mean_q: 2.5619990825653076
        mean_td_error: 0.1131168082356453
        min_q: 2.3656625747680664
    num_steps_sampled: 2115584
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2066,4734.57,2115584,49.7051,53.8805,45.6671,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-27-10
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.88045346391877
  episode_reward_mean: 49.666057945429564
  episode_reward_min: 45.66714832369532
  episodes_this_iter: 8
  episodes_total: 21184
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2118656
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004917497280985117
        max_q: 4.117775917053223
        mean_q: 4.039361953735352
        mean_td_error: -0.07769923657178879
        min_q: 3.974212169647217
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.049589887261390686
        max_q: 2.7667691707611084
        mean_q: 2.368950605392456
        mean_td_error: -0.5060648918151855
        min_q: 2.106306314468384
    num_steps_sampled: 2118656
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2069,4740.06,2118656,49.6661,53.8805,45.6671,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-27-15
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.811128802914304
  episode_reward_mean: 48.80920821470613
  episode_reward_min: 45.10733708923811
  episodes_this_iter: 16
  episodes_total: 21216
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2121728
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00434202840551734
        max_q: 4.194664478302002
        mean_q: 4.093752861022949
        mean_td_error: -0.06425300240516663
        min_q: 3.922201156616211
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.05341804027557373
        max_q: 1.8790209293365479
        mean_q: 1.3840396404266357
        mean_td_error: -0.5643145442008972
        min_q: 1.162139892578125
    num_steps_sampled: 2121728
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2072,4745.48,2121728,48.8092,53.8111,45.1073,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-27-21
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.811128802914304
  episode_reward_mean: 48.93608662578936
  episode_reward_min: 45.10733708923811
  episodes_this_iter: 16
  episodes_total: 21248
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2124800
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0018505211919546127
        max_q: 4.230736255645752
        mean_q: 4.143982410430908
        mean_td_error: 0.030407734215259552
        min_q: 4.021490097045898
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013328923843801022
        max_q: 1.9399206638336182
        mean_q: 1.687744379043579
        mean_td_error: -0.1269458383321762
        min_q: 1.42852783203125
    num_steps_sampled: 2124800
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2075,4751.08,2124800,48.9361,53.8111,45.1073,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-27-27
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.25754253239354
  episode_reward_mean: 48.719168090540116
  episode_reward_min: 45.10733708923811
  episodes_this_iter: 8
  episodes_total: 21272
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2127872
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0016596775967627764
        max_q: 4.425872325897217
        mean_q: 4.361727714538574
        mean_td_error: 0.025058597326278687
        min_q: 4.22121524810791
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011939441785216331
        max_q: 2.1644396781921387
        mean_q: 2.023888349533081
        mean_td_error: -0.12099094688892365
        min_q: 1.8802505731582642
    num_steps_sampled: 2127872
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2078,4756.77,2127872,48.7192,59.2575,45.1073,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-27-33
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.25754253239354
  episode_reward_mean: 50.75107942071628
  episode_reward_min: 45.10733708923811
  episodes_this_iter: 8
  episodes_total: 21304
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2130944
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002270192839205265
        max_q: 4.040554523468018
        mean_q: 3.9053401947021484
        mean_td_error: -0.034517474472522736
        min_q: 3.7584609985351562
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01645681820809841
        max_q: 3.1820197105407715
        mean_q: 1.7253272533416748
        mean_td_error: -0.13697679340839386
        min_q: 1.1910898685455322
    num_steps_sampled: 2130944
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2081,4762.45,2130944,50.7511,59.2575,45.1073,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-27-39
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.25754253239354
  episode_reward_mean: 51.504259180143684
  episode_reward_min: 45.81923701301904
  episodes_this_iter: 8
  episodes_total: 21336
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2134016
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.021396158263087273
        max_q: 3.633815050125122
        mean_q: 3.437788724899292
        mean_td_error: -0.351167768239975
        min_q: 3.314148426055908
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0045153130777180195
        max_q: 2.19063401222229
        mean_q: 2.0780603885650635
        mean_td_error: -0.04447847604751587
        min_q: 1.97481107711792
    num_steps_sampled: 2134016
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2084,4768.1,2134016,51.5043,59.2575,45.8192,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-27-45
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.86004944968931
  episode_reward_mean: 51.51097845170162
  episode_reward_min: 44.78013108330111
  episodes_this_iter: 8
  episodes_total: 21368
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2137088
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013678155839443207
        max_q: 3.2605092525482178
        mean_q: 3.013488531112671
        mean_td_error: -0.2217310070991516
        min_q: 2.7071611881256104
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008278902620077133
        max_q: 2.9792890548706055
        mean_q: 2.919348955154419
        mean_td_error: 0.10910601168870926
        min_q: 2.8539085388183594
    num_steps_sampled: 2137088
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2087,4773.76,2137088,51.511,57.86,44.7801,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-27-51
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.3545902643014
  episode_reward_mean: 50.081802584659854
  episode_reward_min: 44.78013108330111
  episodes_this_iter: 16
  episodes_total: 21400
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2140160
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013694227673113346
        max_q: 3.4869978427886963
        mean_q: 3.199857473373413
        mean_td_error: -0.19033676385879517
        min_q: 3.045818328857422
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002743086777627468
        max_q: 3.600111961364746
        mean_q: 3.528881549835205
        mean_td_error: -0.03549136221408844
        min_q: 3.371792793273926
    num_steps_sampled: 2140160
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2090,4779.46,2140160,50.0818,54.3546,44.7801,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-27-57
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 52.86498664558318
  episode_reward_mean: 49.54633925968808
  episode_reward_min: 44.78013108330111
  episodes_this_iter: 16
  episodes_total: 21432
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2143232
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02074558287858963
        max_q: 3.250302314758301
        mean_q: 2.3145596981048584
        mean_td_error: -0.27827826142311096
        min_q: 1.7546758651733398
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0030012433417141438
        max_q: 3.628788948059082
        mean_q: 3.4873664379119873
        mean_td_error: -0.01785137504339218
        min_q: 3.4203808307647705
    num_steps_sampled: 2143232
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2093,4785.36,2143232,49.5463,52.865,44.7801,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-28-02
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.183336736683806
  episode_reward_mean: 50.88504736463785
  episode_reward_min: 44.78013108330111
  episodes_this_iter: 8
  episodes_total: 21456
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2146304
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002679134253412485
        max_q: 1.8384569883346558
        mean_q: 1.6910842657089233
        mean_td_error: -0.029618944972753525
        min_q: 1.521681785583496
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006288725882768631
        max_q: 3.7981414794921875
        mean_q: 3.701056480407715
        mean_td_error: -0.0893772691488266
        min_q: 3.558302402496338
    num_steps_sampled: 2146304
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2096,4790.98,2146304,50.885,57.1833,44.7801,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-28-08
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.19234029638812
  episode_reward_mean: 52.48852128759747
  episode_reward_min: 45.70953632367358
  episodes_this_iter: 8
  episodes_total: 21488
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2149376
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0060325320810079575
        max_q: 2.6018669605255127
        mean_q: 2.508544445037842
        mean_td_error: 0.11510714143514633
        min_q: 2.3488407135009766
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010963448323309422
        max_q: 3.908963680267334
        mean_q: 3.842320203781128
        mean_td_error: -0.14009082317352295
        min_q: 3.779510259628296
    num_steps_sampled: 2149376
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2099,4796.55,2149376,52.4885,60.1923,45.7095,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-28-14
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.19234029638812
  episode_reward_mean: 52.78880801932311
  episode_reward_min: 47.50594688328468
  episodes_this_iter: 8
  episodes_total: 21520
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2152448
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005647513084113598
        max_q: 3.451699733734131
        mean_q: 3.2167880535125732
        mean_td_error: 0.11227814108133316
        min_q: 3.10465145111084
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012001763097941875
        max_q: 4.062761306762695
        mean_q: 3.863837957382202
        mean_td_error: -0.14903730154037476
        min_q: 3.7455310821533203
    num_steps_sampled: 2152448
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2102,4802.2,2152448,52.7888,60.1923,47.5059,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-28-20
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.19234029638812
  episode_reward_mean: 52.92890111185122
  episode_reward_min: 47.50594688328468
  episodes_this_iter: 8
  episodes_total: 21552
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2155520
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0006272104219533503
        max_q: 3.6998894214630127
        mean_q: 3.6053318977355957
        mean_td_error: 0.004247158765792847
        min_q: 3.50431752204895
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0029183016158640385
        max_q: 4.178999900817871
        mean_q: 4.099397659301758
        mean_td_error: 0.03125918656587601
        min_q: 4.022248268127441
    num_steps_sampled: 2155520
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2105,4808.39,2155520,52.9289,60.1923,47.5059,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-28-27
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.042461200759156
  episode_reward_mean: 52.15472286549083
  episode_reward_min: 47.50594688328468
  episodes_this_iter: 16
  episodes_total: 21584
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2158592
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0006857596454210579
        max_q: 4.1884026527404785
        mean_q: 4.0999884605407715
        mean_td_error: -0.007868438959121704
        min_q: 4.014925956726074
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0030654475558549166
        max_q: 4.271381855010986
        mean_q: 4.186304092407227
        mean_td_error: -0.03211018443107605
        min_q: 4.127558708190918
    num_steps_sampled: 2158592
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2108,4814.38,2158592,52.1547,56.0425,47.5059,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-28-33
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.042461200759156
  episode_reward_mean: 53.210975583328455
  episode_reward_min: 51.10444749522671
  episodes_this_iter: 16
  episodes_total: 21616
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2161664
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0013428939273580909
        max_q: 4.422599792480469
        mean_q: 4.341341495513916
        mean_td_error: -0.025757744908332825
        min_q: 4.2531938552856445
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0059691122733056545
        max_q: 4.285475730895996
        mean_q: 4.167252063751221
        mean_td_error: -0.07696625590324402
        min_q: 4.0693817138671875
    num_steps_sampled: 2161664
    num_steps_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2111,4820.54,2161664,53.211,56.0425,51.1044,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-28-39
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.897344714051414
  episode_reward_mean: 52.31647819917069
  episode_reward_min: 49.425633598178706
  episodes_this_iter: 8
  episodes_total: 21640
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2164736
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0016496885800734162
        max_q: 4.40603494644165
        mean_q: 4.317849159240723
        mean_td_error: -0.027589187026023865
        min_q: 4.196753978729248
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0074930340051651
        max_q: 4.6267547607421875
        mean_q: 4.477693557739258
        mean_td_error: -0.09969253838062286
        min_q: 4.412252426147461
    num_steps_sampled: 2164736
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2114,4826.76,2164736,52.3165,55.8973,49.4256,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-28-46
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.58318037606265
  episode_reward_mean: 53.13176352480732
  episode_reward_min: 47.0944856273808
  episodes_this_iter: 8
  episodes_total: 21672
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2167808
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0011974668595939875
        max_q: 4.101311683654785
        mean_q: 4.04514217376709
        mean_td_error: -0.015733756124973297
        min_q: 3.8946797847747803
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004136132542043924
        max_q: 4.452924728393555
        mean_q: 4.285064697265625
        mean_td_error: -0.03207135200500488
        min_q: 4.133090019226074
    num_steps_sampled: 2167808
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2117,4833.02,2167808,53.1318,58.5832,47.0945,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-28-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.58318037606265
  episode_reward_mean: 52.43409426189981
  episode_reward_min: 45.98810764620089
  episodes_this_iter: 8
  episodes_total: 21704
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2170880
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004167600534856319
        max_q: 4.24578857421875
        mean_q: 4.144617557525635
        mean_td_error: -0.08533604443073273
        min_q: 4.085575103759766
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009389368817210197
        max_q: 4.068314552307129
        mean_q: 3.873058795928955
        mean_td_error: -0.12681856751441956
        min_q: 3.603346824645996
    num_steps_sampled: 2170880
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2120,4839.22,2170880,52.4341,58.5832,45.9881,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-28-58
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.58318037606265
  episode_reward_mean: 53.18938451713344
  episode_reward_min: 45.98810764620089
  episodes_this_iter: 8
  episodes_total: 21736
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2173952
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0014674230478703976
        max_q: 4.417698383331299
        mean_q: 4.304869174957275
        mean_td_error: 0.018422216176986694
        min_q: 4.191028118133545
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014408438466489315
        max_q: 3.5997629165649414
        mean_q: 3.430180549621582
        mean_td_error: -0.1741737425327301
        min_q: 3.277641773223877
    num_steps_sampled: 2173952
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2123,4845.15,2173952,53.1894,58.5832,45.9881,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-29-04
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.881857637627846
  episode_reward_mean: 51.70554755310127
  episode_reward_min: 45.98810764620089
  episodes_this_iter: 8
  episodes_total: 21768
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2177024
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007624842692166567
        max_q: 4.446799278259277
        mean_q: 4.222858428955078
        mean_td_error: -0.15452240407466888
        min_q: 4.038698673248291
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00599251314997673
        max_q: 3.6726341247558594
        mean_q: 3.567002773284912
        mean_td_error: -0.07726404815912247
        min_q: 3.499720335006714
    num_steps_sampled: 2177024
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2126,4850.96,2177024,51.7055,55.8819,45.9881,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-29-10
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.881857637627846
  episode_reward_mean: 52.44073871665676
  episode_reward_min: 49.76019859745546
  episodes_this_iter: 16
  episodes_total: 21800
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2180096
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0013062888756394386
        max_q: 4.518723964691162
        mean_q: 4.416123390197754
        mean_td_error: -0.005542367696762085
        min_q: 4.360383033752441
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0027809671591967344
        max_q: 3.9616141319274902
        mean_q: 3.7659690380096436
        mean_td_error: -0.010014943778514862
        min_q: 3.52321195602417
    num_steps_sampled: 2180096
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2129,4856.66,2180096,52.4407,55.8819,49.7602,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-29-16
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.881857637627846
  episode_reward_mean: 51.39714235561436
  episode_reward_min: 48.35774531167109
  episodes_this_iter: 8
  episodes_total: 21824
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2183168
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007982241921126842
        max_q: 4.537473201751709
        mean_q: 4.453915596008301
        mean_td_error: -0.15617884695529938
        min_q: 4.376487731933594
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013122498989105225
        max_q: 3.5487213134765625
        mean_q: 3.437364339828491
        mean_td_error: -0.15780919790267944
        min_q: 3.3527557849884033
    num_steps_sampled: 2183168
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2132,4862.5,2183168,51.3971,55.8819,48.3577,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-29-23
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.79292841070741
  episode_reward_mean: 50.842668971951326
  episode_reward_min: 47.68181498979196
  episodes_this_iter: 8
  episodes_total: 21856
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2186240
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002890009665861726
        max_q: 4.566778182983398
        mean_q: 4.3622870445251465
        mean_td_error: 0.0465414822101593
        min_q: 4.251933574676514
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.018176741898059845
        max_q: 3.770646095275879
        mean_q: 3.458287477493286
        mean_td_error: -0.22798386216163635
        min_q: 3.0752193927764893
    num_steps_sampled: 2186240
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2135,4868.64,2186240,50.8427,55.7929,47.6818,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-29-29
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.74926604696168
  episode_reward_mean: 50.70207310896676
  episode_reward_min: 47.68181498979196
  episodes_this_iter: 8
  episodes_total: 21888
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2189312
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006516587920486927
        max_q: 4.396268844604492
        mean_q: 4.250087738037109
        mean_td_error: -0.13372908532619476
        min_q: 4.112628936767578
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004511469975113869
        max_q: 3.8376946449279785
        mean_q: 3.7313928604125977
        mean_td_error: -0.04606100916862488
        min_q: 3.6353280544281006
    num_steps_sampled: 2189312
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2138,4874.38,2189312,50.7021,58.7493,47.6818,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-29-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.74926604696168
  episode_reward_mean: 51.18385812454019
  episode_reward_min: 47.68181498979196
  episodes_this_iter: 8
  episodes_total: 21920
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2192384
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0067333681508898735
        max_q: 4.2133073806762695
        mean_q: 4.053961753845215
        mean_td_error: -0.1315079778432846
        min_q: 3.9218173027038574
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00431714067235589
        max_q: 3.9205856323242188
        mean_q: 3.7519805431365967
        mean_td_error: 0.03775151073932648
        min_q: 3.6262383460998535
    num_steps_sampled: 2192384
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2141,4880.09,2192384,51.1839,58.7493,47.6818,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-29-41
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.74926604696168
  episode_reward_mean: 51.04538809463951
  episode_reward_min: 47.90493539407653
  episodes_this_iter: 8
  episodes_total: 21952
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2195456
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011828753165900707
        max_q: 4.086461544036865
        mean_q: 3.849303960800171
        mean_td_error: -0.2118188887834549
        min_q: 3.7187416553497314
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005697519052773714
        max_q: 4.154334545135498
        mean_q: 3.8690948486328125
        mean_td_error: -0.05714619904756546
        min_q: 3.7409095764160156
    num_steps_sampled: 2195456
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2144,4885.95,2195456,51.0454,58.7493,47.9049,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-29-47
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.811922335418174
  episode_reward_mean: 51.72127333330543
  episode_reward_min: 47.94862389917986
  episodes_this_iter: 16
  episodes_total: 21984
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2198528
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014259792864322662
        max_q: 3.7605481147766113
        mean_q: 3.532074213027954
        mean_td_error: -0.2681048512458801
        min_q: 3.4184985160827637
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010162060149013996
        max_q: 4.217628002166748
        mean_q: 4.170693874359131
        mean_td_error: 0.11750449240207672
        min_q: 4.0072221755981445
    num_steps_sampled: 2198528
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2147,4891.85,2198528,51.7213,57.8119,47.9486,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-29-53
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.811922335418174
  episode_reward_mean: 52.11519822931318
  episode_reward_min: 47.94862389917986
  episodes_this_iter: 16
  episodes_total: 22016
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2201600
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0075376760214567184
        max_q: 3.5032615661621094
        mean_q: 3.3770265579223633
        mean_td_error: -0.13499915599822998
        min_q: 3.1875362396240234
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0091182766482234
        max_q: 4.188715934753418
        mean_q: 3.951794385910034
        mean_td_error: -0.10170798003673553
        min_q: 3.874237060546875
    num_steps_sampled: 2201600
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2150,4897.73,2201600,52.1152,57.8119,47.9486,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-29-59
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.811922335418174
  episode_reward_mean: 52.14305908828581
  episode_reward_min: 47.066144396102
  episodes_this_iter: 8
  episodes_total: 22040
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2204672
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009344179183244705
        max_q: 3.812821865081787
        mean_q: 3.690563678741455
        mean_td_error: -0.17136326432228088
        min_q: 3.5798773765563965
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.027557503432035446
        max_q: 3.864060401916504
        mean_q: 3.375999927520752
        mean_td_error: -0.3217983841896057
        min_q: 2.870305299758911
    num_steps_sampled: 2204672
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2153,4903.68,2204672,52.1431,57.8119,47.0661,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-30-05
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.6945960702587
  episode_reward_mean: 52.29765967290323
  episode_reward_min: 47.066144396102
  episodes_this_iter: 8
  episodes_total: 22072
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2207744
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0047244420275092125
        max_q: 4.243438720703125
        mean_q: 4.053116798400879
        mean_td_error: -0.04550815373659134
        min_q: 3.7602903842926025
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03146858885884285
        max_q: 2.322077989578247
        mean_q: 2.0191447734832764
        mean_td_error: -0.36739686131477356
        min_q: 1.7692887783050537
    num_steps_sampled: 2207744
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2156,4909.55,2207744,52.2977,57.6946,47.0661,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-30-11
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.40581183966728
  episode_reward_mean: 52.33520583198873
  episode_reward_min: 47.066144396102
  episodes_this_iter: 8
  episodes_total: 22104
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2210816
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03490913286805153
        max_q: 4.102760314941406
        mean_q: 3.7554359436035156
        mean_td_error: -0.43928006291389465
        min_q: 3.608030080795288
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00525064067915082
        max_q: 2.390981674194336
        mean_q: 2.2072014808654785
        mean_td_error: 0.028237968683242798
        min_q: 2.034761905670166
    num_steps_sampled: 2210816
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2159,4915.46,2210816,52.3352,58.4058,47.0661,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-30-17
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.40581183966728
  episode_reward_mean: 52.594461302954016
  episode_reward_min: 47.50220130973204
  episodes_this_iter: 8
  episodes_total: 22136
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2213888
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005777995102107525
        max_q: 4.1168389320373535
        mean_q: 3.971832275390625
        mean_td_error: -0.06357625126838684
        min_q: 3.828390121459961
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013422614894807339
        max_q: 2.3638482093811035
        mean_q: 2.142576217651367
        mean_td_error: -0.16455456614494324
        min_q: 1.9253594875335693
    num_steps_sampled: 2213888
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2162,4921.41,2213888,52.5945,58.4058,47.5022,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-30-23
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.40581183966728
  episode_reward_mean: 52.319288223066586
  episode_reward_min: 47.50220130973204
  episodes_this_iter: 16
  episodes_total: 22168
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2216960
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.017079362645745277
        max_q: 3.982403039932251
        mean_q: 3.6331968307495117
        mean_td_error: -0.19453200697898865
        min_q: 3.4553093910217285
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005753827281296253
        max_q: 2.66851544380188
        mean_q: 2.4761922359466553
        mean_td_error: -0.05417393893003464
        min_q: 2.315070390701294
    num_steps_sampled: 2216960
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2165,4927.49,2216960,52.3193,58.4058,47.5022,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-30-30
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.18873747968321
  episode_reward_mean: 52.13331727519745
  episode_reward_min: 47.50220130973204
  episodes_this_iter: 16
  episodes_total: 22200
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2220032
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0074949017725884914
        max_q: 3.8784613609313965
        mean_q: 3.7744390964508057
        mean_td_error: -0.08984053134918213
        min_q: 3.675997495651245
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02844015695154667
        max_q: 2.6781272888183594
        mean_q: 2.451080083847046
        mean_td_error: -0.33562952280044556
        min_q: 2.2838821411132812
    num_steps_sampled: 2220032
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2168,4933.7,2220032,52.1333,56.1887,47.5022,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-30-36
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.18873747968321
  episode_reward_mean: 51.57844196835836
  episode_reward_min: 47.50220130973204
  episodes_this_iter: 8
  episodes_total: 22224
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2223104
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.020914370194077492
        max_q: 3.9405651092529297
        mean_q: 3.603091239929199
        mean_td_error: -0.2427223175764084
        min_q: 3.3446598052978516
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0022398389410227537
        max_q: 2.741831064224243
        mean_q: 2.6333460807800293
        mean_td_error: -0.00260239839553833
        min_q: 2.55236554145813
    num_steps_sampled: 2223104
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2171,4939.88,2223104,51.5784,56.1887,47.5022,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-30-43
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.99361472323425
  episode_reward_mean: 51.88812383657578
  episode_reward_min: 47.75888210046003
  episodes_this_iter: 8
  episodes_total: 22256
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2226176
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0074173337779939175
        max_q: 3.6332786083221436
        mean_q: 3.465742349624634
        mean_td_error: -0.073531873524189
        min_q: 3.360091209411621
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.017422610893845558
        max_q: 2.7866601943969727
        mean_q: 2.688988447189331
        mean_td_error: -0.22424735128879547
        min_q: 2.6038010120391846
    num_steps_sampled: 2226176
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2174,4945.98,2226176,51.8881,57.9936,47.7589,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-30-49
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.99361472323425
  episode_reward_mean: 51.16461505173103
  episode_reward_min: 45.420416854743415
  episodes_this_iter: 8
  episodes_total: 22288
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2229248
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002735479734838009
        max_q: 3.5609912872314453
        mean_q: 3.4959864616394043
        mean_td_error: -0.036069005727767944
        min_q: 3.3888792991638184
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02406824193894863
        max_q: 2.816647529602051
        mean_q: 2.520233154296875
        mean_td_error: -0.2678682506084442
        min_q: 2.2806851863861084
    num_steps_sampled: 2229248
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2177,4952.27,2229248,51.1646,57.9936,45.4204,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-30-56
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.99361472323425
  episode_reward_mean: 51.37838687678488
  episode_reward_min: 45.420416854743415
  episodes_this_iter: 8
  episodes_total: 22320
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2232320
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003610900603234768
        max_q: 3.4634337425231934
        mean_q: 3.400806427001953
        mean_td_error: -0.05437356233596802
        min_q: 3.3528411388397217
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.026988619938492775
        max_q: 2.8470876216888428
        mean_q: 2.6316661834716797
        mean_td_error: -0.31547272205352783
        min_q: 2.518399715423584
    num_steps_sampled: 2232320
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2180,4958.74,2232320,51.3784,57.9936,45.4204,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-31-02
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.171293243720115
  episode_reward_mean: 52.02613870681389
  episode_reward_min: 45.420416854743415
  episodes_this_iter: 16
  episodes_total: 22352
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2235392
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0035764866042882204
        max_q: 3.468618392944336
        mean_q: 3.370741128921509
        mean_td_error: -0.04682769626379013
        min_q: 3.2939083576202393
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01785537227988243
        max_q: 2.805891990661621
        mean_q: 2.713597297668457
        mean_td_error: -0.20952944457530975
        min_q: 2.615431308746338
    num_steps_sampled: 2235392
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2183,4964.87,2235392,52.0261,56.1713,45.4204,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-31-08
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.171293243720115
  episode_reward_mean: 52.407521994581295
  episode_reward_min: 48.256971050663594
  episodes_this_iter: 16
  episodes_total: 22384
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2238464
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0024335558991879225
        max_q: 3.703448534011841
        mean_q: 3.540959119796753
        mean_td_error: 0.029003508388996124
        min_q: 3.418592929840088
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0029077681247144938
        max_q: 3.240050792694092
        mean_q: 3.1337978839874268
        mean_td_error: -0.017426781356334686
        min_q: 3.0192627906799316
    num_steps_sampled: 2238464
    num_steps

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2186,4970.82,2238464,52.4075,56.1713,48.257,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-31-14
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.171293243720115
  episode_reward_mean: 52.5270836949376
  episode_reward_min: 48.256971050663594
  episodes_this_iter: 8
  episodes_total: 22408
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2241536
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0050833337008953094
        max_q: 3.613419532775879
        mean_q: 3.448147773742676
        mean_td_error: -0.058907650411129
        min_q: 3.2932560443878174
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003511575283482671
        max_q: 3.4731760025024414
        mean_q: 3.3065531253814697
        mean_td_error: -0.03354538977146149
        min_q: 3.1685001850128174
    num_steps_sampled: 2241536
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2189,4976.77,2241536,52.5271,56.1713,48.257,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-31-21
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.77945173615642
  episode_reward_mean: 52.435384670680875
  episode_reward_min: 48.256971050663594
  episodes_this_iter: 8
  episodes_total: 22440
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2244608
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0032016453333199024
        max_q: 3.723214626312256
        mean_q: 3.6732075214385986
        mean_td_error: -0.05868776887655258
        min_q: 3.612656831741333
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003301008604466915
        max_q: 3.697211503982544
        mean_q: 3.53369140625
        mean_td_error: 0.04352650046348572
        min_q: 3.3917858600616455
    num_steps_sampled: 2244608
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2192,4982.91,2244608,52.4354,55.7795,48.257,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-31-27
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.910783009607236
  episode_reward_mean: 52.901058062686104
  episode_reward_min: 49.885238486522894
  episodes_this_iter: 8
  episodes_total: 22472
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2247680
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009108605794608593
        max_q: 3.88777494430542
        mean_q: 3.7005503177642822
        mean_td_error: -0.16656151413917542
        min_q: 3.594280481338501
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00199111457914114
        max_q: 3.810122013092041
        mean_q: 3.697352886199951
        mean_td_error: -0.03345387428998947
        min_q: 3.6078810691833496
    num_steps_sampled: 2247680
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2195,4989.02,2247680,52.9011,56.9108,49.8852,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-31-33
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.910783009607236
  episode_reward_mean: 53.29811357384201
  episode_reward_min: 51.44072907686729
  episodes_this_iter: 8
  episodes_total: 22504
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2250752
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00286024808883667
        max_q: 4.127926349639893
        mean_q: 4.038876056671143
        mean_td_error: -0.04072839766740799
        min_q: 3.97373104095459
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0008073493372648954
        max_q: 4.255154132843018
        mean_q: 4.140352249145508
        mean_td_error: 0.008349902927875519
        min_q: 4.015474319458008
    num_steps_sampled: 2250752
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2198,4995.07,2250752,53.2981,56.9108,51.4407,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-31-39
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.7945635071034
  episode_reward_mean: 54.035259752091825
  episode_reward_min: 51.063882587111046
  episodes_this_iter: 8
  episodes_total: 22536
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2253824
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004273093305528164
        max_q: 3.960844039916992
        mean_q: 3.771475315093994
        mean_td_error: -0.03614814579486847
        min_q: 3.634734630584717
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0023470530286431313
        max_q: 4.631226062774658
        mean_q: 4.490271091461182
        mean_td_error: 0.051272422075271606
        min_q: 4.408311367034912
    num_steps_sampled: 2253824
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2201,5001.02,2253824,54.0353,57.7946,51.0639,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-31-46
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.7945635071034
  episode_reward_mean: 53.20421013797447
  episode_reward_min: 50.72841778269395
  episodes_this_iter: 16
  episodes_total: 22568
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2256896
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009572109207510948
        max_q: 3.213711738586426
        mean_q: 3.058016300201416
        mean_td_error: -0.15915139019489288
        min_q: 2.9546754360198975
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004971352405846119
        max_q: 4.27871036529541
        mean_q: 4.1590657234191895
        mean_td_error: -0.11263617873191833
        min_q: 3.9228081703186035
    num_steps_sampled: 2256896
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2204,5007.01,2256896,53.2042,57.7946,50.7284,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-31-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.7945635071034
  episode_reward_mean: 52.99033400105636
  episode_reward_min: 49.262173995981044
  episodes_this_iter: 8
  episodes_total: 22592
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2259968
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0012690045405179262
        max_q: 3.3164258003234863
        mean_q: 3.248613119125366
        mean_td_error: 0.010567963123321533
        min_q: 3.1319832801818848
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005371325649321079
        max_q: 4.050663948059082
        mean_q: 3.9647910594940186
        mean_td_error: -0.10909665375947952
        min_q: 3.908385992050171
    num_steps_sampled: 2259968
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2207,5013.04,2259968,52.9903,57.7946,49.2622,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-31-58
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.7945635071034
  episode_reward_mean: 53.12085388733771
  episode_reward_min: 49.262173995981044
  episodes_this_iter: 8
  episodes_total: 22624
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2263040
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.023278919979929924
        max_q: 3.3631083965301514
        mean_q: 2.989786386489868
        mean_td_error: -0.41634494066238403
        min_q: 2.79239821434021
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0015708765713497996
        max_q: 4.176497459411621
        mean_q: 4.115150451660156
        mean_td_error: -0.01807282865047455
        min_q: 4.037129878997803
    num_steps_sampled: 2263040
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2210,5019.17,2263040,53.1209,57.7946,49.2622,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-32-05
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.195269233567124
  episode_reward_mean: 53.95364594221905
  episode_reward_min: 49.262173995981044
  episodes_this_iter: 8
  episodes_total: 22656
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2266112
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00755060650408268
        max_q: 2.88539981842041
        mean_q: 2.541876792907715
        mean_td_error: -0.1352272927761078
        min_q: 2.298919677734375
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001710296026431024
        max_q: 4.252482891082764
        mean_q: 4.149168968200684
        mean_td_error: -0.020508810877799988
        min_q: 4.05464506149292
    num_steps_sampled: 2266112
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2213,5025.36,2266112,53.9536,58.1953,49.2622,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-32-11
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.195269233567124
  episode_reward_mean: 53.33483782869543
  episode_reward_min: 47.52258614995584
  episodes_this_iter: 8
  episodes_total: 22688
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2269184
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.022871050983667374
        max_q: 2.601259708404541
        mean_q: 2.312027931213379
        mean_td_error: -0.28948262333869934
        min_q: 2.056147575378418
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004571815952658653
        max_q: 4.266997337341309
        mean_q: 4.0310959815979
        mean_td_error: -0.07862646877765656
        min_q: 3.8836023807525635
    num_steps_sampled: 2269184
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2216,5031.52,2269184,53.3348,58.1953,47.5226,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-32-17
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.0629446991453
  episode_reward_mean: 53.34710769394876
  episode_reward_min: 45.12939171236656
  episodes_this_iter: 8
  episodes_total: 22720
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2272256
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012066788040101528
        max_q: 1.628652572631836
        mean_q: 1.0870057344436646
        mean_td_error: -0.07625407725572586
        min_q: 0.6665359735488892
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007478040177375078
        max_q: 4.107640743255615
        mean_q: 3.95516300201416
        mean_td_error: -0.14779871702194214
        min_q: 3.7612504959106445
    num_steps_sampled: 2272256
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2219,5037.73,2272256,53.3471,59.0629,45.1294,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-32-24
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.0629446991453
  episode_reward_mean: 52.77898604887316
  episode_reward_min: 45.12939171236656
  episodes_this_iter: 16
  episodes_total: 22752
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2275328
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.052522074431180954
        max_q: 1.368711233139038
        mean_q: 0.9543896913528442
        mean_td_error: -0.6790511608123779
        min_q: 0.44113290309906006
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008897160179913044
        max_q: 4.00351619720459
        mean_q: 3.9397811889648438
        mean_td_error: -0.18716369569301605
        min_q: 3.8634955883026123
    num_steps_sampled: 2275328
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2222,5044.12,2275328,52.779,59.0629,45.1294,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-32-30
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.0629446991453
  episode_reward_mean: 53.211668474682995
  episode_reward_min: 45.12939171236656
  episodes_this_iter: 16
  episodes_total: 22784
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2278400
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008019132539629936
        max_q: 1.7587158679962158
        mean_q: 1.5331637859344482
        mean_td_error: -0.09655893594026566
        min_q: 1.3891489505767822
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00241259578615427
        max_q: 4.166728973388672
        mean_q: 3.968153476715088
        mean_td_error: -0.041136376559734344
        min_q: 3.8742356300354004
    num_steps_sampled: 2278400
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2225,5050.29,2278400,53.2117,59.0629,45.1294,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-32-37
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.0629446991453
  episode_reward_mean: 53.25343771090129
  episode_reward_min: 45.12939171236656
  episodes_this_iter: 8
  episodes_total: 22808
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2281472
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004672405309975147
        max_q: 2.403536796569824
        mean_q: 2.304774284362793
        mean_td_error: -0.049497634172439575
        min_q: 2.1586830615997314
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002013303805142641
        max_q: 4.253533840179443
        mean_q: 4.151845932006836
        mean_td_error: 0.029251545667648315
        min_q: 4.042135238647461
    num_steps_sampled: 2281472
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2228,5056.47,2281472,53.2534,59.0629,45.1294,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-32-43
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.19283110783704
  episode_reward_mean: 52.550519405332196
  episode_reward_min: 48.7292091742199
  episodes_this_iter: 8
  episodes_total: 22840
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2284544
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002973641501739621
        max_q: 2.7522130012512207
        mean_q: 2.6839187145233154
        mean_td_error: -0.017295032739639282
        min_q: 2.580505132675171
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00429530069231987
        max_q: 4.206600666046143
        mean_q: 4.120975494384766
        mean_td_error: -0.08350761234760284
        min_q: 4.008615970611572
    num_steps_sampled: 2284544
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2231,5062.94,2284544,52.5505,58.1928,48.7292,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-32-50
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.66347545396327
  episode_reward_mean: 53.1471223578858
  episode_reward_min: 48.7292091742199
  episodes_this_iter: 8
  episodes_total: 22872
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2287616
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01687171682715416
        max_q: 3.155421018600464
        mean_q: 2.978729248046875
        mean_td_error: -0.21726223826408386
        min_q: 2.8052213191986084
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003075768006965518
        max_q: 4.0902791023254395
        mean_q: 3.956345319747925
        mean_td_error: 0.05202532559633255
        min_q: 3.822916030883789
    num_steps_sampled: 2287616
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2234,5069.48,2287616,53.1471,58.6635,48.7292,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-32-56
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.66347545396327
  episode_reward_mean: 53.24292643138227
  episode_reward_min: 49.81146868398586
  episodes_this_iter: 8
  episodes_total: 22904
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2290688
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003207923611626029
        max_q: 3.404470443725586
        mean_q: 3.3081021308898926
        mean_td_error: -0.03776194900274277
        min_q: 3.1230313777923584
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00366144604049623
        max_q: 3.9285061359405518
        mean_q: 3.6830549240112305
        mean_td_error: -0.058123886585235596
        min_q: 3.4660885334014893
    num_steps_sampled: 2290688
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2237,5075.6,2290688,53.2429,58.6635,49.8115,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-33-03
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.66347545396327
  episode_reward_mean: 53.521615394313
  episode_reward_min: 50.11510283621555
  episodes_this_iter: 16
  episodes_total: 22936
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2293760
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0023601027205586433
        max_q: 3.792354106903076
        mean_q: 3.655205011367798
        mean_td_error: -0.013967156410217285
        min_q: 3.553589344024658
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006621649023145437
        max_q: 3.6760246753692627
        mean_q: 3.5970659255981445
        mean_td_error: -0.1315886676311493
        min_q: 3.505502462387085
    num_steps_sampled: 2293760
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2240,5081.64,2293760,53.5216,58.6635,50.1151,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-33-09
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.66347545396327
  episode_reward_mean: 53.04728498078211
  episode_reward_min: 47.49637275551446
  episodes_this_iter: 16
  episodes_total: 22968
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2296832
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0013726315228268504
        max_q: 3.999143600463867
        mean_q: 3.9346837997436523
        mean_td_error: -0.002732709050178528
        min_q: 3.890913963317871
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004351782612502575
        max_q: 3.7456507682800293
        mean_q: 3.613912582397461
        mean_td_error: 0.07122021913528442
        min_q: 3.412594795227051
    num_steps_sampled: 2296832
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2243,5087.87,2296832,53.0473,58.6635,47.4964,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-33-16
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.6173708737425
  episode_reward_mean: 53.6373991888357
  episode_reward_min: 47.49637275551446
  episodes_this_iter: 8
  episodes_total: 22992
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2299904
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0020189539063721895
        max_q: 4.4576029777526855
        mean_q: 4.3082990646362305
        mean_td_error: -0.030359312891960144
        min_q: 4.198287010192871
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009068312123417854
        max_q: 3.611288070678711
        mean_q: 3.541267156600952
        mean_td_error: -0.1600572168827057
        min_q: 3.4486262798309326
    num_steps_sampled: 2299904
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2246,5094.96,2299904,53.6374,58.6174,47.4964,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-33-23
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.6173708737425
  episode_reward_mean: 54.93476299445024
  episode_reward_min: 47.49637275551446
  episodes_this_iter: 8
  episodes_total: 23024
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2302976
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0016683657886460423
        max_q: 4.4703779220581055
        mean_q: 4.389297008514404
        mean_td_error: -0.0223359614610672
        min_q: 4.32432222366333
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.018737128004431725
        max_q: 2.9999547004699707
        mean_q: 2.7126193046569824
        mean_td_error: -0.30811071395874023
        min_q: 2.4754421710968018
    num_steps_sampled: 2302976
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2249,5101.49,2302976,54.9348,58.6174,47.4964,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-33-30
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.6997543435792
  episode_reward_mean: 55.80760840423542
  episode_reward_min: 52.18667111843309
  episodes_this_iter: 8
  episodes_total: 23056
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2306048
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0020570470951497555
        max_q: 4.555623531341553
        mean_q: 4.463095188140869
        mean_td_error: 0.032482028007507324
        min_q: 4.399384021759033
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012134198099374771
        max_q: 2.37235689163208
        mean_q: 2.0100808143615723
        mean_td_error: -0.19322709739208221
        min_q: 1.6686570644378662
    num_steps_sampled: 2306048
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2252,5107.74,2306048,55.8076,58.6998,52.1867,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-33-36
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.6997543435792
  episode_reward_mean: 55.51022209049597
  episode_reward_min: 53.15119743484357
  episodes_this_iter: 8
  episodes_total: 23088
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2309120
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002709826920181513
        max_q: 4.683163642883301
        mean_q: 4.526221752166748
        mean_td_error: -0.036196961998939514
        min_q: 4.413363456726074
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01240538526326418
        max_q: 1.9659281969070435
        mean_q: 1.8052197694778442
        mean_td_error: -0.22189831733703613
        min_q: 1.6429731845855713
    num_steps_sampled: 2309120
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2255,5114,2309120,55.5102,58.6998,53.1512,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-33-43
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.6997543435792
  episode_reward_mean: 54.99884492144114
  episode_reward_min: 53.089420966926454
  episodes_this_iter: 16
  episodes_total: 23120
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2312192
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0030395127832889557
        max_q: 4.7546844482421875
        mean_q: 4.58213472366333
        mean_td_error: -0.025155633687973022
        min_q: 4.459568023681641
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011867175810039043
        max_q: 2.438249111175537
        mean_q: 2.256174325942993
        mean_td_error: -0.22689591348171234
        min_q: 2.1398696899414062
    num_steps_sampled: 2312192
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2258,5120.69,2312192,54.9988,58.6998,53.0894,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-33-50
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.6997543435792
  episode_reward_mean: 54.069640126922856
  episode_reward_min: 49.97979569653013
  episodes_this_iter: 16
  episodes_total: 23152
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2315264
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0010846205987036228
        max_q: 4.658294677734375
        mean_q: 4.597136497497559
        mean_td_error: -0.012208983302116394
        min_q: 4.5269598960876465
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0019874549470841885
        max_q: 2.8038101196289062
        mean_q: 2.7086997032165527
        mean_td_error: -0.010072290897369385
        min_q: 2.597825765609741
    num_steps_sampled: 2315264
    num_steps_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2261,5127.68,2315264,54.0696,58.6998,49.9798,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-33-56
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.16029388763802
  episode_reward_mean: 53.7695979729456
  episode_reward_min: 49.97979569653013
  episodes_this_iter: 8
  episodes_total: 23168
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2317312
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0017508896999061108
        max_q: 4.7064948081970215
        mean_q: 4.60357666015625
        mean_td_error: -0.005906909704208374
        min_q: 4.530481338500977
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005022210534662008
        max_q: 3.074472427368164
        mean_q: 2.9001688957214355
        mean_td_error: -0.08005720376968384
        min_q: 2.786581039428711
    num_steps_sampled: 2317312
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2263,5133.04,2317312,53.7696,57.1603,49.9798,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-34-02
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.16029388763802
  episode_reward_mean: 53.54220247313984
  episode_reward_min: 49.97979569653013
  episodes_this_iter: 16
  episodes_total: 23192
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2319360
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004544372670352459
        max_q: 4.909599304199219
        mean_q: 4.808290958404541
        mean_td_error: 0.0784350037574768
        min_q: 4.743179798126221
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004750612657517195
        max_q: 3.2013025283813477
        mean_q: 3.1213037967681885
        mean_td_error: -0.08037956058979034
        min_q: 3.047938823699951
    num_steps_sampled: 2319360
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2265,5139.13,2319360,53.5422,57.1603,49.9798,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-34-09
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.16029388763802
  episode_reward_mean: 53.35686457891954
  episode_reward_min: 49.97979569653013
  episodes_this_iter: 8
  episodes_total: 23208
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2321408
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0033954328391700983
        max_q: 4.849580764770508
        mean_q: 4.759930610656738
        mean_td_error: 0.04643052816390991
        min_q: 4.707403182983398
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0026177617255598307
        max_q: 3.495382070541382
        mean_q: 3.3462202548980713
        mean_td_error: -0.04572802037000656
        min_q: 3.3061161041259766
    num_steps_sampled: 2321408
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2267,5145.96,2321408,53.3569,57.1603,49.9798,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-34-16
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.8297279355184
  episode_reward_mean: 52.7946991557084
  episode_reward_min: 49.97979569653013
  episodes_this_iter: 8
  episodes_total: 23232
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2323456
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0034270151518285275
        max_q: 4.871490478515625
        mean_q: 4.692533016204834
        mean_td_error: 0.004271253943443298
        min_q: 4.611122131347656
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004758041817694902
        max_q: 3.697725534439087
        mean_q: 3.5521786212921143
        mean_td_error: -0.07370629161596298
        min_q: 3.4400689601898193
    num_steps_sampled: 2323456
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2269,5152.59,2323456,52.7947,56.8297,49.9798,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-34-22
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.8297279355184
  episode_reward_mean: 53.40170230675612
  episode_reward_min: 49.97979569653013
  episodes_this_iter: 8
  episodes_total: 23248
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2325504
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0059701502323150635
        max_q: 4.622023105621338
        mean_q: 4.456067085266113
        mean_td_error: -0.07989998161792755
        min_q: 4.2475690841674805
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006293816026300192
        max_q: 3.6396713256835938
        mean_q: 3.505617618560791
        mean_td_error: -0.11051823943853378
        min_q: 3.404387950897217
    num_steps_sampled: 2325504
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2271,5158.87,2325504,53.4017,56.8297,49.9798,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-34-28
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.8297279355184
  episode_reward_mean: 53.2678994610575
  episode_reward_min: 48.679323917746245
  episodes_this_iter: 8
  episodes_total: 23272
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2327552
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010497944429516792
        max_q: 4.788629055023193
        mean_q: 4.565611362457275
        mean_td_error: 0.15801088511943817
        min_q: 4.194155216217041
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0033722338266670704
        max_q: 3.7746541500091553
        mean_q: 3.6882433891296387
        mean_td_error: -0.06130891293287277
        min_q: 3.501394748687744
    num_steps_sampled: 2327552
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2273,5164.65,2327552,53.2679,56.8297,48.6793,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-34-33
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.76811250999252
  episode_reward_mean: 52.321573110684085
  episode_reward_min: 47.899836562148856
  episodes_this_iter: 16
  episodes_total: 23296
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2329600
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0027305763214826584
        max_q: 4.357028484344482
        mean_q: 4.321002006530762
        mean_td_error: -0.03218965232372284
        min_q: 4.292874336242676
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013358106836676598
        max_q: 3.798539161682129
        mean_q: 3.6591949462890625
        mean_td_error: -0.234544575214386
        min_q: 3.5497798919677734
    num_steps_sampled: 2329600
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2275,5169.61,2329600,52.3216,56.7681,47.8998,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-34-40
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.63983499836403
  episode_reward_mean: 52.87420405764378
  episode_reward_min: 47.899836562148856
  episodes_this_iter: 8
  episodes_total: 23320
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2332672
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01914602518081665
        max_q: 4.45920991897583
        mean_q: 4.156043529510498
        mean_td_error: -0.17009833455085754
        min_q: 3.7720491886138916
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.05858836695551872
        max_q: 3.4776761531829834
        mean_q: 2.287261486053467
        mean_td_error: -1.1063199043273926
        min_q: 1.7216248512268066
    num_steps_sampled: 2332672
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2278,5176.41,2332672,52.8742,57.6398,47.8998,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-34-47
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.63983499836403
  episode_reward_mean: 51.8979735318174
  episode_reward_min: 47.899836562148856
  episodes_this_iter: 8
  episodes_total: 23352
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2335744
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0073149059899151325
        max_q: 4.2374348640441895
        mean_q: 4.019437313079834
        mean_td_error: -0.059968866407871246
        min_q: 3.9206809997558594
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.033811070024967194
        max_q: 1.885617733001709
        mean_q: 1.5473175048828125
        mean_td_error: -0.5162245035171509
        min_q: 1.3085414171218872
    num_steps_sampled: 2335744
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2281,5182.87,2335744,51.898,57.6398,47.8998,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-34-54
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.63983499836403
  episode_reward_mean: 53.17116696029505
  episode_reward_min: 47.899836562148856
  episodes_this_iter: 8
  episodes_total: 23384
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2338816
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.029490040615200996
        max_q: 4.14799165725708
        mean_q: 3.7711071968078613
        mean_td_error: -0.24008114635944366
        min_q: 3.4456398487091064
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014251922257244587
        max_q: 1.5344643592834473
        mean_q: 1.2629225254058838
        mean_td_error: -0.19487664103507996
        min_q: 1.012241005897522
    num_steps_sampled: 2338816
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2284,5189.3,2338816,53.1712,57.6398,47.8998,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-35-00
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.763178380039584
  episode_reward_mean: 52.27505775113647
  episode_reward_min: 47.93160143866384
  episodes_this_iter: 8
  episodes_total: 23416
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2341888
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.04471829906105995
        max_q: 3.8638408184051514
        mean_q: 3.418555974960327
        mean_td_error: -0.36539697647094727
        min_q: 3.2075653076171875
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.018074464052915573
        max_q: 1.7595653533935547
        mean_q: 1.4767385721206665
        mean_td_error: -0.199088454246521
        min_q: 1.1114156246185303
    num_steps_sampled: 2341888
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2287,5195.53,2341888,52.2751,55.7632,47.9316,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-35-07
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.763178380039584
  episode_reward_mean: 53.33458094412461
  episode_reward_min: 47.93160143866384
  episodes_this_iter: 16
  episodes_total: 23448
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2344960
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008525419048964977
        max_q: 3.298928737640381
        mean_q: 3.174870491027832
        mean_td_error: -0.09294215589761734
        min_q: 3.1088690757751465
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.016637923195958138
        max_q: 0.7618556022644043
        mean_q: 0.4525691866874695
        mean_td_error: -0.1805049180984497
        min_q: 0.044521063566207886
    num_steps_sampled: 2344960
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2290,5202.29,2344960,53.3346,55.7632,47.9316,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-35-14
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.3589122581248
  episode_reward_mean: 53.27652513082463
  episode_reward_min: 47.93160143866384
  episodes_this_iter: 16
  episodes_total: 23480
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2348032
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00445318641141057
        max_q: 3.7476589679718018
        mean_q: 3.6290876865386963
        mean_td_error: -0.0477026030421257
        min_q: 3.566877841949463
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010038426145911217
        max_q: 1.3803788423538208
        mean_q: 1.3081653118133545
        mean_td_error: 0.13618074357509613
        min_q: 1.163880467414856
    num_steps_sampled: 2348032
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2293,5208.62,2348032,53.2765,59.3589,47.9316,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-35-21
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.3589122581248
  episode_reward_mean: 52.924478433280235
  episode_reward_min: 47.57565771297568
  episodes_this_iter: 8
  episodes_total: 23504
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2351104
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002165397861972451
        max_q: 3.7510242462158203
        mean_q: 3.6928486824035645
        mean_td_error: -0.01861868053674698
        min_q: 3.5906543731689453
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004425294231623411
        max_q: 2.608649492263794
        mean_q: 2.5169992446899414
        mean_td_error: 0.052719853818416595
        min_q: 2.3742594718933105
    num_steps_sampled: 2351104
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2296,5215.25,2351104,52.9245,59.3589,47.5757,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-35-27
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.3589122581248
  episode_reward_mean: 51.95884797342913
  episode_reward_min: 47.57565771297568
  episodes_this_iter: 8
  episodes_total: 23536
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2354176
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014019609428942204
        max_q: 3.814213275909424
        mean_q: 3.5821590423583984
        mean_td_error: -0.12906906008720398
        min_q: 3.4238221645355225
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0026752992998808622
        max_q: 3.3494460582733154
        mean_q: 3.261430501937866
        mean_td_error: -0.03316932171583176
        min_q: 3.1689014434814453
    num_steps_sampled: 2354176
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2299,5221.75,2354176,51.9588,59.3589,47.5757,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-35-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.511070351214656
  episode_reward_mean: 51.092608577426546
  episode_reward_min: 47.57565771297568
  episodes_this_iter: 8
  episodes_total: 23568
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2357248
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.024425985291600227
        max_q: 3.3654370307922363
        mean_q: 2.6561336517333984
        mean_td_error: -0.14503107964992523
        min_q: 2.1008243560791016
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003149960422888398
        max_q: 3.6380996704101562
        mean_q: 3.412598133087158
        mean_td_error: -0.04051799327135086
        min_q: 3.198338508605957
    num_steps_sampled: 2357248
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2302,5228.46,2357248,51.0926,57.5111,47.5757,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-35-41
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.511070351214656
  episode_reward_mean: 52.8851475328888
  episode_reward_min: 48.4490105426764
  episodes_this_iter: 8
  episodes_total: 23600
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2360320
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012013854458928108
        max_q: 2.0901870727539062
        mean_q: 1.7874274253845215
        mean_td_error: -0.1243387758731842
        min_q: 1.6592307090759277
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0072523304261267185
        max_q: 3.3362927436828613
        mean_q: 3.1870856285095215
        mean_td_error: -0.09658065438270569
        min_q: 2.958726167678833
    num_steps_sampled: 2360320
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2305,5235.21,2360320,52.8851,57.5111,48.449,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-35-48
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.511070351214656
  episode_reward_mean: 52.89499075600783
  episode_reward_min: 47.78084749083973
  episodes_this_iter: 16
  episodes_total: 23632
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2363392
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005909450352191925
        max_q: 2.24593186378479
        mean_q: 2.167426347732544
        mean_td_error: 0.05968094244599342
        min_q: 2.0020864009857178
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001674541155807674
        max_q: 3.5712499618530273
        mean_q: 3.4639902114868164
        mean_td_error: 0.011006951332092285
        min_q: 3.3921937942504883
    num_steps_sampled: 2363392
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2308,5242.24,2363392,52.895,57.5111,47.7808,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-35-55
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.03253805411098
  episode_reward_mean: 52.40209180789186
  episode_reward_min: 47.78084749083973
  episodes_this_iter: 16
  episodes_total: 23664
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2366464
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02678568847477436
        max_q: 2.389631748199463
        mean_q: 2.194608688354492
        mean_td_error: -0.2889326810836792
        min_q: 2.1110689640045166
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004401238169521093
        max_q: 3.6279711723327637
        mean_q: 3.576481580734253
        mean_td_error: -0.0545833557844162
        min_q: 3.5126006603240967
    num_steps_sampled: 2366464
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2311,5248.8,2366464,52.4021,57.0325,47.7808,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-36-02
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.03253805411098
  episode_reward_mean: 50.496668499411996
  episode_reward_min: 45.08860709465911
  episodes_this_iter: 8
  episodes_total: 23688
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2369536
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004702542442828417
        max_q: 2.317845106124878
        mean_q: 2.0810012817382812
        mean_td_error: -0.029711604118347168
        min_q: 1.9052197933197021
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005550289060920477
        max_q: 3.8519816398620605
        mean_q: 3.522059440612793
        mean_td_error: -0.045385755598545074
        min_q: 3.2270891666412354
    num_steps_sampled: 2369536
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2314,5255.41,2369536,50.4967,57.0325,45.0886,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-36-09
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.021599312346865
  episode_reward_mean: 49.912748921176814
  episode_reward_min: 45.08860709465911
  episodes_this_iter: 8
  episodes_total: 23720
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2372608
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01353693101555109
        max_q: 2.639087677001953
        mean_q: 2.3113179206848145
        mean_td_error: 0.14912568032741547
        min_q: 2.1436142921447754
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014135114848613739
        max_q: 3.2084834575653076
        mean_q: 3.037938356399536
        mean_td_error: -0.18822118639945984
        min_q: 2.8304600715637207
    num_steps_sampled: 2372608
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2317,5261.83,2372608,49.9127,56.0216,45.0886,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-36-16
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.021599312346865
  episode_reward_mean: 49.18226523157861
  episode_reward_min: 45.08860709465911
  episodes_this_iter: 8
  episodes_total: 23752
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2375680
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006113122683018446
        max_q: 2.4862728118896484
        mean_q: 2.3670132160186768
        mean_td_error: -0.06194118410348892
        min_q: 2.2198121547698975
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009205293841660023
        max_q: 3.3172051906585693
        mean_q: 3.235628366470337
        mean_td_error: -0.1020672470331192
        min_q: 3.1710691452026367
    num_steps_sampled: 2375680
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2320,5268.98,2375680,49.1823,56.0216,45.0886,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-36-21
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.021599312346865
  episode_reward_mean: 49.16832590058814
  episode_reward_min: 45.08860709465911
  episodes_this_iter: 16
  episodes_total: 23776
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2377728
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.017565200105309486
        max_q: 2.615786552429199
        mean_q: 2.5092549324035645
        mean_td_error: -0.18973228335380554
        min_q: 2.395922899246216
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006758368108421564
        max_q: 3.49590802192688
        mean_q: 3.2730588912963867
        mean_td_error: 0.08639559149742126
        min_q: 3.1270837783813477
    num_steps_sampled: 2377728
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2322,5273.95,2377728,49.1683,56.0216,45.0886,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-36-26
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.021599312346865
  episode_reward_mean: 50.116310848581946
  episode_reward_min: 46.87816616665377
  episodes_this_iter: 8
  episodes_total: 23792
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2379776
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.019595712423324585
        max_q: 2.7212109565734863
        mean_q: 2.6017866134643555
        mean_td_error: -0.20336118340492249
        min_q: 2.4785590171813965
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0046799625270068645
        max_q: 3.1673765182495117
        mean_q: 2.9078147411346436
        mean_td_error: -0.03303726017475128
        min_q: 2.629408359527588
    num_steps_sampled: 2379776
    num_steps_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2324,5279.08,2379776,50.1163,56.0216,46.8782,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-36-32
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.94396370979509
  episode_reward_mean: 49.92515505304975
  episode_reward_min: 46.87816616665377
  episodes_this_iter: 8
  episodes_total: 23816
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2381824
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03290623426437378
        max_q: 2.706303119659424
        mean_q: 2.4649829864501953
        mean_td_error: -0.3726048469543457
        min_q: 2.226445198059082
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.046335864812135696
        max_q: 2.6897850036621094
        mean_q: 2.3495445251464844
        mean_td_error: -0.5705532431602478
        min_q: 2.1513376235961914
    num_steps_sampled: 2381824
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2326,5284.33,2381824,49.9252,54.944,46.8782,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-36-37
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.94396370979509
  episode_reward_mean: 50.255234958338534
  episode_reward_min: 46.87816616665377
  episodes_this_iter: 8
  episodes_total: 23832
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2383872
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008440077304840088
        max_q: 2.478494644165039
        mean_q: 2.333524227142334
        mean_td_error: -0.08202953636646271
        min_q: 2.166576385498047
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009416647255420685
        max_q: 2.748534679412842
        mean_q: 2.6089282035827637
        mean_td_error: -0.125066876411438
        min_q: 2.4972949028015137
    num_steps_sampled: 2383872
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2328,5289.52,2383872,50.2552,54.944,46.8782,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-36-43
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.94396370979509
  episode_reward_mean: 50.63373959122668
  episode_reward_min: 47.279908063077876
  episodes_this_iter: 8
  episodes_total: 23856
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2385920
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.025067433714866638
        max_q: 2.602332353591919
        mean_q: 2.4322733879089355
        mean_td_error: -0.2874501645565033
        min_q: 2.231732130050659
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013549672439694405
        max_q: 2.7982940673828125
        mean_q: 2.2434873580932617
        mean_td_error: -0.14642302691936493
        min_q: 1.9686192274093628
    num_steps_sampled: 2385920
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2330,5295.1,2385920,50.6337,54.944,47.2799,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-36-49
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.384460876622654
  episode_reward_mean: 51.624660462144575
  episode_reward_min: 47.279908063077876
  episodes_this_iter: 8
  episodes_total: 23872
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2387968
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007518562488257885
        max_q: 2.822295904159546
        mean_q: 2.7935843467712402
        mean_td_error: 0.13481946289539337
        min_q: 2.6868622303009033
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007345345336943865
        max_q: 2.4900166988372803
        mean_q: 2.2710418701171875
        mean_td_error: -0.0921887457370758
        min_q: 2.1585159301757812
    num_steps_sampled: 2387968
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2332,5301.18,2387968,51.6247,56.3845,47.2799,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-36-55
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.384460876622654
  episode_reward_mean: 50.627602264209564
  episode_reward_min: 45.681675731784885
  episodes_this_iter: 8
  episodes_total: 23896
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2390016
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006823853589594364
        max_q: 3.5308990478515625
        mean_q: 3.477593183517456
        mean_td_error: 0.11678796261548996
        min_q: 3.252978563308716
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.034757327288389206
        max_q: 2.414738655090332
        mean_q: 2.23078989982605
        mean_td_error: -0.4586334526538849
        min_q: 2.0192928314208984
    num_steps_sampled: 2390016
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2334,5307.09,2390016,50.6276,56.3845,45.6817,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-37-01
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.384460876622654
  episode_reward_mean: 50.95375541512294
  episode_reward_min: 45.681675731784885
  episodes_this_iter: 16
  episodes_total: 23920
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2392064
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0010754966642707586
        max_q: 3.8397345542907715
        mean_q: 3.791773796081543
        mean_td_error: 0.017467215657234192
        min_q: 3.7631914615631104
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.020144671201705933
        max_q: 2.459843397140503
        mean_q: 2.2384259700775146
        mean_td_error: -0.2511216402053833
        min_q: 2.0544180870056152
    num_steps_sampled: 2392064
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2336,5313,2392064,50.9538,56.3845,45.6817,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-37-08
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.384460876622654
  episode_reward_mean: 50.68217317308742
  episode_reward_min: 45.681675731784885
  episodes_this_iter: 8
  episodes_total: 23936
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2394112
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0038398944307118654
        max_q: 4.137101173400879
        mean_q: 4.0585455894470215
        mean_td_error: 0.06735783815383911
        min_q: 3.930727958679199
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008418545126914978
        max_q: 2.549499750137329
        mean_q: 2.4465322494506836
        mean_td_error: -0.11064106971025467
        min_q: 2.3465802669525146
    num_steps_sampled: 2394112
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2338,5319.04,2394112,50.6822,56.3845,45.6817,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-37-14
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.384460876622654
  episode_reward_mean: 50.336233784810865
  episode_reward_min: 45.681675731784885
  episodes_this_iter: 16
  episodes_total: 23960
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2396160
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002870822325348854
        max_q: 4.292177200317383
        mean_q: 4.161299705505371
        mean_td_error: 0.04297609627246857
        min_q: 4.089890480041504
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01378592848777771
        max_q: 2.4525630474090576
        mean_q: 2.314236640930176
        mean_td_error: -0.18702441453933716
        min_q: 2.094111204147339
    num_steps_sampled: 2396160
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2340,5325.05,2396160,50.3362,56.3845,45.6817,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-37-20
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.917979614805134
  episode_reward_mean: 49.994111292710585
  episode_reward_min: 45.681675731784885
  episodes_this_iter: 8
  episodes_total: 23976
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2398208
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0014473790070042014
        max_q: 4.3819098472595215
        mean_q: 4.3407392501831055
        mean_td_error: -0.01973786950111389
        min_q: 4.308499813079834
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011911234818398952
        max_q: 2.628445625305176
        mean_q: 2.4827914237976074
        mean_td_error: -0.15833324193954468
        min_q: 2.3280892372131348
    num_steps_sampled: 2398208
    num_steps_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2342,5330.58,2398208,49.9941,54.918,45.6817,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-37-25
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.917979614805134
  episode_reward_mean: 49.91602391718107
  episode_reward_min: 44.26495700740304
  episodes_this_iter: 8
  episodes_total: 24000
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2400256
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001394520397298038
        max_q: 4.57322883605957
        mean_q: 4.508623123168945
        mean_td_error: 0.018183216452598572
        min_q: 4.456912517547607
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02068571373820305
        max_q: 2.708850860595703
        mean_q: 2.446805000305176
        mean_td_error: -0.25537192821502686
        min_q: 2.2288432121276855
    num_steps_sampled: 2400256
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2344,5335.83,2400256,49.916,54.918,44.265,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-37-30
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.482960979007515
  episode_reward_mean: 49.43057752675974
  episode_reward_min: 44.26495700740304
  episodes_this_iter: 8
  episodes_total: 24016
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2402304
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008057639002799988
        max_q: 4.541498184204102
        mean_q: 4.49587345123291
        mean_td_error: -0.13140179216861725
        min_q: 4.4462361335754395
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00810450129210949
        max_q: 2.790644645690918
        mean_q: 2.485234498977661
        mean_td_error: -0.07984478026628494
        min_q: 2.3320729732513428
    num_steps_sampled: 2402304
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2346,5340.92,2402304,49.4306,54.483,44.265,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-37-37
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.482960979007515
  episode_reward_mean: 49.25696887929433
  episode_reward_min: 44.26495700740304
  episodes_this_iter: 8
  episodes_total: 24048
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2405376
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003345983102917671
        max_q: 4.597056865692139
        mean_q: 4.5612616539001465
        mean_td_error: -0.045579180121421814
        min_q: 4.526294231414795
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004368762485682964
        max_q: 2.9754607677459717
        mean_q: 2.6051764488220215
        mean_td_error: -0.018644683063030243
        min_q: 2.4280481338500977
    num_steps_sampled: 2405376
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2349,5347.86,2405376,49.257,54.483,44.265,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-37-44
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.79370768463341
  episode_reward_mean: 49.483057597165555
  episode_reward_min: 44.26495700740304
  episodes_this_iter: 8
  episodes_total: 24080
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2408448
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0007322782184928656
        max_q: 4.734050273895264
        mean_q: 4.680387496948242
        mean_td_error: 0.001272156834602356
        min_q: 4.595582485198975
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007212722674012184
        max_q: 2.833975315093994
        mean_q: 2.6634092330932617
        mean_td_error: -0.08327504992485046
        min_q: 2.5427536964416504
    num_steps_sampled: 2408448
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2352,5354.35,2408448,49.4831,54.7937,44.265,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-37-51
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.87266563768555
  episode_reward_mean: 51.012376594295965
  episode_reward_min: 46.016753622652296
  episodes_this_iter: 8
  episodes_total: 24112
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2411520
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007667647209018469
        max_q: 4.4649834632873535
        mean_q: 4.378292083740234
        mean_td_error: -0.12936340272426605
        min_q: 4.276731967926025
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009277256205677986
        max_q: 2.742175817489624
        mean_q: 2.5850157737731934
        mean_td_error: -0.11572692543268204
        min_q: 2.480482578277588
    num_steps_sampled: 2411520
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2355,5360.67,2411520,51.0124,54.8727,46.0168,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-37-57
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.87266563768555
  episode_reward_mean: 51.69144155230643
  episode_reward_min: 46.016753622652296
  episodes_this_iter: 16
  episodes_total: 24144
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2414592
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0032569430768489838
        max_q: 4.321579456329346
        mean_q: 4.1532979011535645
        mean_td_error: -0.04628950357437134
        min_q: 4.089785575866699
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0020609006751328707
        max_q: 2.9968743324279785
        mean_q: 2.898085117340088
        mean_td_error: -0.014246344566345215
        min_q: 2.7618298530578613
    num_steps_sampled: 2414592
    num_steps_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2358,5367.02,2414592,51.6914,54.8727,46.0168,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-38-04
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.87266563768555
  episode_reward_mean: 51.51637303202107
  episode_reward_min: 48.63260514953596
  episodes_this_iter: 16
  episodes_total: 24176
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2417664
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004797799978405237
        max_q: 4.298503875732422
        mean_q: 4.167205810546875
        mean_td_error: -0.06747265160083771
        min_q: 4.073112487792969
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004961771424859762
        max_q: 3.138422966003418
        mean_q: 3.0278031826019287
        mean_td_error: -0.07575229555368423
        min_q: 2.890015125274658
    num_steps_sampled: 2417664
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2361,5373.51,2417664,51.5164,54.8727,48.6326,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-38-11
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.05581367127884
  episode_reward_mean: 51.43574734789656
  episode_reward_min: 48.41075222456099
  episodes_this_iter: 8
  episodes_total: 24200
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2420736
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008537061512470245
        max_q: 4.177303314208984
        mean_q: 4.09877872467041
        mean_td_error: -0.1512225717306137
        min_q: 4.009856224060059
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0019361164886504412
        max_q: 3.464273452758789
        mean_q: 3.36922550201416
        mean_td_error: 0.015058629214763641
        min_q: 3.266094207763672
    num_steps_sampled: 2420736
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2364,5379.97,2420736,51.4357,56.0558,48.4108,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-38-17
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.05581367127884
  episode_reward_mean: 51.458739337673684
  episode_reward_min: 48.41075222456099
  episodes_this_iter: 8
  episodes_total: 24232
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2423808
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0021948202047497034
        max_q: 4.144162654876709
        mean_q: 4.071114540100098
        mean_td_error: -0.03439675271511078
        min_q: 3.9669768810272217
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0024315668269991875
        max_q: 3.691997528076172
        mean_q: 3.5288689136505127
        mean_td_error: -0.02436317503452301
        min_q: 3.414302349090576
    num_steps_sampled: 2423808
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2367,5386.46,2423808,51.4587,56.0558,48.4108,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-38-24
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.05581367127884
  episode_reward_mean: 51.831845165013966
  episode_reward_min: 47.92540808636477
  episodes_this_iter: 8
  episodes_total: 24264
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2426880
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007029817905277014
        max_q: 4.091896057128906
        mean_q: 3.859046697616577
        mean_td_error: -0.1057157814502716
        min_q: 3.6333165168762207
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010740098543465137
        max_q: 3.5139377117156982
        mean_q: 3.3634352684020996
        mean_td_error: -0.17069464921951294
        min_q: 3.1895313262939453
    num_steps_sampled: 2426880
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2370,5392.93,2426880,51.8318,56.0558,47.9254,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-38-31
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.215036193384385
  episode_reward_mean: 51.70042715328761
  episode_reward_min: 47.92540808636477
  episodes_this_iter: 8
  episodes_total: 24296
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2429952
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003359456080943346
        max_q: 4.175989627838135
        mean_q: 3.973170757293701
        mean_td_error: -0.025622732937335968
        min_q: 3.871732711791992
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.022892700508236885
        max_q: 3.332761764526367
        mean_q: 3.21578311920166
        mean_td_error: -0.36119866371154785
        min_q: 3.049762725830078
    num_steps_sampled: 2429952
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2373,5399.62,2429952,51.7004,56.215,47.9254,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-38-36
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.215036193384385
  episode_reward_mean: 51.891333914094176
  episode_reward_min: 47.92540808636477
  episodes_this_iter: 16
  episodes_total: 24320
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2432000
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008302547968924046
        max_q: 4.2023138999938965
        mean_q: 3.946404457092285
        mean_td_error: -0.11167459934949875
        min_q: 3.5195631980895996
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008089675568044186
        max_q: 3.3276960849761963
        mean_q: 3.048849582672119
        mean_td_error: -0.10435590893030167
        min_q: 2.905900239944458
    num_steps_sampled: 2432000
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2375,5404.57,2432000,51.8913,56.215,47.9254,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-38-43
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.215036193384385
  episode_reward_mean: 52.0274005695405
  episode_reward_min: 47.92540808636477
  episodes_this_iter: 8
  episodes_total: 24344
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2435072
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00966664683073759
        max_q: 3.773714303970337
        mean_q: 3.637477397918701
        mean_td_error: 0.149431973695755
        min_q: 3.526362180709839
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002993896370753646
        max_q: 3.354619026184082
        mean_q: 3.237666606903076
        mean_td_error: -0.040908679366111755
        min_q: 3.1280548572540283
    num_steps_sampled: 2435072
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2378,5411.7,2435072,52.0274,56.215,47.9254,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-38-51
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.43723615925546
  episode_reward_mean: 53.13507323291922
  episode_reward_min: 48.16098683367283
  episodes_this_iter: 8
  episodes_total: 24376
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2438144
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001983148278668523
        max_q: 4.079195976257324
        mean_q: 3.954056978225708
        mean_td_error: 0.023484617471694946
        min_q: 3.903380870819092
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.022111792117357254
        max_q: 3.148697853088379
        mean_q: 2.9778354167938232
        mean_td_error: -0.33867958188056946
        min_q: 2.7511298656463623
    num_steps_sampled: 2438144
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2381,5418.88,2438144,53.1351,57.4372,48.161,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-38-58
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.764411716908484
  episode_reward_mean: 54.320063223071685
  episode_reward_min: 49.3616225834937
  episodes_this_iter: 8
  episodes_total: 24408
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2441216
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0050196112133562565
        max_q: 4.267176628112793
        mean_q: 4.123178958892822
        mean_td_error: -0.07247079908847809
        min_q: 4.023128509521484
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.019965680316090584
        max_q: 2.219189405441284
        mean_q: 1.9133180379867554
        mean_td_error: -0.3205980360507965
        min_q: 1.5996716022491455
    num_steps_sampled: 2441216
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2384,5426,2441216,54.3201,60.7644,49.3616,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-39-05
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.764411716908484
  episode_reward_mean: 54.643764016443285
  episode_reward_min: 50.77993954696315
  episodes_this_iter: 8
  episodes_total: 24440
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2444288
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0017547288443893194
        max_q: 4.317302703857422
        mean_q: 4.265328407287598
        mean_td_error: -0.021632477641105652
        min_q: 4.116948127746582
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013015461154282093
        max_q: 2.084062099456787
        mean_q: 1.9351725578308105
        mean_td_error: -0.15803946554660797
        min_q: 1.6313880681991577
    num_steps_sampled: 2444288
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2387,5432.6,2444288,54.6438,60.7644,50.7799,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-39-11
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.764411716908484
  episode_reward_mean: 54.476323519606154
  episode_reward_min: 50.77993954696315
  episodes_this_iter: 16
  episodes_total: 24472
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2447360
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007447472773492336
        max_q: 4.312005519866943
        mean_q: 4.224395275115967
        mean_td_error: -0.12230497598648071
        min_q: 4.105108737945557
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03322432190179825
        max_q: 1.6308238506317139
        mean_q: 1.322566270828247
        mean_td_error: -0.379851371049881
        min_q: 1.1906921863555908
    num_steps_sampled: 2447360
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2390,5438.93,2447360,54.4763,60.7644,50.7799,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-39-18
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.764411716908484
  episode_reward_mean: 54.059558969574574
  episode_reward_min: 50.76670579307364
  episodes_this_iter: 16
  episodes_total: 24504
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2450432
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006421039812266827
        max_q: 4.221451759338379
        mean_q: 3.973191499710083
        mean_td_error: -0.07994145900011063
        min_q: 3.8278956413269043
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.024401932954788208
        max_q: 2.3131446838378906
        mean_q: 1.9490065574645996
        mean_td_error: -0.28338423371315
        min_q: 1.6868822574615479
    num_steps_sampled: 2450432
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2393,5445.4,2450432,54.0596,60.7644,50.7667,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-39-25
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.23870477012074
  episode_reward_mean: 53.89596846006427
  episode_reward_min: 50.76670579307364
  episodes_this_iter: 8
  episodes_total: 24528
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2453504
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004336808808147907
        max_q: 4.370534420013428
        mean_q: 4.267944812774658
        mean_td_error: -0.06466268002986908
        min_q: 4.200039386749268
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01068943552672863
        max_q: 2.1067655086517334
        mean_q: 1.95265531539917
        mean_td_error: -0.12746214866638184
        min_q: 1.8413465023040771
    num_steps_sampled: 2453504
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2396,5452.05,2453504,53.896,58.2387,50.7667,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-39-32
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.40257782262983
  episode_reward_mean: 53.656219196527964
  episode_reward_min: 48.749964819924884
  episodes_this_iter: 8
  episodes_total: 24560
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2456576
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.030131502076983452
        max_q: 4.588006496429443
        mean_q: 4.256749153137207
        mean_td_error: -0.39106523990631104
        min_q: 4.068117141723633
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01723438687622547
        max_q: 2.588613748550415
        mean_q: 2.496171474456787
        mean_td_error: 0.18971401453018188
        min_q: 2.2260937690734863
    num_steps_sampled: 2456576
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2399,5458.56,2456576,53.6562,57.4026,48.75,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-39-38
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.40257782262983
  episode_reward_mean: 54.03525222379887
  episode_reward_min: 48.749964819924884
  episodes_this_iter: 8
  episodes_total: 24592
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2459648
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02855929546058178
        max_q: 4.854948997497559
        mean_q: 3.7215044498443604
        mean_td_error: 0.46529218554496765
        min_q: 3.012789249420166
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008609090931713581
        max_q: 2.7262730598449707
        mean_q: 2.633643627166748
        mean_td_error: -0.10248128324747086
        min_q: 2.4282491207122803
    num_steps_sampled: 2459648
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2402,5464.88,2459648,54.0353,57.4026,48.75,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-39-45
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.40257782262983
  episode_reward_mean: 53.71647693895037
  episode_reward_min: 47.625381165549165
  episodes_this_iter: 8
  episodes_total: 24624
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2462720
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01109007652848959
        max_q: 3.7198843955993652
        mean_q: 3.469956398010254
        mean_td_error: 0.14652705192565918
        min_q: 3.14001202583313
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004107469692826271
        max_q: 3.19197678565979
        mean_q: 3.0588502883911133
        mean_td_error: -0.04260997474193573
        min_q: 2.9018616676330566
    num_steps_sampled: 2462720
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2405,5471.36,2462720,53.7165,57.4026,47.6254,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-39-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.199103232940324
  episode_reward_mean: 53.90867471198738
  episode_reward_min: 47.625381165549165
  episodes_this_iter: 16
  episodes_total: 24656
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2465792
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002959334524348378
        max_q: 3.7498714923858643
        mean_q: 3.6643261909484863
        mean_td_error: 0.03671933710575104
        min_q: 3.4775805473327637
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009119360707700253
        max_q: 3.742661952972412
        mean_q: 3.63706111907959
        mean_td_error: 0.1574508547782898
        min_q: 3.544121742248535
    num_steps_sampled: 2465792
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2408,5478.03,2465792,53.9087,57.1991,47.6254,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-39-59
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.199103232940324
  episode_reward_mean: 53.21791071787768
  episode_reward_min: 47.625381165549165
  episodes_this_iter: 16
  episodes_total: 24688
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2468864
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0009544193162582815
        max_q: 4.375752925872803
        mean_q: 4.276544570922852
        mean_td_error: 0.002412855625152588
        min_q: 4.190240383148193
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003945174161344767
        max_q: 4.075560092926025
        mean_q: 4.015894412994385
        mean_td_error: 0.06352729350328445
        min_q: 3.92531681060791
    num_steps_sampled: 2468864
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2411,5484.55,2468864,53.2179,57.1991,47.6254,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-40-05
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.199103232940324
  episode_reward_mean: 53.1295588008411
  episode_reward_min: 47.625381165549165
  episodes_this_iter: 8
  episodes_total: 24712
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2471936
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0018020229181274772
        max_q: 4.41703987121582
        mean_q: 4.32246732711792
        mean_td_error: -0.010805115103721619
        min_q: 4.164247989654541
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0021237735636532307
        max_q: 4.35675573348999
        mean_q: 4.231899261474609
        mean_td_error: 0.020614102482795715
        min_q: 4.107487201690674
    num_steps_sampled: 2471936
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2414,5490.88,2471936,53.1296,57.1991,47.6254,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-40-12
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.62099725480505
  episode_reward_mean: 52.423139241891796
  episode_reward_min: 48.02695213425037
  episodes_this_iter: 8
  episodes_total: 24744
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2475008
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004779732320457697
        max_q: 4.385919094085693
        mean_q: 4.319096088409424
        mean_td_error: -0.05668042600154877
        min_q: 4.1794281005859375
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002252043690532446
        max_q: 4.469727993011475
        mean_q: 4.398947238922119
        mean_td_error: 0.034972965717315674
        min_q: 4.295132160186768
    num_steps_sampled: 2475008
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2417,5497.7,2475008,52.4231,57.621,48.027,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-40-19
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.62099725480505
  episode_reward_mean: 52.10700294507129
  episode_reward_min: 48.02695213425037
  episodes_this_iter: 8
  episodes_total: 24776
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2478080
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002486902056261897
        max_q: 4.562232971191406
        mean_q: 4.46320915222168
        mean_td_error: -0.017976507544517517
        min_q: 4.375786304473877
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007105443626642227
        max_q: 4.458202838897705
        mean_q: 4.375371932983398
        mean_td_error: -0.11667914688587189
        min_q: 4.203691005706787
    num_steps_sampled: 2478080
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2420,5504.77,2478080,52.107,57.621,48.027,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-40-26
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.62099725480505
  episode_reward_mean: 52.884436384644815
  episode_reward_min: 48.02695213425037
  episodes_this_iter: 8
  episodes_total: 24808
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2481152
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012739887461066246
        max_q: 4.168230056762695
        mean_q: 3.970576047897339
        mean_td_error: -0.14553067088127136
        min_q: 3.864609956741333
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006241117604076862
        max_q: 4.403739929199219
        mean_q: 4.290665149688721
        mean_td_error: -0.12993140518665314
        min_q: 4.187860488891602
    num_steps_sampled: 2481152
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2423,5511.61,2481152,52.8844,57.621,48.027,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-40-33
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.62099725480505
  episode_reward_mean: 53.513858396641915
  episode_reward_min: 50.13093263901323
  episodes_this_iter: 8
  episodes_total: 24840
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2484224
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004431494511663914
        max_q: 4.186136722564697
        mean_q: 4.095965385437012
        mean_td_error: -0.056553274393081665
        min_q: 4.000874042510986
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0023402853403240442
        max_q: 4.577004432678223
        mean_q: 4.4855732917785645
        mean_td_error: -0.04751136898994446
        min_q: 4.3958420753479
    num_steps_sampled: 2484224
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2426,5518.14,2484224,53.5139,57.621,50.1309,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-40-40
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.39698355212696
  episode_reward_mean: 53.47556239163359
  episode_reward_min: 50.13093263901323
  episodes_this_iter: 16
  episodes_total: 24872
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2487296
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0029109243769198656
        max_q: 4.488989353179932
        mean_q: 4.27425479888916
        mean_td_error: 0.03154700994491577
        min_q: 4.108202934265137
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.000609043228905648
        max_q: 4.7001142501831055
        mean_q: 4.648135662078857
        mean_td_error: -0.004592642188072205
        min_q: 4.508086681365967
    num_steps_sampled: 2487296
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2429,5524.64,2487296,53.4756,57.397,50.1309,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-40-47
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.39698355212696
  episode_reward_mean: 52.83204423311157
  episode_reward_min: 45.48017016724666
  episodes_this_iter: 8
  episodes_total: 24896
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2490368
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004587891511619091
        max_q: 4.34113073348999
        mean_q: 4.2334136962890625
        mean_td_error: 0.051293253898620605
        min_q: 4.138134479522705
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001373256091028452
        max_q: 4.839927673339844
        mean_q: 4.778204917907715
        mean_td_error: 0.012489140033721924
        min_q: 4.753922462463379
    num_steps_sampled: 2490368
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2432,5531.64,2490368,52.832,57.397,45.4802,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-40-54
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.0403942551558
  episode_reward_mean: 51.99239665852124
  episode_reward_min: 45.48017016724666
  episodes_this_iter: 8
  episodes_total: 24928
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2493440
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013015255331993103
        max_q: 4.184310436248779
        mean_q: 4.064938545227051
        mean_td_error: -0.14994841814041138
        min_q: 3.995192289352417
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002549006137996912
        max_q: 4.776127338409424
        mean_q: 4.700806617736816
        mean_td_error: -0.04965229332447052
        min_q: 4.594425201416016
    num_steps_sampled: 2493440
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2435,5538.52,2493440,51.9924,57.0404,45.4802,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-41-01
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.0403942551558
  episode_reward_mean: 51.1064760406065
  episode_reward_min: 45.48017016724666
  episodes_this_iter: 8
  episodes_total: 24960
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2496512
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006958404555916786
        max_q: 4.215212345123291
        mean_q: 4.022745609283447
        mean_td_error: -0.08173313736915588
        min_q: 3.9546496868133545
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0014684894122183323
        max_q: 4.792505264282227
        mean_q: 4.7343525886535645
        mean_td_error: -0.02564491331577301
        min_q: 4.64188814163208
    num_steps_sampled: 2496512
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2438,5545.16,2496512,51.1065,57.0404,45.4802,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-41-08
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.0403942551558
  episode_reward_mean: 49.623736322866705
  episode_reward_min: 44.45569260086589
  episodes_this_iter: 8
  episodes_total: 24992
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2499584
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008738123811781406
        max_q: 3.9000015258789062
        mean_q: 3.7953808307647705
        mean_td_error: 0.11951634287834167
        min_q: 3.6091270446777344
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002617885125800967
        max_q: 4.714686870574951
        mean_q: 4.615156173706055
        mean_td_error: -0.05213499069213867
        min_q: 4.5666184425354
    num_steps_sampled: 2499584
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2441,5551.95,2499584,49.6237,57.0404,44.4557,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-41-15
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.70399340267041
  episode_reward_mean: 49.83070795386791
  episode_reward_min: 44.45569260086589
  episodes_this_iter: 8
  episodes_total: 25024
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2502656
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002402633661404252
        max_q: 3.596848964691162
        mean_q: 3.497497797012329
        mean_td_error: -0.014855630695819855
        min_q: 3.3831214904785156
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00185285450424999
        max_q: 4.560016632080078
        mean_q: 4.473970413208008
        mean_td_error: -0.028391778469085693
        min_q: 4.383384704589844
    num_steps_sampled: 2502656
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2444,5559.02,2502656,49.8307,55.704,44.4557,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-41-22
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.10491135215132
  episode_reward_mean: 49.64262500826868
  episode_reward_min: 44.45569260086589
  episodes_this_iter: 16
  episodes_total: 25056
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2505728
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0024888000916689634
        max_q: 3.8331127166748047
        mean_q: 3.7574262619018555
        mean_td_error: 0.025992773473262787
        min_q: 3.5727319717407227
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0012424506712704897
        max_q: 4.545892238616943
        mean_q: 4.468697547912598
        mean_td_error: -0.01551210880279541
        min_q: 4.276398658752441
    num_steps_sampled: 2505728
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2447,5565.75,2505728,49.6426,54.1049,44.4557,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-41-29
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.10491135215132
  episode_reward_mean: 49.489963452246464
  episode_reward_min: 43.48283991558692
  episodes_this_iter: 16
  episodes_total: 25088
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2508800
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009706953540444374
        max_q: 3.927318811416626
        mean_q: 3.602520704269409
        mean_td_error: -0.12259860336780548
        min_q: 3.3558568954467773
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0027847394812852144
        max_q: 4.509957313537598
        mean_q: 4.4555253982543945
        mean_td_error: -0.05474415421485901
        min_q: 4.354687213897705
    num_steps_sampled: 2508800
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2450,5572.33,2508800,49.49,54.1049,43.4828,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-41-36
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.10491135215132
  episode_reward_mean: 48.68138661095934
  episode_reward_min: 43.48283991558692
  episodes_this_iter: 8
  episodes_total: 25112
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2511872
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006797167006880045
        max_q: 3.5292539596557617
        mean_q: 3.1612987518310547
        mean_td_error: -0.0648469552397728
        min_q: 2.8293004035949707
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0016949484124779701
        max_q: 4.501741409301758
        mean_q: 4.407956600189209
        mean_td_error: -0.033360183238983154
        min_q: 4.384559631347656
    num_steps_sampled: 2511872
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2453,5578.9,2511872,48.6814,54.1049,43.4828,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-41-43
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.958805252192604
  episode_reward_mean: 50.053547751304
  episode_reward_min: 43.48283991558692
  episodes_this_iter: 8
  episodes_total: 25144
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2514944
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.027082763612270355
        max_q: 2.7126898765563965
        mean_q: 2.4530935287475586
        mean_td_error: -0.4020623564720154
        min_q: 2.287022590637207
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0009919684380292892
        max_q: 4.48555326461792
        mean_q: 4.403267860412598
        mean_td_error: -0.002321898937225342
        min_q: 4.327476501464844
    num_steps_sampled: 2514944
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2456,5585.5,2514944,50.0535,57.9588,43.4828,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-41-50
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.958805252192604
  episode_reward_mean: 50.77031382164452
  episode_reward_min: 43.48283991558692
  episodes_this_iter: 8
  episodes_total: 25176
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2518016
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002441930817440152
        max_q: 2.7594056129455566
        mean_q: 2.683525800704956
        mean_td_error: -0.01412421464920044
        min_q: 2.601288318634033
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0019276771927252412
        max_q: 4.501110076904297
        mean_q: 4.390186309814453
        mean_td_error: -0.025955364108085632
        min_q: 4.288680076599121
    num_steps_sampled: 2518016
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2459,5592.4,2518016,50.7703,57.9588,43.4828,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-41-57
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.958805252192604
  episode_reward_mean: 51.08812896067038
  episode_reward_min: 45.521597969979325
  episodes_this_iter: 8
  episodes_total: 25208
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2521088
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009027275256812572
        max_q: 2.8840830326080322
        mean_q: 2.664127826690674
        mean_td_error: -0.1144876554608345
        min_q: 2.5797131061553955
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0022913033608347178
        max_q: 4.494691371917725
        mean_q: 4.262679100036621
        mean_td_error: 0.029385536909103394
        min_q: 4.118304252624512
    num_steps_sampled: 2521088
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2462,5598.99,2521088,51.0881,57.9588,45.5216,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-42-04
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.21899905817818
  episode_reward_mean: 50.55565829800099
  episode_reward_min: 45.521597969979325
  episodes_this_iter: 16
  episodes_total: 25240
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2524160
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009646735154092312
        max_q: 2.9859845638275146
        mean_q: 2.730879545211792
        mean_td_error: -0.12195378541946411
        min_q: 2.6420345306396484
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.04410449042916298
        max_q: 4.118806838989258
        mean_q: 2.7363035678863525
        mean_td_error: -0.6589755415916443
        min_q: 1.9828987121582031
    num_steps_sampled: 2524160
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2465,5605.79,2524160,50.5557,54.219,45.5216,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-42-11
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.21899905817818
  episode_reward_mean: 49.895689790524486
  episode_reward_min: 45.34202629283766
  episodes_this_iter: 16
  episodes_total: 25272
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2527232
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007357052061706781
        max_q: 3.1220901012420654
        mean_q: 3.006960153579712
        mean_td_error: -0.10058186203241348
        min_q: 2.9106855392456055
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.017154734581708908
        max_q: 2.9350709915161133
        mean_q: 1.9953227043151855
        mean_td_error: -0.030995871871709824
        min_q: 1.4772846698760986
    num_steps_sampled: 2527232
    num_steps_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2468,5612.65,2527232,49.8957,54.219,45.342,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-42-18
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 61.650603961110335
  episode_reward_mean: 52.64138673517864
  episode_reward_min: 45.34202629283766
  episodes_this_iter: 8
  episodes_total: 25296
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2530304
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002367080422118306
        max_q: 3.309635877609253
        mean_q: 3.167006254196167
        mean_td_error: -0.008557900786399841
        min_q: 3.042351484298706
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002226609503850341
        max_q: 2.513329267501831
        mean_q: 2.3442232608795166
        mean_td_error: 0.029756098985671997
        min_q: 2.3035762310028076
    num_steps_sampled: 2530304
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2471,5619.47,2530304,52.6414,61.6506,45.342,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-42-25
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 61.650603961110335
  episode_reward_mean: 52.516354369720446
  episode_reward_min: 45.34202629283766
  episodes_this_iter: 8
  episodes_total: 25328
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2533376
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0039987401105463505
        max_q: 3.299377918243408
        mean_q: 3.1741445064544678
        mean_td_error: -0.048816315829753876
        min_q: 3.088247537612915
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0025861873291432858
        max_q: 2.877864360809326
        mean_q: 2.809293508529663
        mean_td_error: -0.040905632078647614
        min_q: 2.751206636428833
    num_steps_sampled: 2533376
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2474,5626.22,2533376,52.5164,61.6506,45.342,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-42-31
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 61.650603961110335
  episode_reward_mean: 51.856377808691306
  episode_reward_min: 45.34202629283766
  episodes_this_iter: 8
  episodes_total: 25360
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2536448
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015195054933428764
        max_q: 3.4095115661621094
        mean_q: 3.260432720184326
        mean_td_error: -0.1908147931098938
        min_q: 3.162379264831543
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004542074166238308
        max_q: 3.4043877124786377
        mean_q: 3.293855667114258
        mean_td_error: 0.05029178410768509
        min_q: 3.209815740585327
    num_steps_sampled: 2536448
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2477,5632.83,2536448,51.8564,61.6506,45.342,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-42-38
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.76302755502853
  episode_reward_mean: 51.292743189875324
  episode_reward_min: 47.36050940606855
  episodes_this_iter: 8
  episodes_total: 25392
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2539520
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00737319141626358
        max_q: 3.54114031791687
        mean_q: 3.430485725402832
        mean_td_error: -0.08652101457118988
        min_q: 3.3476858139038086
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001340589951723814
        max_q: 3.3941891193389893
        mean_q: 3.301757335662842
        mean_td_error: -0.011789977550506592
        min_q: 3.1727046966552734
    num_steps_sampled: 2539520
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2480,5639.48,2539520,51.2927,56.763,47.3605,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-42-45
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.245310918771224
  episode_reward_mean: 51.69155242725452
  episode_reward_min: 47.36050940606855
  episodes_this_iter: 16
  episodes_total: 25424
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2542592
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011050340719521046
        max_q: 3.392892360687256
        mean_q: 3.2929577827453613
        mean_td_error: -0.14530083537101746
        min_q: 3.0916788578033447
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00974486768245697
        max_q: 3.495114326477051
        mean_q: 3.3755836486816406
        mean_td_error: -0.13859933614730835
        min_q: 3.3174915313720703
    num_steps_sampled: 2542592
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2483,5646.52,2542592,51.6916,57.2453,47.3605,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-42-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 61.37074772392841
  episode_reward_mean: 52.96003965301959
  episode_reward_min: 48.09915017183283
  episodes_this_iter: 16
  episodes_total: 25456
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2545664
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0036936597898602486
        max_q: 3.4157373905181885
        mean_q: 3.2884140014648438
        mean_td_error: -0.0364857017993927
        min_q: 3.131181001663208
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004287097603082657
        max_q: 3.7734601497650146
        mean_q: 3.5598626136779785
        mean_td_error: 0.06390226632356644
        min_q: 3.3739075660705566
    num_steps_sampled: 2545664
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2486,5653.17,2545664,52.96,61.3707,48.0992,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-42-59
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 61.37074772392841
  episode_reward_mean: 52.87642167846924
  episode_reward_min: 48.29188561567338
  episodes_this_iter: 8
  episodes_total: 25480
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2548736
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.025228159502148628
        max_q: 3.3669466972351074
        mean_q: 3.0357728004455566
        mean_td_error: -0.3130255937576294
        min_q: 2.573873519897461
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013042532838881016
        max_q: 3.9110147953033447
        mean_q: 3.7385427951812744
        mean_td_error: -0.19249634444713593
        min_q: 3.5268588066101074
    num_steps_sampled: 2548736
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2489,5659.68,2548736,52.8764,61.3707,48.2919,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-43-06
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 61.37074772392841
  episode_reward_mean: 52.08033272492463
  episode_reward_min: 47.697535931637766
  episodes_this_iter: 8
  episodes_total: 25512
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2551808
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005279317032545805
        max_q: 3.1619348526000977
        mean_q: 2.9481618404388428
        mean_td_error: -0.07179013639688492
        min_q: 2.7512543201446533
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002620787126943469
        max_q: 3.9620110988616943
        mean_q: 3.7157185077667236
        mean_td_error: -0.025599047541618347
        min_q: 3.595404624938965
    num_steps_sampled: 2551808
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2492,5666.27,2551808,52.0803,61.3707,47.6975,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-43-13
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.76635028785144
  episode_reward_mean: 52.054646547965596
  episode_reward_min: 47.697535931637766
  episodes_this_iter: 8
  episodes_total: 25544
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2554880
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0008494880166836083
        max_q: 3.305208683013916
        mean_q: 3.1912503242492676
        mean_td_error: -0.010434068739414215
        min_q: 3.1046626567840576
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01067833136767149
        max_q: 3.552002429962158
        mean_q: 3.4024267196655273
        mean_td_error: -0.15355372428894043
        min_q: 3.222597599029541
    num_steps_sampled: 2554880
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2495,5673.26,2554880,52.0546,55.7664,47.6975,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-43-20
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.7630403404542
  episode_reward_mean: 53.53102911703537
  episode_reward_min: 47.697535931637766
  episodes_this_iter: 8
  episodes_total: 25576
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2557952
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009122628718614578
        max_q: 3.4568777084350586
        mean_q: 3.305619955062866
        mean_td_error: -0.14629116654396057
        min_q: 3.1649577617645264
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015231224708259106
        max_q: 3.3553683757781982
        mean_q: 3.2628040313720703
        mean_td_error: -0.21445247530937195
        min_q: 3.095017433166504
    num_steps_sampled: 2557952
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2498,5680.18,2557952,53.531,59.763,47.6975,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-43-27
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.7630403404542
  episode_reward_mean: 53.7127517067677
  episode_reward_min: 49.23153937114787
  episodes_this_iter: 8
  episodes_total: 25608
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2561024
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0037559012416750193
        max_q: 3.627067804336548
        mean_q: 3.4461050033569336
        mean_td_error: -0.05357031524181366
        min_q: 3.2800168991088867
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.028806399554014206
        max_q: 2.710554599761963
        mean_q: 2.224456310272217
        mean_td_error: -0.29874229431152344
        min_q: 1.9899595975875854
    num_steps_sampled: 2561024
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2501,5687.24,2561024,53.7128,59.763,49.2315,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-43-33
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.7630403404542
  episode_reward_mean: 53.90953257371202
  episode_reward_min: 49.88710245194884
  episodes_this_iter: 8
  episodes_total: 25624
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2563072
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006402669474482536
        max_q: 3.511990785598755
        mean_q: 3.3169615268707275
        mean_td_error: -0.09597142785787582
        min_q: 3.131542921066284
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.018316954374313354
        max_q: 2.2271406650543213
        mean_q: 2.0563671588897705
        mean_td_error: -0.23583769798278809
        min_q: 1.9209024906158447
    num_steps_sampled: 2563072
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2503,5692.21,2563072,53.9095,59.763,49.8871,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-43-40
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.7630403404542
  episode_reward_mean: 53.267268355301496
  episode_reward_min: 48.60781823697759
  episodes_this_iter: 8
  episodes_total: 25656
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2566144
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011359425261616707
        max_q: 3.4338362216949463
        mean_q: 3.1698691844940186
        mean_td_error: -0.18514451384544373
        min_q: 2.808845281600952
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003384818322956562
        max_q: 2.55777645111084
        mean_q: 2.340519428253174
        mean_td_error: -0.030808180570602417
        min_q: 2.0481019020080566
    num_steps_sampled: 2566144
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2506,5698.98,2566144,53.2673,59.763,48.6078,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-43-46
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.61746789574267
  episode_reward_mean: 53.21890164574588
  episode_reward_min: 48.60781823697759
  episodes_this_iter: 8
  episodes_total: 25688
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2569216
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011162985116243362
        max_q: 2.6642186641693115
        mean_q: 2.3793416023254395
        mean_td_error: -0.18864357471466064
        min_q: 2.275351047515869
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01910136267542839
        max_q: 2.3374531269073486
        mean_q: 2.061260461807251
        mean_td_error: -0.24432800710201263
        min_q: 1.7611098289489746
    num_steps_sampled: 2569216
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2509,5705.59,2569216,53.2189,56.6175,48.6078,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-43-53
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.61746789574267
  episode_reward_mean: 53.084267070372086
  episode_reward_min: 48.60781823697759
  episodes_this_iter: 8
  episodes_total: 25720
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2572288
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.016336286440491676
        max_q: 2.5853054523468018
        mean_q: 2.427493095397949
        mean_td_error: -0.2773341238498688
        min_q: 2.1953768730163574
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008897250518202782
        max_q: 2.3820085525512695
        mean_q: 2.1231398582458496
        mean_td_error: -0.09755906462669373
        min_q: 1.8940649032592773
    num_steps_sampled: 2572288
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2512,5712.11,2572288,53.0843,56.6175,48.6078,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-44-00
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.36223975148002
  episode_reward_mean: 53.99809014986823
  episode_reward_min: 49.35088972925251
  episodes_this_iter: 16
  episodes_total: 25752
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2575360
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00691321911290288
        max_q: 2.7720401287078857
        mean_q: 2.618018865585327
        mean_td_error: -0.12246059626340866
        min_q: 2.5179362297058105
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005844616796821356
        max_q: 2.2811050415039062
        mean_q: 2.118716239929199
        mean_td_error: -0.06052746623754501
        min_q: 1.9301034212112427
    num_steps_sampled: 2575360
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2515,5718.73,2575360,53.9981,58.3622,49.3509,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-44-07
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.67976964214787
  episode_reward_mean: 53.62105327942306
  episode_reward_min: 47.02430224065573
  episodes_this_iter: 16
  episodes_total: 25784
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2578432
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011310825124382973
        max_q: 3.277250289916992
        mean_q: 2.84590220451355
        mean_td_error: -0.1909903585910797
        min_q: 2.53139591217041
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014464501291513443
        max_q: 2.81758451461792
        mean_q: 2.4916887283325195
        mean_td_error: -0.1276174634695053
        min_q: 2.2764768600463867
    num_steps_sampled: 2578432
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2518,5725.32,2578432,53.6211,58.6798,47.0243,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-44-14
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 61.41306541743043
  episode_reward_mean: 55.18757332054014
  episode_reward_min: 47.02430224065573
  episodes_this_iter: 8
  episodes_total: 25808
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2581504
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005728085059672594
        max_q: 3.417556047439575
        mean_q: 3.1526527404785156
        mean_td_error: -0.09234338253736496
        min_q: 3.0255534648895264
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.04910990968346596
        max_q: 2.522176742553711
        mean_q: 2.191434144973755
        mean_td_error: -0.5923627018928528
        min_q: 1.911096215248108
    num_steps_sampled: 2581504
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2521,5732.16,2581504,55.1876,61.4131,47.0243,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-44-21
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 61.41306541743043
  episode_reward_mean: 55.618385100482925
  episode_reward_min: 47.02430224065573
  episodes_this_iter: 8
  episodes_total: 25840
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2584576
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01025115791708231
        max_q: 3.507504463195801
        mean_q: 3.175739288330078
        mean_td_error: -0.1732797920703888
        min_q: 2.9696483612060547
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02378578670322895
        max_q: 2.175703287124634
        mean_q: 2.0640859603881836
        mean_td_error: -0.2949683666229248
        min_q: 1.972907543182373
    num_steps_sampled: 2584576
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2524,5738.77,2584576,55.6184,61.4131,47.0243,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-44-27
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 61.41306541743043
  episode_reward_mean: 54.39447383458523
  episode_reward_min: 47.02430224065573
  episodes_this_iter: 8
  episodes_total: 25872
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2587648
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004909783601760864
        max_q: 3.4412527084350586
        mean_q: 3.348811388015747
        mean_td_error: -0.08628235012292862
        min_q: 3.274594306945801
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001520880963653326
        max_q: 2.889810085296631
        mean_q: 2.721778631210327
        mean_td_error: 0.005323685705661774
        min_q: 2.6371676921844482
    num_steps_sampled: 2587648
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2527,5745.47,2587648,54.3945,61.4131,47.0243,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-44-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 61.41306541743043
  episode_reward_mean: 53.50903837675937
  episode_reward_min: 47.83341308035916
  episodes_this_iter: 8
  episodes_total: 25904
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2590720
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0027551075909286737
        max_q: 3.585784912109375
        mean_q: 3.426643133163452
        mean_td_error: 0.023587308824062347
        min_q: 3.331308364868164
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012417173013091087
        max_q: 3.0811009407043457
        mean_q: 2.864006996154785
        mean_td_error: -0.15160992741584778
        min_q: 2.7439162731170654
    num_steps_sampled: 2590720
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2530,5752.17,2590720,53.509,61.4131,47.8334,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-44-42
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.97463738964599
  episode_reward_mean: 52.40614831073719
  episode_reward_min: 47.83341308035916
  episodes_this_iter: 16
  episodes_total: 25936
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2593792
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006386083550751209
        max_q: 3.456976890563965
        mean_q: 3.141643762588501
        mean_td_error: -0.09718628227710724
        min_q: 3.0288357734680176
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013913197442889214
        max_q: 3.0874176025390625
        mean_q: 2.983682870864868
        mean_td_error: -0.1744716763496399
        min_q: 2.9130570888519287
    num_steps_sampled: 2593792
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2533,5759.13,2593792,52.4061,54.9746,47.8334,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-44-48
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.60740862525993
  episode_reward_mean: 52.37987167829928
  episode_reward_min: 47.83341308035916
  episodes_this_iter: 16
  episodes_total: 25968
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2596864
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011144588701426983
        max_q: 3.2374653816223145
        mean_q: 3.1465468406677246
        mean_td_error: -0.12437894940376282
        min_q: 3.064781665802002
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012516187503933907
        max_q: 3.4462077617645264
        mean_q: 3.3342080116271973
        mean_td_error: -0.15892502665519714
        min_q: 3.287950277328491
    num_steps_sampled: 2596864
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2536,5765.82,2596864,52.3799,54.6074,47.8334,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-44-55
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.76018148774423
  episode_reward_mean: 52.68487178894391
  episode_reward_min: 47.83341308035916
  episodes_this_iter: 8
  episodes_total: 25992
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2599936
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02903338335454464
        max_q: 3.1743311882019043
        mean_q: 2.4563534259796143
        mean_td_error: -0.3182067275047302
        min_q: 2.1313586235046387
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007229469250887632
        max_q: 3.431624174118042
        mean_q: 3.1732349395751953
        mean_td_error: -0.08215662837028503
        min_q: 2.8867526054382324
    num_steps_sampled: 2599936
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2539,5772.13,2599936,52.6849,54.7602,47.8334,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-45-01
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.76018148774423
  episode_reward_mean: 52.67616645419399
  episode_reward_min: 50.65536269059823
  episodes_this_iter: 8
  episodes_total: 26024
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2603008
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.032767124474048615
        max_q: 2.397617816925049
        mean_q: 1.7846076488494873
        mean_td_error: -0.38304898142814636
        min_q: 1.1535873413085938
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02380061335861683
        max_q: 3.0740439891815186
        mean_q: 2.621507406234741
        mean_td_error: -0.29533642530441284
        min_q: 2.364621877670288
    num_steps_sampled: 2603008
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2542,5778.23,2603008,52.6762,54.7602,50.6554,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-45-08
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.01432840231699
  episode_reward_mean: 52.825743747864536
  episode_reward_min: 49.52046782605209
  episodes_this_iter: 8
  episodes_total: 26056
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2606080
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00510397320613265
        max_q: 1.624218463897705
        mean_q: 1.4265544414520264
        mean_td_error: 0.05194269120693207
        min_q: 1.044234037399292
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004344729706645012
        max_q: 3.0413973331451416
        mean_q: 2.938481330871582
        mean_td_error: -0.052705325186252594
        min_q: 2.8740267753601074
    num_steps_sampled: 2606080
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2545,5784.8,2606080,52.8257,59.0143,49.5205,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-45-14
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.01432840231699
  episode_reward_mean: 52.3332701784753
  episode_reward_min: 49.22161207258884
  episodes_this_iter: 8
  episodes_total: 26088
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2609152
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00468721566721797
        max_q: 2.425241231918335
        mean_q: 2.303760290145874
        mean_td_error: 0.04549422860145569
        min_q: 2.1011464595794678
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004892067983746529
        max_q: 3.5839905738830566
        mean_q: 3.38712739944458
        mean_td_error: 0.05031605809926987
        min_q: 3.216120719909668
    num_steps_sampled: 2609152
    num_steps_trained: 26

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2548,5790.6,2609152,52.3333,59.0143,49.2216,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-45-20
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.01432840231699
  episode_reward_mean: 52.70826548720331
  episode_reward_min: 49.22161207258884
  episodes_this_iter: 8
  episodes_total: 26120
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2612224
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0030731591396033764
        max_q: 2.719472885131836
        mean_q: 2.638892889022827
        mean_td_error: -0.024150557816028595
        min_q: 2.542808771133423
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.016926350072026253
        max_q: 3.579552173614502
        mean_q: 3.3639943599700928
        mean_td_error: -0.1590890884399414
        min_q: 3.2167463302612305
    num_steps_sampled: 2612224
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2551,5796.38,2612224,52.7083,59.0143,49.2216,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-45-26
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.01432840231699
  episode_reward_mean: 52.50495470312675
  episode_reward_min: 47.795070234679265
  episodes_this_iter: 16
  episodes_total: 26152
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2615296
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004536062944680452
        max_q: 3.374878168106079
        mean_q: 3.1619033813476562
        mean_td_error: -0.05333453416824341
        min_q: 3.0966615676879883
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.025353044271469116
        max_q: 3.364367723464966
        mean_q: 3.2153573036193848
        mean_td_error: -0.2669520974159241
        min_q: 3.0305399894714355
    num_steps_sampled: 2615296
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2554,5802.37,2615296,52.505,59.0143,47.7951,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-45-32
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.06506501825053
  episode_reward_mean: 52.86099892388587
  episode_reward_min: 47.795070234679265
  episodes_this_iter: 8
  episodes_total: 26176
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2618368
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0036369143053889275
        max_q: 3.4465980529785156
        mean_q: 3.369306802749634
        mean_td_error: -0.0463530570268631
        min_q: 3.2772369384765625
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02516891621053219
        max_q: 3.170529842376709
        mean_q: 2.9738528728485107
        mean_td_error: -0.26790711283683777
        min_q: 2.7441866397857666
    num_steps_sampled: 2618368
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2557,5808.46,2618368,52.861,58.0651,47.7951,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-45-39
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.06506501825053
  episode_reward_mean: 52.49893552043049
  episode_reward_min: 47.795070234679265
  episodes_this_iter: 8
  episodes_total: 26208
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2621440
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014148497022688389
        max_q: 3.6030638217926025
        mean_q: 3.3840062618255615
        mean_td_error: -0.18109501898288727
        min_q: 3.3123509883880615
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009208320640027523
        max_q: 3.418494701385498
        mean_q: 2.7809646129608154
        mean_td_error: 0.003461278975009918
        min_q: 2.5097579956054688
    num_steps_sampled: 2621440
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2560,5814.44,2621440,52.4989,58.0651,47.7951,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-45-45
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.06940536896643
  episode_reward_mean: 52.19928979596661
  episode_reward_min: 47.795070234679265
  episodes_this_iter: 8
  episodes_total: 26240
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2624512
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004041519947350025
        max_q: 3.72824764251709
        mean_q: 3.6071367263793945
        mean_td_error: -0.04153537005186081
        min_q: 3.4734580516815186
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.017794856801629066
        max_q: 2.296929359436035
        mean_q: 2.066983699798584
        mean_td_error: -0.18746231496334076
        min_q: 1.6669217348098755
    num_steps_sampled: 2624512
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2563,5820.39,2624512,52.1993,57.0694,47.7951,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-45-51
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.781157243250085
  episode_reward_mean: 51.45503959037082
  episode_reward_min: 50.09170134027529
  episodes_this_iter: 8
  episodes_total: 26272
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2627584
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003931327722966671
        max_q: 3.9726974964141846
        mean_q: 3.9188733100891113
        mean_td_error: -0.05434595048427582
        min_q: 3.8764448165893555
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008197678253054619
        max_q: 2.2944188117980957
        mean_q: 2.119983196258545
        mean_td_error: -0.057699933648109436
        min_q: 2.0183968544006348
    num_steps_sampled: 2627584
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2566,5826.36,2627584,51.455,54.7812,50.0917,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-45-57
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.570558461429144
  episode_reward_mean: 52.01531475380918
  episode_reward_min: 48.47988399690048
  episodes_this_iter: 8
  episodes_total: 26304
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2630656
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02084321156144142
        max_q: 4.041472911834717
        mean_q: 3.9378552436828613
        mean_td_error: -0.21294847130775452
        min_q: 3.884920120239258
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005612756125628948
        max_q: 2.3964364528656006
        mean_q: 2.24946928024292
        mean_td_error: -0.0515037402510643
        min_q: 2.1788337230682373
    num_steps_sampled: 2630656
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2569,5832.31,2630656,52.0153,55.5706,48.4799,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-46-03
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.570558461429144
  episode_reward_mean: 51.308556078606436
  episode_reward_min: 45.939301329807456
  episodes_this_iter: 16
  episodes_total: 26336
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2633728
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.040570326149463654
        max_q: 3.7025156021118164
        mean_q: 3.19566011428833
        mean_td_error: -0.3397589921951294
        min_q: 2.9995126724243164
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015129259787499905
        max_q: 2.6813302040100098
        mean_q: 2.5228359699249268
        mean_td_error: -0.13924822211265564
        min_q: 2.4103569984436035
    num_steps_sampled: 2633728
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2572,5838.28,2633728,51.3086,55.5706,45.9393,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-46-09
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.570558461429144
  episode_reward_mean: 51.96459016940247
  episode_reward_min: 45.939301329807456
  episodes_this_iter: 16
  episodes_total: 26368
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2636800
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008849425241351128
        max_q: 3.317183494567871
        mean_q: 3.133465051651001
        mean_td_error: -0.07640153914690018
        min_q: 2.8460614681243896
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0040183234959840775
        max_q: 3.181304693222046
        mean_q: 3.022122621536255
        mean_td_error: 0.04087480902671814
        min_q: 2.62434983253479
    num_steps_sampled: 2636800
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2575,5844.23,2636800,51.9646,55.5706,45.9393,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-46-15
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.570558461429144
  episode_reward_mean: 51.82704449486242
  episode_reward_min: 45.939301329807456
  episodes_this_iter: 8
  episodes_total: 26392
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2639872
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005440231878310442
        max_q: 3.1953279972076416
        mean_q: 3.0980868339538574
        mean_td_error: -0.04928792268037796
        min_q: 2.900855541229248
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.016432633623480797
        max_q: 2.8358945846557617
        mean_q: 2.585827589035034
        mean_td_error: -0.16750505566596985
        min_q: 2.4706568717956543
    num_steps_sampled: 2639872
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2578,5850.06,2639872,51.827,55.5706,45.9393,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-46-21
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.349319482420725
  episode_reward_mean: 51.85379601360037
  episode_reward_min: 46.01444093690665
  episodes_this_iter: 8
  episodes_total: 26424
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2642944
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03902382031083107
        max_q: 3.0676867961883545
        mean_q: 2.7920546531677246
        mean_td_error: -0.38717445731163025
        min_q: 2.5985817909240723
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01374876033514738
        max_q: 2.867771863937378
        mean_q: 2.6797451972961426
        mean_td_error: -0.13702505826950073
        min_q: 2.537842273712158
    num_steps_sampled: 2642944
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2581,5855.87,2642944,51.8538,54.3493,46.0144,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-46-27
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.140482455235684
  episode_reward_mean: 52.48959752208213
  episode_reward_min: 46.01444093690665
  episodes_this_iter: 8
  episodes_total: 26456
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2646016
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012087304145097733
        max_q: 2.7853355407714844
        mean_q: 2.6046946048736572
        mean_td_error: -0.0947456955909729
        min_q: 2.3986034393310547
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005370984319597483
        max_q: 3.025413990020752
        mean_q: 2.9237124919891357
        mean_td_error: 0.056832946836948395
        min_q: 2.826030731201172
    num_steps_sampled: 2646016
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2584,5861.81,2646016,52.4896,58.1405,46.0144,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-46-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.140482455235684
  episode_reward_mean: 50.97227167406797
  episode_reward_min: 46.01444093690665
  episodes_this_iter: 8
  episodes_total: 26488
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2649088
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011721406131982803
        max_q: 3.0422022342681885
        mean_q: 2.8134138584136963
        mean_td_error: -0.07815710455179214
        min_q: 2.6571149826049805
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011381265707314014
        max_q: 3.2433924674987793
        mean_q: 3.1089470386505127
        mean_td_error: -0.10485780239105225
        min_q: 3.006127119064331
    num_steps_sampled: 2649088
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2587,5868,2649088,50.9723,58.1405,46.0144,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-46-40
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.140482455235684
  episode_reward_mean: 50.462851021526966
  episode_reward_min: 46.01444093690665
  episodes_this_iter: 16
  episodes_total: 26520
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2652160
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.025577599182724953
        max_q: 2.6630561351776123
        mean_q: 2.4636545181274414
        mean_td_error: -0.30028367042541504
        min_q: 2.2907872200012207
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005402821581810713
        max_q: 3.656104803085327
        mean_q: 3.590909481048584
        mean_td_error: 0.049815885722637177
        min_q: 3.4771199226379395
    num_steps_sampled: 2652160
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2590,5873.81,2652160,50.4629,58.1405,46.0144,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-46-46
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.664867426987485
  episode_reward_mean: 49.08365282534868
  episode_reward_min: 44.79673112435675
  episodes_this_iter: 16
  episodes_total: 26552
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2655232
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.025855427607893944
        max_q: 2.6198978424072266
        mean_q: 2.2081680297851562
        mean_td_error: -0.274323970079422
        min_q: 1.933539628982544
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009655162692070007
        max_q: 3.9314775466918945
        mean_q: 3.6829283237457275
        mean_td_error: -0.08256539702415466
        min_q: 3.5717368125915527
    num_steps_sampled: 2655232
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2593,5879.79,2655232,49.0837,53.6649,44.7967,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-46-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.664867426987485
  episode_reward_mean: 48.66835573022976
  episode_reward_min: 44.79673112435675
  episodes_this_iter: 8
  episodes_total: 26576
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2658304
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0242155808955431
        max_q: 2.014510154724121
        mean_q: 1.698953628540039
        mean_td_error: -0.24124003946781158
        min_q: 1.5827726125717163
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002936804201453924
        max_q: 3.942333221435547
        mean_q: 3.868501663208008
        mean_td_error: 0.05886758863925934
        min_q: 3.7346720695495605
    num_steps_sampled: 2658304
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2596,5885.92,2658304,48.6684,53.6649,44.7967,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-46-58
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.664867426987485
  episode_reward_mean: 49.4174623787432
  episode_reward_min: 44.79673112435675
  episodes_this_iter: 8
  episodes_total: 26608
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2661376
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015038893558084965
        max_q: 2.3660595417022705
        mean_q: 2.1538703441619873
        mean_td_error: -0.15837576985359192
        min_q: 1.904914379119873
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0007056459435261786
        max_q: 4.348130702972412
        mean_q: 4.312425136566162
        mean_td_error: 0.011247888207435608
        min_q: 4.194210052490234
    num_steps_sampled: 2661376
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2599,5891.97,2661376,49.4175,53.6649,44.7967,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-47-04
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.66414979985049
  episode_reward_mean: 49.29983634564691
  episode_reward_min: 46.47653057129005
  episodes_this_iter: 8
  episodes_total: 26640
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2664448
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.04639395698904991
        max_q: 1.7597062587738037
        mean_q: 1.5769010782241821
        mean_td_error: -0.5315498113632202
        min_q: 1.3608969449996948
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0012757093645632267
        max_q: 4.516629695892334
        mean_q: 4.443902015686035
        mean_td_error: -0.02099442481994629
        min_q: 4.321633338928223
    num_steps_sampled: 2664448
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2602,5897.73,2664448,49.2998,53.6641,46.4765,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-47-10
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.94916487575163
  episode_reward_mean: 50.694045382581955
  episode_reward_min: 46.4952719759514
  episodes_this_iter: 8
  episodes_total: 26672
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2667520
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011332320980727673
        max_q: 2.502121925354004
        mean_q: 2.3554368019104004
        mean_td_error: -0.12801578640937805
        min_q: 2.2747695446014404
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0010366091737523675
        max_q: 4.7003173828125
        mean_q: 4.624044418334961
        mean_td_error: 0.0028428584337234497
        min_q: 4.58450174331665
    num_steps_sampled: 2667520
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2605,5903.39,2667520,50.694,53.9492,46.4953,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-47-16
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.9590392240521
  episode_reward_mean: 50.89062599789048
  episode_reward_min: 45.321987751656934
  episodes_this_iter: 16
  episodes_total: 26704
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2670592
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0014472054317593575
        max_q: 2.9475340843200684
        mean_q: 2.7751898765563965
        mean_td_error: 0.010694839060306549
        min_q: 2.7224907875061035
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0026419267524033785
        max_q: 4.698171615600586
        mean_q: 4.646870136260986
        mean_td_error: -0.04556456208229065
        min_q: 4.6209716796875
    num_steps_sampled: 2670592
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2608,5909.33,2670592,50.8906,55.959,45.322,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-47-23
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.9590392240521
  episode_reward_mean: 51.00269233295774
  episode_reward_min: 45.321987751656934
  episodes_this_iter: 16
  episodes_total: 26736
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2673664
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009441127069294453
        max_q: 3.298468828201294
        mean_q: 3.194105386734009
        mean_td_error: -0.09610044211149216
        min_q: 3.0830821990966797
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0023740967735648155
        max_q: 4.734989166259766
        mean_q: 4.669347763061523
        mean_td_error: -0.0510869175195694
        min_q: 4.627923011779785
    num_steps_sampled: 2673664
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2611,5915.32,2673664,51.0027,55.959,45.322,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-47-29
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.9590392240521
  episode_reward_mean: 50.11424432336838
  episode_reward_min: 45.321987751656934
  episodes_this_iter: 8
  episodes_total: 26760
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2676736
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014840260148048401
        max_q: 3.4274940490722656
        mean_q: 3.217343807220459
        mean_td_error: -0.15941649675369263
        min_q: 3.133265495300293
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0019290292402729392
        max_q: 4.786617279052734
        mean_q: 4.697666168212891
        mean_td_error: -0.04023320972919464
        min_q: 4.595424175262451
    num_steps_sampled: 2676736
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2614,5921.26,2676736,50.1142,55.959,45.322,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-47-35
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.31763775364141
  episode_reward_mean: 50.18520310041
  episode_reward_min: 45.89596134036836
  episodes_this_iter: 8
  episodes_total: 26792
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2679808
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03288893401622772
        max_q: 3.19978666305542
        mean_q: 2.9970927238464355
        mean_td_error: -0.3459254503250122
        min_q: 2.7816359996795654
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0020694646518677473
        max_q: 4.860757827758789
        mean_q: 4.766826629638672
        mean_td_error: 0.03490069508552551
        min_q: 4.7202348709106445
    num_steps_sampled: 2679808
    num_steps_trained: 26

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2617,5927.07,2679808,50.1852,54.3176,45.896,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-47-41
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.31763775364141
  episode_reward_mean: 50.81847393374665
  episode_reward_min: 45.89596134036836
  episodes_this_iter: 8
  episodes_total: 26824
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2682880
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010587435215711594
        max_q: 3.018301248550415
        mean_q: 2.9205784797668457
        mean_td_error: -0.10967098921537399
        min_q: 2.817535877227783
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001098837936297059
        max_q: 4.729377746582031
        mean_q: 4.598962306976318
        mean_td_error: 0.005995512008666992
        min_q: 4.476921558380127
    num_steps_sampled: 2682880
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2620,5932.87,2682880,50.8185,54.3176,45.896,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-47-46
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.31763775364141
  episode_reward_mean: 50.846692634849134
  episode_reward_min: 45.83482003741499
  episodes_this_iter: 8
  episodes_total: 26856
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2685952
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00376238115131855
        max_q: 3.2534027099609375
        mean_q: 3.1637563705444336
        mean_td_error: -0.05409694463014603
        min_q: 2.9352407455444336
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0023891793098300695
        max_q: 4.547306537628174
        mean_q: 4.389524936676025
        mean_td_error: -0.03037489950656891
        min_q: 4.358134746551514
    num_steps_sampled: 2685952
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2623,5938.5,2685952,50.8467,54.3176,45.8348,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-47-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.19303894864587
  episode_reward_mean: 50.72367756475978
  episode_reward_min: 45.83482003741499
  episodes_this_iter: 8
  episodes_total: 26888
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2689024
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001481918036006391
        max_q: 3.474903106689453
        mean_q: 3.421302080154419
        mean_td_error: -0.012389875948429108
        min_q: 3.3259847164154053
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001679267967119813
        max_q: 4.583805084228516
        mean_q: 4.475115776062012
        mean_td_error: -0.024159446358680725
        min_q: 4.353754997253418
    num_steps_sampled: 2689024
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2626,5944.15,2689024,50.7237,54.193,45.8348,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-47-59
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.693431356975125
  episode_reward_mean: 50.53222997989328
  episode_reward_min: 45.83482003741499
  episodes_this_iter: 16
  episodes_total: 26920
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2692096
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005247859284281731
        max_q: 3.5411272048950195
        mean_q: 3.4428353309631348
        mean_td_error: -0.06363672018051147
        min_q: 3.3858988285064697
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0011890707537531853
        max_q: 4.552186489105225
        mean_q: 4.515744209289551
        mean_td_error: 0.021513909101486206
        min_q: 4.440659523010254
    num_steps_sampled: 2692096
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2629,5950.31,2692096,50.5322,57.6934,45.8348,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-48-05
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.693431356975125
  episode_reward_mean: 50.30286030930425
  episode_reward_min: 45.83482003741499
  episodes_this_iter: 8
  episodes_total: 26944
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2695168
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014393074437975883
        max_q: 3.3478281497955322
        mean_q: 3.2813265323638916
        mean_td_error: -0.17784735560417175
        min_q: 3.1792197227478027
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0026511973701417446
        max_q: 4.638328552246094
        mean_q: 4.55650520324707
        mean_td_error: -0.057991862297058105
        min_q: 4.450468063354492
    num_steps_sampled: 2695168
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2632,5956.17,2695168,50.3029,57.6934,45.8348,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-48-11
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.693431356975125
  episode_reward_mean: 50.4883588387429
  episode_reward_min: 46.80056533871346
  episodes_this_iter: 8
  episodes_total: 26976
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2698240
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002009365241974592
        max_q: 3.3937861919403076
        mean_q: 3.2101213932037354
        mean_td_error: -0.0011253803968429565
        min_q: 3.0898711681365967
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0026096776127815247
        max_q: 4.699416637420654
        mean_q: 4.590394973754883
        mean_td_error: 0.046096667647361755
        min_q: 4.550130367279053
    num_steps_sampled: 2698240
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2635,5962.47,2698240,50.4884,57.6934,46.8006,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-48-18
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.693431356975125
  episode_reward_mean: 50.5472603471257
  episode_reward_min: 46.80056533871346
  episodes_this_iter: 8
  episodes_total: 27008
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2701312
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008826684206724167
        max_q: 3.3448808193206787
        mean_q: 3.223074436187744
        mean_td_error: -0.1258186399936676
        min_q: 3.1157259941101074
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.017160214483737946
        max_q: 4.496058464050293
        mean_q: 3.663109064102173
        mean_td_error: -0.21288545429706573
        min_q: 3.3555502891540527
    num_steps_sampled: 2701312
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2638,5968.81,2701312,50.5473,57.6934,46.8006,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-48-25
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.27321500699947
  episode_reward_mean: 50.61042216568515
  episode_reward_min: 48.268812761307125
  episodes_this_iter: 8
  episodes_total: 27040
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2704384
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010367375798523426
        max_q: 3.4080474376678467
        mean_q: 3.3066632747650146
        mean_td_error: -0.1407092809677124
        min_q: 3.1895861625671387
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.016119791194796562
        max_q: 3.911142349243164
        mean_q: 3.6587073802948
        mean_td_error: -0.2414398342370987
        min_q: 3.423689126968384
    num_steps_sampled: 2704384
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2641,5975.47,2704384,50.6104,53.2732,48.2688,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-48-30
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.27321500699947
  episode_reward_mean: 49.594023944240064
  episode_reward_min: 43.889022999777175
  episodes_this_iter: 16
  episodes_total: 27064
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2706432
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.033135298639535904
        max_q: 3.1737215518951416
        mean_q: 2.9971771240234375
        mean_td_error: -0.4305321276187897
        min_q: 2.8158726692199707
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02711184322834015
        max_q: 2.830326795578003
        mean_q: 2.2841291427612305
        mean_td_error: -0.3321249485015869
        min_q: 1.9228858947753906
    num_steps_sampled: 2706432
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2643,5980.59,2706432,49.594,53.2732,43.889,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-48-36
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.27321500699947
  episode_reward_mean: 49.66457912038988
  episode_reward_min: 43.889022999777175
  episodes_this_iter: 8
  episodes_total: 27080
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2708480
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.018069373443722725
        max_q: 3.5652310848236084
        mean_q: 3.2759459018707275
        mean_td_error: -0.2312595248222351
        min_q: 3.1006040573120117
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0020563448779284954
        max_q: 2.6256141662597656
        mean_q: 2.4260952472686768
        mean_td_error: 0.010895319283008575
        min_q: 2.2359137535095215
    num_steps_sampled: 2708480
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2645,5986.14,2708480,49.6646,53.2732,43.889,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-48-42
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.47933794808732
  episode_reward_mean: 50.031702288603476
  episode_reward_min: 43.889022999777175
  episodes_this_iter: 16
  episodes_total: 27104
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2710528
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.030155988410115242
        max_q: 3.5115551948547363
        mean_q: 3.2000486850738525
        mean_td_error: -0.43031883239746094
        min_q: 2.973344564437866
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01966381072998047
        max_q: 2.4848814010620117
        mean_q: 2.2833664417266846
        mean_td_error: -0.244367316365242
        min_q: 1.9947154521942139
    num_steps_sampled: 2710528
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2647,5992.19,2710528,50.0317,55.4793,43.889,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-48-49
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.47933794808732
  episode_reward_mean: 49.906636498841614
  episode_reward_min: 43.889022999777175
  episodes_this_iter: 8
  episodes_total: 27120
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2712576
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0072877006605267525
        max_q: 3.614941120147705
        mean_q: 3.3967201709747314
        mean_td_error: -0.08203663676977158
        min_q: 3.2449588775634766
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03334246575832367
        max_q: 2.2139363288879395
        mean_q: 1.8582522869110107
        mean_td_error: -0.5216520428657532
        min_q: 1.5792033672332764
    num_steps_sampled: 2712576
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2649,5999.2,2712576,49.9066,55.4793,43.889,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-48-56
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.778889559810565
  episode_reward_mean: 50.56144861334147
  episode_reward_min: 43.889022999777175
  episodes_this_iter: 8
  episodes_total: 27144
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2714624
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005051840096712112
        max_q: 3.5299439430236816
        mean_q: 3.373340606689453
        mean_td_error: -0.0702226385474205
        min_q: 3.2840943336486816
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.04106418415904045
        max_q: 1.710325002670288
        mean_q: 1.4258019924163818
        mean_td_error: -0.4680289626121521
        min_q: 1.1617076396942139
    num_steps_sampled: 2714624
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2651,6005.7,2714624,50.5614,56.7789,43.889,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-49-02
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.778889559810565
  episode_reward_mean: 51.49754578025854
  episode_reward_min: 47.441710626453926
  episodes_this_iter: 8
  episodes_total: 27160
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2716672
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007264016196131706
        max_q: 3.554281711578369
        mean_q: 3.469025135040283
        mean_td_error: -0.10480525344610214
        min_q: 3.4147064685821533
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011455629020929337
        max_q: 1.9604777097702026
        mean_q: 1.7155673503875732
        mean_td_error: -0.12926088273525238
        min_q: 1.615359902381897
    num_steps_sampled: 2716672
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2653,6011.45,2716672,51.4975,56.7789,47.4417,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-49-07
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.778889559810565
  episode_reward_mean: 52.357207969859274
  episode_reward_min: 47.441710626453926
  episodes_this_iter: 8
  episodes_total: 27184
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2718720
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00864595826715231
        max_q: 3.7676587104797363
        mean_q: 3.5520782470703125
        mean_td_error: -0.1147371158003807
        min_q: 3.3791162967681885
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015743045136332512
        max_q: 2.3257346153259277
        mean_q: 1.9887909889221191
        mean_td_error: -0.18079324066638947
        min_q: 1.8430047035217285
    num_steps_sampled: 2718720
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2655,6016.76,2718720,52.3572,56.7789,47.4417,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-49-13
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.778889559810565
  episode_reward_mean: 52.13185383650191
  episode_reward_min: 47.441710626453926
  episodes_this_iter: 8
  episodes_total: 27200
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2720768
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004905885551124811
        max_q: 3.5620105266571045
        mean_q: 3.4148218631744385
        mean_td_error: -0.0971699208021164
        min_q: 3.324313163757324
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.04691352695226669
        max_q: 2.161954879760742
        mean_q: 1.8052358627319336
        mean_td_error: -0.4859008193016052
        min_q: 1.5569429397583008
    num_steps_sampled: 2720768
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2657,6022.21,2720768,52.1319,56.7789,47.4417,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-49-18
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.778889559810565
  episode_reward_mean: 52.648573353689805
  episode_reward_min: 47.441710626453926
  episodes_this_iter: 8
  episodes_total: 27224
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2722816
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0029210830107331276
        max_q: 3.5870590209960938
        mean_q: 3.5180954933166504
        mean_td_error: -0.050866082310676575
        min_q: 3.468902111053467
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012611097656190395
        max_q: 2.101743698120117
        mean_q: 1.7673919200897217
        mean_td_error: -0.11921758204698563
        min_q: 1.462276577949524
    num_steps_sampled: 2722816
    num_steps_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2659,6027.54,2722816,52.6486,56.7789,47.4417,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-49-24
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.505616338139696
  episode_reward_mean: 53.1756495645595
  episode_reward_min: 49.24104910165444
  episodes_this_iter: 16
  episodes_total: 27248
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2724864
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0021422673016786575
        max_q: 3.841324806213379
        mean_q: 3.766110897064209
        mean_td_error: -0.03438110649585724
        min_q: 3.70206356048584
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009338895790278912
        max_q: 2.305837631225586
        mean_q: 2.1573705673217773
        mean_td_error: -0.09998892992734909
        min_q: 2.0604605674743652
    num_steps_sampled: 2724864
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2661,6032.89,2724864,53.1756,56.5056,49.241,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-49-29
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.505616338139696
  episode_reward_mean: 52.920949365055456
  episode_reward_min: 49.24104910165444
  episodes_this_iter: 8
  episodes_total: 27264
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2726912
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0021951301023364067
        max_q: 4.031320571899414
        mean_q: 3.925915002822876
        mean_td_error: -0.039230309426784515
        min_q: 3.8321309089660645
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014782330952584743
        max_q: 2.805922508239746
        mean_q: 2.5278146266937256
        mean_td_error: -0.16419121623039246
        min_q: 2.3326563835144043
    num_steps_sampled: 2726912
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2663,6038.29,2726912,52.9209,56.5056,49.241,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-49-35
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.62665590219181
  episode_reward_mean: 53.11073819285281
  episode_reward_min: 49.24104910165444
  episodes_this_iter: 16
  episodes_total: 27288
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2728960
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001804092898964882
        max_q: 4.06427526473999
        mean_q: 3.9836316108703613
        mean_td_error: -0.03029744327068329
        min_q: 3.9058892726898193
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012366874143481255
        max_q: 2.916721820831299
        mean_q: 2.730231285095215
        mean_td_error: 0.1910085678100586
        min_q: 2.5751781463623047
    num_steps_sampled: 2728960
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2665,6043.76,2728960,53.1107,56.6267,49.241,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-49-41
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.62665590219181
  episode_reward_mean: 53.032381438800364
  episode_reward_min: 49.13277402321198
  episodes_this_iter: 8
  episodes_total: 27304
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2731008
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002395760966464877
        max_q: 4.286827564239502
        mean_q: 4.175573348999023
        mean_td_error: -0.03867939114570618
        min_q: 4.0801167488098145
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0028363228775560856
        max_q: 3.4019336700439453
        mean_q: 3.244420051574707
        mean_td_error: 0.03839070349931717
        min_q: 3.0799601078033447
    num_steps_sampled: 2731008
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2667,6049.52,2731008,53.0324,56.6267,49.1328,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-49-47
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.62665590219181
  episode_reward_mean: 52.47115335107183
  episode_reward_min: 49.13277402321198
  episodes_this_iter: 8
  episodes_total: 27328
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2733056
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0036389429587870836
        max_q: 4.452021598815918
        mean_q: 4.351898193359375
        mean_td_error: -0.06583935022354126
        min_q: 4.268165588378906
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0034049623645842075
        max_q: 3.749843120574951
        mean_q: 3.595841407775879
        mean_td_error: 0.04923082888126373
        min_q: 3.4992454051971436
    num_steps_sampled: 2733056
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2669,6055.31,2733056,52.4712,56.6267,49.1328,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-49-53
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.663523296355095
  episode_reward_mean: 52.65275577893348
  episode_reward_min: 49.13277402321198
  episodes_this_iter: 8
  episodes_total: 27344
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2735104
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002726842649281025
        max_q: 4.577723979949951
        mean_q: 4.545279026031494
        mean_td_error: -0.04727071523666382
        min_q: 4.509727478027344
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0008302594651468098
        max_q: 3.7630534172058105
        mean_q: 3.6892940998077393
        mean_td_error: 0.0033940300345420837
        min_q: 3.5182716846466064
    num_steps_sampled: 2735104
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2671,6060.93,2735104,52.6528,56.6635,49.1328,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-49-58
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.663523296355095
  episode_reward_mean: 52.96203994110885
  episode_reward_min: 49.13277402321198
  episodes_this_iter: 8
  episodes_total: 27368
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2737152
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0015380553668364882
        max_q: 4.5646748542785645
        mean_q: 4.517518520355225
        mean_td_error: -0.021396160125732422
        min_q: 4.451223373413086
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0021329547744244337
        max_q: 4.088431358337402
        mean_q: 4.053839683532715
        mean_td_error: 0.02907974272966385
        min_q: 3.9294230937957764
    num_steps_sampled: 2737152
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2673,6066.47,2737152,52.962,56.6635,49.1328,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-50-04
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.663523296355095
  episode_reward_mean: 53.02510927282275
  episode_reward_min: 49.13277402321198
  episodes_this_iter: 16
  episodes_total: 27392
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2739200
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00359499198384583
        max_q: 4.769650459289551
        mean_q: 4.684168815612793
        mean_td_error: 0.06641221046447754
        min_q: 4.526428699493408
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0037127479445189238
        max_q: 4.387078762054443
        mean_q: 4.341341495513916
        mean_td_error: -0.05544058978557587
        min_q: 4.282544136047363
    num_steps_sampled: 2739200
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2675,6071.75,2739200,53.0251,56.6635,49.1328,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-50-09
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.27870345251195
  episode_reward_mean: 53.5152097661249
  episode_reward_min: 49.01845835300708
  episodes_this_iter: 8
  episodes_total: 27408
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2741248
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003271516878157854
        max_q: 4.9187188148498535
        mean_q: 4.733875274658203
        mean_td_error: 0.059703946113586426
        min_q: 4.622086524963379
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0023636952973902225
        max_q: 4.335245132446289
        mean_q: 4.27009391784668
        mean_td_error: 0.028891459107398987
        min_q: 4.215158939361572
    num_steps_sampled: 2741248
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2677,6077.02,2741248,53.5152,57.2787,49.0185,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-50-15
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.27870345251195
  episode_reward_mean: 53.70037512124207
  episode_reward_min: 49.01845835300708
  episodes_this_iter: 16
  episodes_total: 27432
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2743296
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.019376378506422043
        max_q: 4.57011079788208
        mean_q: 4.33662748336792
        mean_td_error: -0.364182710647583
        min_q: 4.160156726837158
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00668953126296401
        max_q: 4.1339263916015625
        mean_q: 4.0643486976623535
        mean_td_error: -0.0914197564125061
        min_q: 3.9878311157226562
    num_steps_sampled: 2743296
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2679,6082.7,2743296,53.7004,57.2787,49.0185,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-50-21
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.27870345251195
  episode_reward_mean: 53.13125905251197
  episode_reward_min: 49.01845835300708
  episodes_this_iter: 8
  episodes_total: 27448
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2745344
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00148733367677778
        max_q: 4.552350997924805
        mean_q: 4.396895408630371
        mean_td_error: -0.0001998990774154663
        min_q: 4.34065055847168
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005249998066574335
        max_q: 4.131298542022705
        mean_q: 4.064074993133545
        mean_td_error: -0.07991670072078705
        min_q: 4.000760555267334
    num_steps_sampled: 2745344
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2681,6088.32,2745344,53.1313,57.2787,49.0185,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-50-26
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.27870345251195
  episode_reward_mean: 52.91181834651514
  episode_reward_min: 49.01845835300708
  episodes_this_iter: 16
  episodes_total: 27472
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2747392
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004825614858418703
        max_q: 4.501984596252441
        mean_q: 4.422353744506836
        mean_td_error: 0.09551972150802612
        min_q: 4.2566962242126465
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004334993194788694
        max_q: 4.1074910163879395
        mean_q: 4.054988384246826
        mean_td_error: -0.06339247524738312
        min_q: 4.010952949523926
    num_steps_sampled: 2747392
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2683,6093.44,2747392,52.9118,57.2787,49.0185,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-50-31
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.27870345251195
  episode_reward_mean: 52.59406398337198
  episode_reward_min: 49.01845835300708
  episodes_this_iter: 8
  episodes_total: 27488
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2749440
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003768111811950803
        max_q: 4.382730484008789
        mean_q: 4.267540454864502
        mean_td_error: -0.06733797490596771
        min_q: 4.158605098724365
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001986877294257283
        max_q: 4.258225917816162
        mean_q: 4.2061944007873535
        mean_td_error: -0.024172410368919373
        min_q: 4.139087200164795
    num_steps_sampled: 2749440
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2685,6098.36,2749440,52.5941,57.2787,49.0185,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-50-36
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.99925063931385
  episode_reward_mean: 51.771980095082384
  episode_reward_min: 46.002440168236454
  episodes_this_iter: 8
  episodes_total: 27512
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2751488
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001455468125641346
        max_q: 4.350566387176514
        mean_q: 4.218752861022949
        mean_td_error: -0.006755799055099487
        min_q: 4.016160011291504
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002714870497584343
        max_q: 4.232131481170654
        mean_q: 4.17605447769165
        mean_td_error: -0.03916040062904358
        min_q: 4.088307857513428
    num_steps_sampled: 2751488
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2687,6103.28,2751488,51.772,54.9993,46.0024,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-50-44
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.44784607788264
  episode_reward_mean: 52.24061991141901
  episode_reward_min: 46.002440168236454
  episodes_this_iter: 16
  episodes_total: 27544
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2754560
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0016560823423787951
        max_q: 4.310173511505127
        mean_q: 4.2452263832092285
        mean_td_error: -0.02937716245651245
        min_q: 4.205925464630127
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01882799156010151
        max_q: 4.0632643699646
        mean_q: 3.9318184852600098
        mean_td_error: -0.2574712038040161
        min_q: 3.833004951477051
    num_steps_sampled: 2754560
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2690,6110.36,2754560,52.2406,56.4478,46.0024,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-50-51
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.44784607788264
  episode_reward_mean: 52.54743849391716
  episode_reward_min: 46.002440168236454
  episodes_this_iter: 16
  episodes_total: 27576
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2757632
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00545661523938179
        max_q: 4.588304042816162
        mean_q: 4.450016498565674
        mean_td_error: -0.08977200090885162
        min_q: 4.3678507804870605
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009901629760861397
        max_q: 3.9821362495422363
        mean_q: 3.590470790863037
        mean_td_error: -0.12251915037631989
        min_q: 3.4174866676330566
    num_steps_sampled: 2757632
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2693,6117.35,2757632,52.5474,56.4478,46.0024,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-50-58
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.44784607788264
  episode_reward_mean: 52.93258320840618
  episode_reward_min: 46.002440168236454
  episodes_this_iter: 8
  episodes_total: 27600
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2760704
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003229708643630147
        max_q: 4.816905498504639
        mean_q: 4.62848424911499
        mean_td_error: -0.047293663024902344
        min_q: 4.5191473960876465
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004720213823020458
        max_q: 3.7668585777282715
        mean_q: 3.604433536529541
        mean_td_error: -0.05994889885187149
        min_q: 3.5308873653411865
    num_steps_sampled: 2760704
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2696,6123.99,2760704,52.9326,56.4478,46.0024,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-51-05
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.44784607788264
  episode_reward_mean: 52.80115415846194
  episode_reward_min: 50.13540945969507
  episodes_this_iter: 8
  episodes_total: 27632
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2763776
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0016059732297435403
        max_q: 4.824328899383545
        mean_q: 4.765337944030762
        mean_td_error: -0.03187116980552673
        min_q: 4.688852310180664
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007368756458163261
        max_q: 3.8823482990264893
        mean_q: 3.841284990310669
        mean_td_error: -0.10421547293663025
        min_q: 3.8013176918029785
    num_steps_sampled: 2763776
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2699,6131.09,2763776,52.8012,56.4478,50.1354,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-51-10
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.67844627263684
  episode_reward_mean: 52.65964669039183
  episode_reward_min: 49.81025586860024
  episodes_this_iter: 8
  episodes_total: 27656
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2765824
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0008872860344126821
        max_q: 4.785543918609619
        mean_q: 4.735771179199219
        mean_td_error: 0.0014644116163253784
        min_q: 4.648853302001953
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007114442065358162
        max_q: 4.0310282707214355
        mean_q: 3.9520950317382812
        mean_td_error: -0.11844091862440109
        min_q: 3.8451876640319824
    num_steps_sampled: 2765824
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2701,6136,2765824,52.6596,55.6784,49.8103,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-51-15
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.67844627263684
  episode_reward_mean: 52.2521867087405
  episode_reward_min: 48.4711233990033
  episodes_this_iter: 8
  episodes_total: 27672
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2767872
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01940849795937538
        max_q: 4.7537713050842285
        mean_q: 4.552175045013428
        mean_td_error: -0.38718950748443604
        min_q: 4.4156813621521
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004250896628946066
        max_q: 4.014068126678467
        mean_q: 3.879103422164917
        mean_td_error: -0.05990143120288849
        min_q: 3.5795681476593018
    num_steps_sampled: 2767872
    num_steps_trained: 27

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2703,6140.96,2767872,52.2522,55.6784,48.4711,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-51-21
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.01434405727676
  episode_reward_mean: 51.739637913296
  episode_reward_min: 48.4711233990033
  episodes_this_iter: 8
  episodes_total: 27696
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2769920
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006992404814809561
        max_q: 4.9418487548828125
        mean_q: 4.530001640319824
        mean_td_error: -0.0873253345489502
        min_q: 4.201450347900391
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002835101680830121
        max_q: 4.030422210693359
        mean_q: 3.953540325164795
        mean_td_error: -0.0407724529504776
        min_q: 3.904038906097412
    num_steps_sampled: 2769920
    num_steps_trained: 276

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2705,6146.11,2769920,51.7396,55.0143,48.4711,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-51-26
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.26546100404065
  episode_reward_mean: 52.019721903994544
  episode_reward_min: 48.4711233990033
  episodes_this_iter: 8
  episodes_total: 27712
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2771968
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.018005050718784332
        max_q: 4.601851463317871
        mean_q: 4.321167469024658
        mean_td_error: -0.3693070411682129
        min_q: 4.162899971008301
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00650822976604104
        max_q: 4.170465469360352
        mean_q: 4.099524021148682
        mean_td_error: -0.0843333899974823
        min_q: 3.9986166954040527
    num_steps_sampled: 2771968
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2707,6151.58,2771968,52.0197,55.2655,48.4711,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-51-32
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.26546100404065
  episode_reward_mean: 52.16432690069368
  episode_reward_min: 47.52321356642007
  episodes_this_iter: 8
  episodes_total: 27736
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2774016
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0026816176250576973
        max_q: 4.534332275390625
        mean_q: 4.4658613204956055
        mean_td_error: -0.05263237655162811
        min_q: 4.395471096038818
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.017737576737999916
        max_q: 4.1557464599609375
        mean_q: 4.087959289550781
        mean_td_error: -0.26691576838493347
        min_q: 3.953486680984497
    num_steps_sampled: 2774016
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2709,6156.84,2774016,52.1643,55.2655,47.5232,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-51-37
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.26546100404065
  episode_reward_mean: 51.341049379136095
  episode_reward_min: 47.52321356642007
  episodes_this_iter: 16
  episodes_total: 27760
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2776064
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008410469628870487
        max_q: 4.534236431121826
        mean_q: 4.4873881340026855
        mean_td_error: 0.179319828748703
        min_q: 4.433541297912598
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009876146912574768
        max_q: 4.321481227874756
        mean_q: 4.244614601135254
        mean_td_error: 0.13759203255176544
        min_q: 4.093257427215576
    num_steps_sampled: 2776064
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2711,6161.98,2776064,51.341,55.2655,47.5232,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-51-42
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.5560695631375
  episode_reward_mean: 51.916165874613455
  episode_reward_min: 47.52321356642007
  episodes_this_iter: 8
  episodes_total: 27776
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2778112
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012759983539581299
        max_q: 4.417294502258301
        mean_q: 4.286086082458496
        mean_td_error: -0.2535940706729889
        min_q: 4.087095737457275
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004764143377542496
        max_q: 4.452775478363037
        mean_q: 4.315402030944824
        mean_td_error: 0.049247533082962036
        min_q: 4.118592262268066
    num_steps_sampled: 2778112
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2713,6167.36,2778112,51.9162,58.5561,47.5232,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-51-48
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.5560695631375
  episode_reward_mean: 52.67545932927315
  episode_reward_min: 47.52321356642007
  episodes_this_iter: 16
  episodes_total: 27800
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2780160
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006569271441549063
        max_q: 4.574288368225098
        mean_q: 4.495467185974121
        mean_td_error: 0.12898114323616028
        min_q: 4.382734298706055
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002828264143317938
        max_q: 4.553667068481445
        mean_q: 4.437367916107178
        mean_td_error: 0.031998395919799805
        min_q: 4.359005451202393
    num_steps_sampled: 2780160
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2715,6173.23,2780160,52.6755,58.5561,47.5232,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-51-54
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.5560695631375
  episode_reward_mean: 52.43544298023296
  episode_reward_min: 47.52321356642007
  episodes_this_iter: 8
  episodes_total: 27816
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2782208
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011600692756474018
        max_q: 4.788151741027832
        mean_q: 4.437114238739014
        mean_td_error: -0.17174895107746124
        min_q: 4.324987888336182
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009409586898982525
        max_q: 4.3730149269104
        mean_q: 4.010293006896973
        mean_td_error: 0.07476512342691422
        min_q: 3.5740408897399902
    num_steps_sampled: 2782208
    num_steps_trained: 27

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2717,6178.77,2782208,52.4354,58.5561,47.5232,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-52-00
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 61.545910679974305
  episode_reward_mean: 53.608263046239315
  episode_reward_min: 48.670926318014466
  episodes_this_iter: 8
  episodes_total: 27840
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2784256
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.06007977947592735
        max_q: 3.9229984283447266
        mean_q: 3.3799498081207275
        mean_td_error: -1.2152979373931885
        min_q: 3.2326302528381348
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.042450837790966034
        max_q: 3.567288398742676
        mean_q: 2.9028725624084473
        mean_td_error: -0.3141648471355438
        min_q: 2.5359764099121094
    num_steps_sampled: 2784256
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2719,6184.51,2784256,53.6083,61.5459,48.6709,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-52-06
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 61.545910679974305
  episode_reward_mean: 53.83937065217623
  episode_reward_min: 46.004383456619784
  episodes_this_iter: 8
  episodes_total: 27856
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2786304
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.022892488166689873
        max_q: 3.259397268295288
        mean_q: 2.783503293991089
        mean_td_error: -0.3551867604255676
        min_q: 2.4656457901000977
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03354181349277496
        max_q: 3.235429048538208
        mean_q: 1.63105309009552
        mean_td_error: -0.1662750095129013
        min_q: 0.8550139665603638
    num_steps_sampled: 2786304
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2721,6190.2,2786304,53.8394,61.5459,46.0044,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-52-12
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 61.545910679974305
  episode_reward_mean: 53.04441443315379
  episode_reward_min: 45.904142904586244
  episodes_this_iter: 8
  episodes_total: 27880
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2788352
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.027180258184671402
        max_q: 2.774660348892212
        mean_q: 2.5682239532470703
        mean_td_error: -0.31631162762641907
        min_q: 2.352318286895752
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.028364308178424835
        max_q: 2.7558858394622803
        mean_q: 2.3693296909332275
        mean_td_error: -0.28193360567092896
        min_q: 2.1771748065948486
    num_steps_sampled: 2788352
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2723,6195.92,2788352,53.0444,61.5459,45.9041,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-52-18
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 61.545910679974305
  episode_reward_mean: 52.27027589395711
  episode_reward_min: 45.904142904586244
  episodes_this_iter: 16
  episodes_total: 27904
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2790400
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03997131437063217
        max_q: 2.2121315002441406
        mean_q: 1.9839578866958618
        mean_td_error: -0.48204195499420166
        min_q: 1.7437465190887451
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01614077389240265
        max_q: 2.884209156036377
        mean_q: 2.4839391708374023
        mean_td_error: -0.130296528339386
        min_q: 2.2382733821868896
    num_steps_sampled: 2790400
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2725,6201.66,2790400,52.2703,61.5459,45.9041,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-52-24
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 61.545910679974305
  episode_reward_mean: 52.06080556768342
  episode_reward_min: 45.904142904586244
  episodes_this_iter: 8
  episodes_total: 27920
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2792448
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0017791202990338206
        max_q: 2.6682515144348145
        mean_q: 2.5478994846343994
        mean_td_error: -0.01634056866168976
        min_q: 2.405416965484619
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004819812718778849
        max_q: 3.238745927810669
        mean_q: 3.08780837059021
        mean_td_error: 0.0360470712184906
        min_q: 3.0261919498443604
    num_steps_sampled: 2792448
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2727,6207.61,2792448,52.0608,61.5459,45.9041,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-52-30
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.86336571085304
  episode_reward_mean: 50.483859015547786
  episode_reward_min: 45.904142904586244
  episodes_this_iter: 16
  episodes_total: 27944
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2794496
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005809127818793058
        max_q: 3.0879673957824707
        mean_q: 2.996257781982422
        mean_td_error: -0.08950477093458176
        min_q: 2.923154830932617
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0035553465131670237
        max_q: 3.511876344680786
        mean_q: 3.429643154144287
        mean_td_error: -0.034673042595386505
        min_q: 3.3512072563171387
    num_steps_sampled: 2794496
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2729,6213.61,2794496,50.4839,56.8634,45.9041,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-52-36
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.90386064626517
  episode_reward_mean: 50.8697227613984
  episode_reward_min: 45.904142904586244
  episodes_this_iter: 8
  episodes_total: 27960
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2796544
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003876958740875125
        max_q: 3.4489078521728516
        mean_q: 3.3312692642211914
        mean_td_error: -0.05014624446630478
        min_q: 3.283447504043579
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0047212932258844376
        max_q: 3.956838607788086
        mean_q: 3.901425838470459
        mean_td_error: 0.07516030222177505
        min_q: 3.6934773921966553
    num_steps_sampled: 2796544
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2731,6219.45,2796544,50.8697,54.9039,45.9041,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-52-42
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.90386064626517
  episode_reward_mean: 49.566782798297716
  episode_reward_min: 42.72866920687964
  episodes_this_iter: 16
  episodes_total: 27984
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2798592
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001252504764124751
        max_q: 3.5962536334991455
        mean_q: 3.499056816101074
        mean_td_error: -0.012606807053089142
        min_q: 3.4367055892944336
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002135435352101922
        max_q: 4.370228290557861
        mean_q: 4.338626861572266
        mean_td_error: 0.03132261335849762
        min_q: 4.262709617614746
    num_steps_sampled: 2798592
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2733,6225.27,2798592,49.5668,54.9039,42.7287,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-52-48
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.80175016167975
  episode_reward_mean: 48.83242964509591
  episode_reward_min: 42.72866920687964
  episodes_this_iter: 8
  episodes_total: 28000
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2800640
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003043059492483735
        max_q: 3.642967700958252
        mean_q: 3.5513229370117188
        mean_td_error: -0.04332723468542099
        min_q: 3.471047878265381
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002725605620071292
        max_q: 4.654139041900635
        mean_q: 4.6224775314331055
        mean_td_error: 0.04681578278541565
        min_q: 4.504283428192139
    num_steps_sampled: 2800640
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2735,6231.16,2800640,48.8324,53.8018,42.7287,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-52-54
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.80175016167975
  episode_reward_mean: 48.37826678935654
  episode_reward_min: 42.72866920687964
  episodes_this_iter: 8
  episodes_total: 28024
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2802688
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005898546427488327
        max_q: 3.777401924133301
        mean_q: 3.6821231842041016
        mean_td_error: -0.0795387402176857
        min_q: 3.6384871006011963
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003995263949036598
        max_q: 4.811712265014648
        mean_q: 4.7769694328308105
        mean_td_error: 0.06427180767059326
        min_q: 4.64252233505249
    num_steps_sampled: 2802688
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2737,6236.74,2802688,48.3783,53.8018,42.7287,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-52-59
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.944334968252086
  episode_reward_mean: 48.59022281793819
  episode_reward_min: 42.72866920687964
  episodes_this_iter: 8
  episodes_total: 28040
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2804736
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00179470784496516
        max_q: 3.7503597736358643
        mean_q: 3.6784818172454834
        mean_td_error: -0.026591673493385315
        min_q: 3.412980318069458
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0014098453102633357
        max_q: 4.8977274894714355
        mean_q: 4.8787689208984375
        mean_td_error: -0.022595375776290894
        min_q: 4.825439929962158
    num_steps_sampled: 2804736
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2739,6241.8,2804736,48.5902,54.9443,42.7287,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-53-06
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.944334968252086
  episode_reward_mean: 48.53253584277181
  episode_reward_min: 42.72866920687964
  episodes_this_iter: 8
  episodes_total: 28072
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2807808
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0037236271891742945
        max_q: 3.7305397987365723
        mean_q: 3.6471314430236816
        mean_td_error: -0.06081350892782211
        min_q: 3.5784363746643066
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0011081008706241846
        max_q: 4.976523399353027
        mean_q: 4.938195705413818
        mean_td_error: -0.018188193440437317
        min_q: 4.83692741394043
    num_steps_sampled: 2807808
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2742,6248.41,2807808,48.5325,54.9443,42.7287,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-53-13
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.336639090880034
  episode_reward_mean: 50.38318630992128
  episode_reward_min: 46.4559541444616
  episodes_this_iter: 8
  episodes_total: 28104
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2810880
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005628690123558044
        max_q: 3.8600986003875732
        mean_q: 3.7294816970825195
        mean_td_error: -0.07402353733778
        min_q: 3.633382558822632
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0011656655697152019
        max_q: 4.957955360412598
        mean_q: 4.9250335693359375
        mean_td_error: -0.02004493772983551
        min_q: 4.812666416168213
    num_steps_sampled: 2810880
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2745,6255.37,2810880,50.3832,55.3366,46.456,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-53-18
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.336639090880034
  episode_reward_mean: 50.55721551019249
  episode_reward_min: 46.4559541444616
  episodes_this_iter: 16
  episodes_total: 28128
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2812928
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0012791829649358988
        max_q: 3.8637280464172363
        mean_q: 3.7924299240112305
        mean_td_error: -0.011387355625629425
        min_q: 3.642810821533203
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0014672059332951903
        max_q: 5.040085315704346
        mean_q: 4.971617221832275
        mean_td_error: 0.02405652403831482
        min_q: 4.889107704162598
    num_steps_sampled: 2812928
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2747,6260.32,2812928,50.5572,55.3366,46.456,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-53-23
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.336639090880034
  episode_reward_mean: 50.67409371772172
  episode_reward_min: 46.4559541444616
  episodes_this_iter: 8
  episodes_total: 28144
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2814976
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007923241704702377
        max_q: 3.8052845001220703
        mean_q: 3.7612526416778564
        mean_td_error: -0.12717154622077942
        min_q: 3.6559598445892334
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003967315889894962
        max_q: 4.960598468780518
        mean_q: 4.9303131103515625
        mean_td_error: 0.07313284277915955
        min_q: 4.853411674499512
    num_steps_sampled: 2814976
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2749,6265.38,2814976,50.6741,55.3366,46.456,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-53-29
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.336639090880034
  episode_reward_mean: 50.354137739540604
  episode_reward_min: 46.4559541444616
  episodes_this_iter: 8
  episodes_total: 28168
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2817024
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0058211530558764935
        max_q: 4.135013580322266
        mean_q: 3.8966054916381836
        mean_td_error: 0.06538877636194229
        min_q: 3.769376039505005
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006101718172430992
        max_q: 4.912844181060791
        mean_q: 4.805794715881348
        mean_td_error: -0.10236610472202301
        min_q: 4.702456951141357
    num_steps_sampled: 2817024
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2751,6270.47,2817024,50.3541,55.3366,46.456,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-53-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.336639090880034
  episode_reward_mean: 50.41199034884261
  episode_reward_min: 48.05506896995812
  episodes_this_iter: 8
  episodes_total: 28184
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2819072
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006303796544671059
        max_q: 4.001668930053711
        mean_q: 3.6994941234588623
        mean_td_error: -0.05755896866321564
        min_q: 3.1218976974487305
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0007540180231444538
        max_q: 4.790070533752441
        mean_q: 4.73152494430542
        mean_td_error: 0.0107107013463974
        min_q: 4.6837873458862305
    num_steps_sampled: 2819072
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2753,6275.63,2819072,50.412,55.3366,48.0551,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-53-39
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.96754872571693
  episode_reward_mean: 51.276621015382716
  episode_reward_min: 48.05506896995812
  episodes_this_iter: 8
  episodes_total: 28208
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2821120
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.014507962390780449
        max_q: 2.9897592067718506
        mean_q: 2.691704511642456
        mean_td_error: -0.23358768224716187
        min_q: 2.448368549346924
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0022670459002256393
        max_q: 4.6906585693359375
        mean_q: 4.607535362243652
        mean_td_error: 0.03502930700778961
        min_q: 4.5327887535095215
    num_steps_sampled: 2821120
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2755,6280.77,2821120,51.2766,54.9675,48.0551,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-53-45
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.96754872571693
  episode_reward_mean: 51.2305019924511
  episode_reward_min: 46.621761470222474
  episodes_this_iter: 8
  episodes_total: 28224
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2823168
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00745412427932024
        max_q: 2.8879570960998535
        mean_q: 2.710517644882202
        mean_td_error: -0.12012749165296555
        min_q: 2.5174522399902344
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002816912718117237
        max_q: 4.578831672668457
        mean_q: 4.5072479248046875
        mean_td_error: -0.04755760729312897
        min_q: 4.437119007110596
    num_steps_sampled: 2823168
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2757,6286.03,2823168,51.2305,54.9675,46.6218,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-53-50
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.96754872571693
  episode_reward_mean: 50.90379203783634
  episode_reward_min: 46.53863544200248
  episodes_this_iter: 8
  episodes_total: 28248
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2825216
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005755912978202105
        max_q: 2.9953620433807373
        mean_q: 2.7978241443634033
        mean_td_error: 0.06991951167583466
        min_q: 2.6630935668945312
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003765632165595889
        max_q: 4.6515936851501465
        mean_q: 4.608014106750488
        mean_td_error: -0.06068961322307587
        min_q: 4.542631149291992
    num_steps_sampled: 2825216
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2759,6290.97,2825216,50.9038,54.9675,46.5386,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-53-55
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.96754872571693
  episode_reward_mean: 51.17543835082447
  episode_reward_min: 46.53863544200248
  episodes_this_iter: 16
  episodes_total: 28272
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2827264
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005379585083574057
        max_q: 2.656919479370117
        mean_q: 2.5155372619628906
        mean_td_error: -0.08645685017108917
        min_q: 2.4309298992156982
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003564531449228525
        max_q: 4.750992774963379
        mean_q: 4.677108287811279
        mean_td_error: 0.06408272683620453
        min_q: 4.591153144836426
    num_steps_sampled: 2827264
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2761,6295.93,2827264,51.1754,54.9675,46.5386,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-54-02
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.592323383490196
  episode_reward_mean: 51.38037858634514
  episode_reward_min: 46.53863544200248
  episodes_this_iter: 8
  episodes_total: 28296
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2830336
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004515982698649168
        max_q: 2.68005633354187
        mean_q: 2.5215556621551514
        mean_td_error: -0.04167044162750244
        min_q: 2.422666549682617
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005862555932253599
        max_q: 4.660311222076416
        mean_q: 4.6227521896362305
        mean_td_error: -0.11133840680122375
        min_q: 4.56406831741333
    num_steps_sampled: 2830336
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2764,6303.01,2830336,51.3804,54.5923,46.5386,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-54-10
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.061509164667676
  episode_reward_mean: 51.65337069873656
  episode_reward_min: 46.53863544200248
  episodes_this_iter: 8
  episodes_total: 28328
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2833408
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006176082417368889
        max_q: 2.7788214683532715
        mean_q: 2.6780569553375244
        mean_td_error: -0.07955378293991089
        min_q: 2.5654923915863037
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0011224107583984733
        max_q: 4.775344371795654
        mean_q: 4.648911476135254
        mean_td_error: -0.0007937699556350708
        min_q: 4.514819145202637
    num_steps_sampled: 2833408
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2767,6310.17,2833408,51.6534,54.0615,46.5386,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-54-15
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.061509164667676
  episode_reward_mean: 52.14946086289536
  episode_reward_min: 48.83810686935855
  episodes_this_iter: 8
  episodes_total: 28352
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2835456
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0052889203652739525
        max_q: 2.7987630367279053
        mean_q: 2.673933506011963
        mean_td_error: -0.06292013823986053
        min_q: 2.4081108570098877
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003438811283558607
        max_q: 4.642258167266846
        mean_q: 4.48175048828125
        mean_td_error: 0.06573009490966797
        min_q: 4.329292297363281
    num_steps_sampled: 2835456
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2769,6315.38,2835456,52.1495,54.0615,48.8381,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-54-20
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.061509164667676
  episode_reward_mean: 51.83560865832638
  episode_reward_min: 49.209210240342166
  episodes_this_iter: 8
  episodes_total: 28368
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2837504
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01120366994291544
        max_q: 2.629333972930908
        mean_q: 2.4518017768859863
        mean_td_error: -0.13252097368240356
        min_q: 2.3227663040161133
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004131437744945288
        max_q: 4.65049934387207
        mean_q: 4.195856094360352
        mean_td_error: -0.007240869104862213
        min_q: 3.969740629196167
    num_steps_sampled: 2837504
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2771,6320.33,2837504,51.8356,54.0615,49.2092,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-54-25
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.80775960784088
  episode_reward_mean: 51.08586975279482
  episode_reward_min: 47.197199601372034
  episodes_this_iter: 8
  episodes_total: 28392
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2839552
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006129009649157524
        max_q: 2.6706223487854004
        mean_q: 2.5452868938446045
        mean_td_error: -0.08133918046951294
        min_q: 2.473926544189453
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007642757147550583
        max_q: 4.243270397186279
        mean_q: 4.090778350830078
        mean_td_error: -0.13090160489082336
        min_q: 3.972801923751831
    num_steps_sampled: 2839552
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2773,6325.28,2839552,51.0859,53.8078,47.1972,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-54-30
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.19533299942286
  episode_reward_mean: 50.925501157753516
  episode_reward_min: 47.197199601372034
  episodes_this_iter: 16
  episodes_total: 28416
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2841600
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013302057050168514
        max_q: 2.70890212059021
        mean_q: 2.5481209754943848
        mean_td_error: -0.16150519251823425
        min_q: 2.4528682231903076
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0070401630364358425
        max_q: 3.965630531311035
        mean_q: 3.8365747928619385
        mean_td_error: 0.13843734562397003
        min_q: 3.693389415740967
    num_steps_sampled: 2841600
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2775,6330.24,2841600,50.9255,55.1953,47.1972,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-54-35
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.19533299942286
  episode_reward_mean: 50.759478939249405
  episode_reward_min: 47.197199601372034
  episodes_this_iter: 8
  episodes_total: 28432
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2843648
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008286327123641968
        max_q: 3.1694583892822266
        mean_q: 2.862755298614502
        mean_td_error: -0.10698439180850983
        min_q: 2.698733329772949
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005734991282224655
        max_q: 3.704416513442993
        mean_q: 3.505232334136963
        mean_td_error: -0.08756709098815918
        min_q: 3.41827392578125
    num_steps_sampled: 2843648
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2777,6335.19,2843648,50.7595,55.1953,47.1972,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-54-41
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.19533299942286
  episode_reward_mean: 51.09224299703059
  episode_reward_min: 47.197199601372034
  episodes_this_iter: 16
  episodes_total: 28456
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2845696
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006945318076759577
        max_q: 3.193020820617676
        mean_q: 3.060084104537964
        mean_td_error: 0.0823945701122284
        min_q: 2.9636192321777344
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007486389484256506
        max_q: 3.606717109680176
        mean_q: 3.4998855590820312
        mean_td_error: -0.12225323170423508
        min_q: 3.3898110389709473
    num_steps_sampled: 2845696
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2779,6340.39,2845696,51.0922,55.1953,47.1972,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-54-46
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.38061076287962
  episode_reward_mean: 51.60927949458912
  episode_reward_min: 47.197199601372034
  episodes_this_iter: 8
  episodes_total: 28472
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2847744
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012681251391768456
        max_q: 3.2921741008758545
        mean_q: 3.147083044052124
        mean_td_error: -0.15409213304519653
        min_q: 3.0440502166748047
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.019728977233171463
        max_q: 3.7168800830841064
        mean_q: 3.5164847373962402
        mean_td_error: -0.24163775146007538
        min_q: 3.3846168518066406
    num_steps_sampled: 2847744
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2781,6345.97,2847744,51.6093,56.3806,47.1972,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-54-53
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.38061076287962
  episode_reward_mean: 52.646852188878256
  episode_reward_min: 47.32134426007928
  episodes_this_iter: 16
  episodes_total: 28496
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2849792
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008193623274564743
        max_q: 3.386737585067749
        mean_q: 3.233816385269165
        mean_td_error: -0.13296698033809662
        min_q: 3.0938830375671387
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00600214721634984
        max_q: 3.7348744869232178
        mean_q: 3.5601859092712402
        mean_td_error: 0.050799041986465454
        min_q: 3.192141056060791
    num_steps_sampled: 2849792
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2783,6351.89,2849792,52.6469,56.3806,47.3213,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-54-59
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.38061076287962
  episode_reward_mean: 52.42997644174668
  episode_reward_min: 47.32134426007928
  episodes_this_iter: 8
  episodes_total: 28512
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2851840
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0020001002121716738
        max_q: 3.5029828548431396
        mean_q: 3.3954520225524902
        mean_td_error: -0.010718293488025665
        min_q: 3.2471790313720703
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0035828156396746635
        max_q: 3.388899326324463
        mean_q: 3.0882203578948975
        mean_td_error: -0.0020272210240364075
        min_q: 2.956350803375244
    num_steps_sampled: 2851840
    num_steps

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2785,6357.86,2851840,52.43,56.3806,47.3213,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-55-05
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.38061076287962
  episode_reward_mean: 51.91117682198158
  episode_reward_min: 46.49592936639369
  episodes_this_iter: 8
  episodes_total: 28536
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2853888
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002169978339225054
        max_q: 3.3894026279449463
        mean_q: 3.3141050338745117
        mean_td_error: -0.028173603117465973
        min_q: 3.261821985244751
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009472524747252464
        max_q: 3.250248908996582
        mean_q: 3.1086673736572266
        mean_td_error: -0.10680685192346573
        min_q: 3.0136168003082275
    num_steps_sampled: 2853888
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2787,6363.94,2853888,51.9112,56.3806,46.4959,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-55-12
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.38061076287962
  episode_reward_mean: 51.13175965891515
  episode_reward_min: 46.49592936639369
  episodes_this_iter: 8
  episodes_total: 28552
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2855936
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004219308961182833
        max_q: 3.542631149291992
        mean_q: 3.4129035472869873
        mean_td_error: -0.059123873710632324
        min_q: 3.335559129714966
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0039792051538825035
        max_q: 3.5225679874420166
        mean_q: 3.3552942276000977
        mean_td_error: -0.032054267823696136
        min_q: 3.18485951423645
    num_steps_sampled: 2855936
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2789,6370.43,2855936,51.1318,56.3806,46.4959,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-55-18
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.18475908524025
  episode_reward_mean: 50.5652267657118
  episode_reward_min: 46.49592936639369
  episodes_this_iter: 8
  episodes_total: 28576
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2857984
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0024135583080351353
        max_q: 3.7136147022247314
        mean_q: 3.6251893043518066
        mean_td_error: -0.03529224544763565
        min_q: 3.516570568084717
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.022076070308685303
        max_q: 3.427236557006836
        mean_q: 3.2443788051605225
        mean_td_error: -0.22461017966270447
        min_q: 3.1504974365234375
    num_steps_sampled: 2857984
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2791,6376.47,2857984,50.5652,54.1848,46.4959,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-55-24
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.62646840115621
  episode_reward_mean: 51.03867940307161
  episode_reward_min: 46.49592936639369
  episodes_this_iter: 16
  episodes_total: 28600
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2860032
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005034719128161669
        max_q: 3.778982162475586
        mean_q: 3.692614793777466
        mean_td_error: 0.08277663588523865
        min_q: 3.515829563140869
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010150264017283916
        max_q: 3.396324634552002
        mean_q: 3.249920129776001
        mean_td_error: -0.10844694823026657
        min_q: 3.1734728813171387
    num_steps_sampled: 2860032
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2793,6382.52,2860032,51.0387,57.6265,46.4959,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-55-30
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.62646840115621
  episode_reward_mean: 51.76139881350702
  episode_reward_min: 46.49592936639369
  episodes_this_iter: 8
  episodes_total: 28616
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2862080
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004862538073211908
        max_q: 4.071388244628906
        mean_q: 3.9558351039886475
        mean_td_error: -0.062034398317337036
        min_q: 3.7707698345184326
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0168859101831913
        max_q: 3.4951937198638916
        mean_q: 3.2343368530273438
        mean_td_error: -0.16401751339435577
        min_q: 2.9648678302764893
    num_steps_sampled: 2862080
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2795,6388.13,2862080,51.7614,57.6265,46.4959,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-55-36
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.047799450270546
  episode_reward_mean: 53.410049550687475
  episode_reward_min: 46.86611179461069
  episodes_this_iter: 16
  episodes_total: 28640
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2864128
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008854163810610771
        max_q: 4.199676990509033
        mean_q: 3.935680627822876
        mean_td_error: -0.14092472195625305
        min_q: 3.7538235187530518
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01952165737748146
        max_q: 3.045813798904419
        mean_q: 2.8507614135742188
        mean_td_error: -0.21453337371349335
        min_q: 2.526252269744873
    num_steps_sampled: 2864128
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2797,6393.92,2864128,53.41,58.0478,46.8661,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-55-42
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.047799450270546
  episode_reward_mean: 54.425821073230225
  episode_reward_min: 46.86611179461069
  episodes_this_iter: 8
  episodes_total: 28656
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2866176
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0038530235178768635
        max_q: 3.715996742248535
        mean_q: 3.548308849334717
        mean_td_error: 0.03750339150428772
        min_q: 3.3253135681152344
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.027008583769202232
        max_q: 2.5671017169952393
        mean_q: 2.1625137329101562
        mean_td_error: -0.27656713128089905
        min_q: 1.8288220167160034
    num_steps_sampled: 2866176
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2799,6400.24,2866176,54.4258,58.0478,46.8661,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-55-49
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.13434610738213
  episode_reward_mean: 54.94328264547399
  episode_reward_min: 47.66604616371145
  episodes_this_iter: 8
  episodes_total: 28680
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2868224
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006219709292054176
        max_q: 3.9686243534088135
        mean_q: 3.879499912261963
        mean_td_error: -0.09725643694400787
        min_q: 3.8073790073394775
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02240588515996933
        max_q: 2.07098388671875
        mean_q: 1.9693273305892944
        mean_td_error: -0.25567901134490967
        min_q: 1.7813774347305298
    num_steps_sampled: 2868224
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2801,6406.75,2868224,54.9433,59.1343,47.666,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-55-56
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.13434610738213
  episode_reward_mean: 54.88348344929305
  episode_reward_min: 47.66604616371145
  episodes_this_iter: 8
  episodes_total: 28696
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2870272
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009049021638929844
        max_q: 3.8091630935668945
        mean_q: 3.6827340126037598
        mean_td_error: -0.13874590396881104
        min_q: 3.50410795211792
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010344377718865871
        max_q: 2.571976900100708
        mean_q: 2.2677252292633057
        mean_td_error: -0.10028482973575592
        min_q: 2.1168041229248047
    num_steps_sampled: 2870272
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2803,6413.15,2870272,54.8835,59.1343,47.666,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-56-02
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.13434610738213
  episode_reward_mean: 54.594773425863835
  episode_reward_min: 47.66604616371145
  episodes_this_iter: 8
  episodes_total: 28720
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2872320
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012390361167490482
        max_q: 3.789351463317871
        mean_q: 3.5410995483398438
        mean_td_error: -0.1834900975227356
        min_q: 3.3908066749572754
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01715237833559513
        max_q: 2.325814723968506
        mean_q: 2.0731844902038574
        mean_td_error: -0.17458206415176392
        min_q: 1.9420169591903687
    num_steps_sampled: 2872320
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2805,6419.11,2872320,54.5948,59.1343,47.666,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-56-08
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.13434610738213
  episode_reward_mean: 54.72697129408854
  episode_reward_min: 47.66604616371145
  episodes_this_iter: 8
  episodes_total: 28736
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2874368
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01064465194940567
        max_q: 3.8279225826263428
        mean_q: 3.2905113697052
        mean_td_error: -0.14541277289390564
        min_q: 3.0337045192718506
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01410166546702385
        max_q: 2.198673725128174
        mean_q: 1.858895182609558
        mean_td_error: -0.12195923924446106
        min_q: 1.4033660888671875
    num_steps_sampled: 2874368
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2807,6425.34,2874368,54.727,59.1343,47.666,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-56-15
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 59.13434610738213
  episode_reward_mean: 54.99339393727776
  episode_reward_min: 47.66604616371145
  episodes_this_iter: 8
  episodes_total: 28760
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2876416
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02516324259340763
        max_q: 2.9887681007385254
        mean_q: 2.680962324142456
        mean_td_error: -0.3812617063522339
        min_q: 2.544403314590454
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012647449038922787
        max_q: 1.7033758163452148
        mean_q: 1.5591089725494385
        mean_td_error: -0.13404108583927155
        min_q: 1.4545750617980957
    num_steps_sampled: 2876416
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2809,6431.53,2876416,54.9934,59.1343,47.666,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-56-21
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.414808608779005
  episode_reward_mean: 55.663553330614306
  episode_reward_min: 50.96853033905475
  episodes_this_iter: 16
  episodes_total: 28784
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2878464
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008380304090678692
        max_q: 2.9966773986816406
        mean_q: 2.7228047847747803
        mean_td_error: -0.12829719483852386
        min_q: 2.5081796646118164
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012150486931204796
        max_q: 2.035891532897949
        mean_q: 1.8290225267410278
        mean_td_error: -0.1082962155342102
        min_q: 1.713377594947815
    num_steps_sampled: 2878464
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2811,6437.22,2878464,55.6636,58.4148,50.9685,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-56-26
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.414808608779005
  episode_reward_mean: 55.59912843578476
  episode_reward_min: 51.133077951884026
  episodes_this_iter: 8
  episodes_total: 28800
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2880512
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005135887768119574
        max_q: 2.924675703048706
        mean_q: 2.7000503540039062
        mean_td_error: -0.06870625913143158
        min_q: 2.5414412021636963
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004903400782495737
        max_q: 2.563009262084961
        mean_q: 2.373166084289551
        mean_td_error: -0.035483017563819885
        min_q: 2.156531572341919
    num_steps_sampled: 2880512
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2813,6442.1,2880512,55.5991,58.4148,51.1331,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-56-32
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.414808608779005
  episode_reward_mean: 54.27005770554092
  episode_reward_min: 49.736151697042935
  episodes_this_iter: 8
  episodes_total: 28832
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2883584
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010715797543525696
        max_q: 3.0147757530212402
        mean_q: 2.7702291011810303
        mean_td_error: -0.15071949362754822
        min_q: 2.6416115760803223
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0073760193772614
        max_q: 2.5261003971099854
        mean_q: 2.3974406719207764
        mean_td_error: 0.04611653834581375
        min_q: 2.212015151977539
    num_steps_sampled: 2883584
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2816,6448.63,2883584,54.2701,58.4148,49.7362,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-56-39
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.414808608779005
  episode_reward_mean: 53.40290430335395
  episode_reward_min: 49.736151697042935
  episodes_this_iter: 8
  episodes_total: 28864
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2886656
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002473636530339718
        max_q: 3.429813861846924
        mean_q: 3.3288865089416504
        mean_td_error: -0.027341313660144806
        min_q: 3.109513759613037
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002910834504291415
        max_q: 2.7912750244140625
        mean_q: 2.5737290382385254
        mean_td_error: 0.0095711350440979
        min_q: 2.4471256732940674
    num_steps_sampled: 2886656
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2819,6454.72,2886656,53.4029,58.4148,49.7362,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-56-45
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.151470992492015
  episode_reward_mean: 52.722518838468986
  episode_reward_min: 49.736151697042935
  episodes_this_iter: 16
  episodes_total: 28896
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2889728
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0034791333600878716
        max_q: 3.8653533458709717
        mean_q: 3.781730890274048
        mean_td_error: -0.05507965385913849
        min_q: 3.711393117904663
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008087541908025742
        max_q: 2.8092360496520996
        mean_q: 2.6463937759399414
        mean_td_error: 0.15122509002685547
        min_q: 2.5295345783233643
    num_steps_sampled: 2889728
    num_steps_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2822,6461.34,2889728,52.7225,57.1515,49.7362,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-56-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.80968498971953
  episode_reward_mean: 52.484492914851174
  episode_reward_min: 44.48970027644302
  episodes_this_iter: 16
  episodes_total: 28928
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2892800
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.016177013516426086
        max_q: 3.868368148803711
        mean_q: 3.762277603149414
        mean_td_error: -0.2519546449184418
        min_q: 3.65620756149292
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0053392737172544
        max_q: 3.7854812145233154
        mean_q: 3.699526786804199
        mean_td_error: 0.08790614455938339
        min_q: 3.5544185638427734
    num_steps_sampled: 2892800
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2825,6467.93,2892800,52.4845,55.8097,44.4897,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-56-59
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.80968498971953
  episode_reward_mean: 52.45482342865145
  episode_reward_min: 44.48970027644302
  episodes_this_iter: 8
  episodes_total: 28952
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2895872
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.006901079323142767
        max_q: 3.929417371749878
        mean_q: 3.8018457889556885
        mean_td_error: -0.05354955792427063
        min_q: 3.6493160724639893
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009024680592119694
        max_q: 4.351843357086182
        mean_q: 4.251807689666748
        mean_td_error: 0.17716149985790253
        min_q: 4.144351482391357
    num_steps_sampled: 2895872
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2828,6474.46,2895872,52.4548,55.8097,44.4897,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-57-06
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.548459074963446
  episode_reward_mean: 52.439092346680454
  episode_reward_min: 44.48970027644302
  episodes_this_iter: 8
  episodes_total: 28984
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2898944
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.017440255731344223
        max_q: 3.9271886348724365
        mean_q: 2.8658394813537598
        mean_td_error: -0.1497701108455658
        min_q: 2.2131519317626953
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015974685549736023
        max_q: 4.285344123840332
        mean_q: 4.138576030731201
        mean_td_error: -0.13095808029174805
        min_q: 4.023955345153809
    num_steps_sampled: 2898944
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2831,6481.26,2898944,52.4391,58.5485,44.4897,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-57-13
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.548459074963446
  episode_reward_mean: 52.81788808364397
  episode_reward_min: 44.48970027644302
  episodes_this_iter: 8
  episodes_total: 29016
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2902016
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0862351655960083
        max_q: 3.1470835208892822
        mean_q: 2.707416296005249
        mean_td_error: 0.9486774802207947
        min_q: 2.0874757766723633
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01552277896553278
        max_q: 4.165962219238281
        mean_q: 3.9063963890075684
        mean_td_error: 0.1745774745941162
        min_q: 3.5290310382843018
    num_steps_sampled: 2902016
    num_steps_trained: 2

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2834,6487.85,2902016,52.8179,58.5485,44.4897,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-57-19
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.548459074963446
  episode_reward_mean: 53.45816307553531
  episode_reward_min: 45.927073217403574
  episodes_this_iter: 8
  episodes_total: 29048
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2905088
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013443311676383018
        max_q: 2.6249592304229736
        mean_q: 2.52658748626709
        mean_td_error: 0.12853452563285828
        min_q: 2.4223198890686035
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.05081731453537941
        max_q: 3.4433226585388184
        mean_q: 3.0415987968444824
        mean_td_error: -0.5407196283340454
        min_q: 2.7375588417053223
    num_steps_sampled: 2905088
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2837,6494.37,2905088,53.4582,58.5485,45.9271,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-57-26
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.71289483744253
  episode_reward_mean: 53.38823640637107
  episode_reward_min: 45.927073217403574
  episodes_this_iter: 16
  episodes_total: 29080
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2908160
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005212388467043638
        max_q: 3.414621353149414
        mean_q: 3.3623650074005127
        mean_td_error: 0.054901666939258575
        min_q: 3.19881272315979
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.021869055926799774
        max_q: 3.089262008666992
        mean_q: 2.9729552268981934
        mean_td_error: -0.2315659523010254
        min_q: 2.881568193435669
    num_steps_sampled: 2908160
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2840,6500.87,2908160,53.3882,57.7129,45.9271,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-57-33
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.71289483744253
  episode_reward_mean: 52.66480086616541
  episode_reward_min: 45.927073217403574
  episodes_this_iter: 16
  episodes_total: 29112
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2911232
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003595436690375209
        max_q: 4.069157600402832
        mean_q: 3.93794322013855
        mean_td_error: 0.026318274438381195
        min_q: 3.8364129066467285
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003959058318287134
        max_q: 3.326141119003296
        mean_q: 3.2019662857055664
        mean_td_error: -0.05012571066617966
        min_q: 3.1215639114379883
    num_steps_sampled: 2911232
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2843,6507.26,2911232,52.6648,57.7129,45.9271,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-57-39
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.55836287728947
  episode_reward_mean: 51.7562938757906
  episode_reward_min: 45.927073217403574
  episodes_this_iter: 8
  episodes_total: 29136
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2914304
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0017002198146656156
        max_q: 4.2288360595703125
        mean_q: 4.13104248046875
        mean_td_error: 0.003406934440135956
        min_q: 4.063510894775391
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0025828154757618904
        max_q: 3.927738666534424
        mean_q: 3.894278049468994
        mean_td_error: -0.03373074531555176
        min_q: 3.7774569988250732
    num_steps_sampled: 2914304
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2846,6513.47,2914304,51.7563,56.5584,45.9271,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-57-46
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.36647910654432
  episode_reward_mean: 51.448173903938766
  episode_reward_min: 47.749675635526046
  episodes_this_iter: 8
  episodes_total: 29168
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2917376
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015117630362510681
        max_q: 3.9583823680877686
        mean_q: 3.8446860313415527
        mean_td_error: -0.15293428301811218
        min_q: 3.6875500679016113
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005421742796897888
        max_q: 4.3456902503967285
        mean_q: 4.2894110679626465
        mean_td_error: -0.07281593978404999
        min_q: 4.21280574798584
    num_steps_sampled: 2917376
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2849,6519.91,2917376,51.4482,56.3665,47.7497,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-57-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.76803828351845
  episode_reward_mean: 50.22540824429878
  episode_reward_min: 47.20787377720544
  episodes_this_iter: 8
  episodes_total: 29200
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2920448
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007054820191115141
        max_q: 4.14304256439209
        mean_q: 4.080038070678711
        mean_td_error: 0.07276400923728943
        min_q: 3.953594207763672
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0021650190465152264
        max_q: 4.471863746643066
        mean_q: 4.4228715896606445
        mean_td_error: -0.01988312602043152
        min_q: 4.343247890472412
    num_steps_sampled: 2920448
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2852,6526.48,2920448,50.2254,54.768,47.2079,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-57-59
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.89086498986167
  episode_reward_mean: 49.616459478296655
  episode_reward_min: 45.776102772539936
  episodes_this_iter: 8
  episodes_total: 29232
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2923520
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0027941821608692408
        max_q: 4.379170894622803
        mean_q: 4.321752548217773
        mean_td_error: 0.047875791788101196
        min_q: 4.213340759277344
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0052083758637309074
        max_q: 4.4023637771606445
        mean_q: 4.317831039428711
        mean_td_error: 0.07717855274677277
        min_q: 4.261927127838135
    num_steps_sampled: 2923520
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2855,6533.16,2923520,49.6165,53.8909,45.7761,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-58-07
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 52.45281151049951
  episode_reward_mean: 48.97260698066778
  episode_reward_min: 45.776102772539936
  episodes_this_iter: 16
  episodes_total: 29264
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2926592
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001339914626441896
        max_q: 4.796547889709473
        mean_q: 4.758924961090088
        mean_td_error: 0.013676822185516357
        min_q: 4.666323184967041
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002658622805029154
        max_q: 4.414470672607422
        mean_q: 4.372147560119629
        mean_td_error: -0.033399373292922974
        min_q: 4.331362724304199
    num_steps_sampled: 2926592
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2858,6540.22,2926592,48.9726,52.4528,45.7761,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-58-14
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 52.45281151049951
  episode_reward_mean: 48.445052813935234
  episode_reward_min: 45.776102772539936
  episodes_this_iter: 16
  episodes_total: 29296
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2929664
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0023226693738251925
        max_q: 5.048646926879883
        mean_q: 4.990582466125488
        mean_td_error: 0.03697596490383148
        min_q: 4.856269359588623
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0016226002480834723
        max_q: 4.554145812988281
        mean_q: 4.460479259490967
        mean_td_error: -0.008623972535133362
        min_q: 4.3232421875
    num_steps_sampled: 2929664
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2861,6547.22,2929664,48.4451,52.4528,45.7761,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-58-19
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.24945269818142
  episode_reward_mean: 49.086436644537535
  episode_reward_min: 45.776102772539936
  episodes_this_iter: 8
  episodes_total: 29312
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2931712
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0012128872331231833
        max_q: 4.955049991607666
        mean_q: 4.896960258483887
        mean_td_error: 0.018694505095481873
        min_q: 4.741854667663574
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003731023520231247
        max_q: 4.3833723068237305
        mean_q: 4.290684223175049
        mean_td_error: -0.05296534299850464
        min_q: 4.190953254699707
    num_steps_sampled: 2931712
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2863,6552.16,2931712,49.0864,53.2495,45.7761,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-58-24
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.24945269818142
  episode_reward_mean: 49.004236161012116
  episode_reward_min: 46.31931916186566
  episodes_this_iter: 16
  episodes_total: 29336
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2933760
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0012027224292978644
        max_q: 4.932608604431152
        mean_q: 4.878900527954102
        mean_td_error: 0.006081104278564453
        min_q: 4.78837776184082
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0022595738992094994
        max_q: 4.477575302124023
        mean_q: 4.36177396774292
        mean_td_error: -0.023481041193008423
        min_q: 4.25429105758667
    num_steps_sampled: 2933760
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2865,6557.39,2933760,49.0042,53.2495,46.3193,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-58-30
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.24945269818142
  episode_reward_mean: 49.257655650057714
  episode_reward_min: 46.31931916186566
  episodes_this_iter: 8
  episodes_total: 29352
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2935808
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0019308918854221702
        max_q: 4.981791019439697
        mean_q: 4.855243682861328
        mean_td_error: 0.030331403017044067
        min_q: 4.775813102722168
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0044382004998624325
        max_q: 4.460300445556641
        mean_q: 4.322633743286133
        mean_td_error: 0.01777215301990509
        min_q: 4.12750768661499
    num_steps_sampled: 2935808
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2867,6562.78,2935808,49.2577,53.2495,46.3193,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-58-36
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.24945269818142
  episode_reward_mean: 49.623791050237266
  episode_reward_min: 46.66703764151011
  episodes_this_iter: 8
  episodes_total: 29376
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2937856
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004945034626871347
        max_q: 4.899533748626709
        mean_q: 4.807651042938232
        mean_td_error: 0.10177145898342133
        min_q: 4.666999816894531
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0017761364579200745
        max_q: 4.361233711242676
        mean_q: 4.331243515014648
        mean_td_error: 0.02679997682571411
        min_q: 4.266763210296631
    num_steps_sampled: 2937856
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2869,6568.36,2937856,49.6238,53.2495,46.667,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-58-42
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.24945269818142
  episode_reward_mean: 49.923588598297336
  episode_reward_min: 46.94229780713819
  episodes_this_iter: 8
  episodes_total: 29392
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2939904
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003966317046433687
        max_q: 4.846830368041992
        mean_q: 4.784927845001221
        mean_td_error: -0.06469884514808655
        min_q: 4.726247310638428
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0020823576487600803
        max_q: 4.578826904296875
        mean_q: 4.509766578674316
        mean_td_error: 0.0417599081993103
        min_q: 4.458856582641602
    num_steps_sampled: 2939904
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2871,6574.33,2939904,49.9236,53.2495,46.9423,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-58-48
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.884272841641234
  episode_reward_mean: 49.85224655789554
  episode_reward_min: 47.8106585322672
  episodes_this_iter: 8
  episodes_total: 29416
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2941952
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003006059443578124
        max_q: 4.84317684173584
        mean_q: 4.77748966217041
        mean_td_error: 0.06252473592758179
        min_q: 4.6308979988098145
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0011994238011538982
        max_q: 4.618422508239746
        mean_q: 4.560724258422852
        mean_td_error: -0.022196099162101746
        min_q: 4.398327827453613
    num_steps_sampled: 2941952
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2873,6580.48,2941952,49.8522,54.8843,47.8107,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-58-54
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.884272841641234
  episode_reward_mean: 49.37368789765785
  episode_reward_min: 46.26421090846455
  episodes_this_iter: 16
  episodes_total: 29440
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2944000
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0010308645432814956
        max_q: 4.814116954803467
        mean_q: 4.764856338500977
        mean_td_error: -0.0072066038846969604
        min_q: 4.658878803253174
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0008359373896382749
        max_q: 4.674691677093506
        mean_q: 4.642688751220703
        mean_td_error: -0.014794200658798218
        min_q: 4.501662254333496
    num_steps_sampled: 2944000
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2875,6586.21,2944000,49.3737,54.8843,46.2642,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-58-59
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.884272841641234
  episode_reward_mean: 49.18391292288224
  episode_reward_min: 46.26421090846455
  episodes_this_iter: 8
  episodes_total: 29456
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2946048
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0010756076080724597
        max_q: 4.8344831466674805
        mean_q: 4.814704895019531
        mean_td_error: -0.01369708776473999
        min_q: 4.781164169311523
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0024090695660561323
        max_q: 4.6747589111328125
        mean_q: 4.5813069343566895
        mean_td_error: -0.037079423666000366
        min_q: 4.5019097328186035
    num_steps_sampled: 2946048
    num_steps_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2877,6591.57,2946048,49.1839,54.8843,46.2642,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-59-05
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.884272841641234
  episode_reward_mean: 49.473346311057156
  episode_reward_min: 46.26421090846455
  episodes_this_iter: 16
  episodes_total: 29480
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2948096
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0018139634048566222
        max_q: 4.840421199798584
        mean_q: 4.783319473266602
        mean_td_error: 0.018126577138900757
        min_q: 4.720874786376953
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0016356345731765032
        max_q: 4.598049163818359
        mean_q: 4.536029815673828
        mean_td_error: -0.03270789980888367
        min_q: 4.464599609375
    num_steps_sampled: 2948096
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2879,6596.71,2948096,49.4733,54.8843,46.2642,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-59-12
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.884272841641234
  episode_reward_mean: 49.991852784883264
  episode_reward_min: 46.26421090846455
  episodes_this_iter: 8
  episodes_total: 29504
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2951168
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0007630888721905649
        max_q: 4.7787556648254395
        mean_q: 4.738219261169434
        mean_td_error: 0.005556806921958923
        min_q: 4.668066501617432
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0008437852957285941
        max_q: 4.629793167114258
        mean_q: 4.542938232421875
        mean_td_error: -0.012445211410522461
        min_q: 4.413282871246338
    num_steps_sampled: 2951168
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2882,6603.64,2951168,49.9919,54.8843,46.2642,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-59-19
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.33488170289601
  episode_reward_mean: 50.2769691077082
  episode_reward_min: 46.26421090846455
  episodes_this_iter: 8
  episodes_total: 29536
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2954240
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001058190711773932
        max_q: 4.713587284088135
        mean_q: 4.657994747161865
        mean_td_error: -0.01745443046092987
        min_q: 4.600945472717285
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0020101331174373627
        max_q: 4.642276763916016
        mean_q: 4.583256244659424
        mean_td_error: -0.04193021357059479
        min_q: 4.541985034942627
    num_steps_sampled: 2954240
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2885,6610.26,2954240,50.277,53.3349,46.2642,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-59-25
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.33488170289601
  episode_reward_mean: 49.997863648952034
  episode_reward_min: 44.162200977987524
  episodes_this_iter: 8
  episodes_total: 29568
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2957312
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003678493667393923
        max_q: 4.678071975708008
        mean_q: 4.578334808349609
        mean_td_error: -0.06802161037921906
        min_q: 4.509115219116211
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0019233846105635166
        max_q: 4.640030384063721
        mean_q: 4.554449558258057
        mean_td_error: -0.03659486770629883
        min_q: 4.436010360717773
    num_steps_sampled: 2957312
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2888,6616.69,2957312,49.9979,53.3349,44.1622,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-59-32
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.52194753827944
  episode_reward_mean: 49.2301505281378
  episode_reward_min: 44.162200977987524
  episodes_this_iter: 8
  episodes_total: 29600
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2960384
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0022690468467772007
        max_q: 4.648156642913818
        mean_q: 4.503289222717285
        mean_td_error: 0.009339481592178345
        min_q: 4.413754940032959
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0014235432026907802
        max_q: 4.703890323638916
        mean_q: 4.633035659790039
        mean_td_error: -0.025834694504737854
        min_q: 4.554038047790527
    num_steps_sampled: 2960384
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2891,6623.16,2960384,49.2302,53.5219,44.1622,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-59-39
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.52194753827944
  episode_reward_mean: 47.78666598082473
  episode_reward_min: 44.162200977987524
  episodes_this_iter: 8
  episodes_total: 29632
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2963456
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0018795179203152657
        max_q: 4.629271030426025
        mean_q: 4.53190279006958
        mean_td_error: -0.002233058214187622
        min_q: 4.396971225738525
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0015691989101469517
        max_q: 4.743518829345703
        mean_q: 4.6908745765686035
        mean_td_error: -0.02868792414665222
        min_q: 4.62128210067749
    num_steps_sampled: 2963456
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2894,6629.66,2963456,47.7867,53.5219,44.1622,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-59-45
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.52194753827944
  episode_reward_mean: 48.50781000931565
  episode_reward_min: 44.195698575787205
  episodes_this_iter: 16
  episodes_total: 29664
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2966528
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0033448338508605957
        max_q: 4.595597743988037
        mean_q: 4.340642929077148
        mean_td_error: 0.00626295804977417
        min_q: 4.002508163452148
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0021424528677016497
        max_q: 4.702699184417725
        mean_q: 4.59324836730957
        mean_td_error: -0.035033464431762695
        min_q: 4.352221965789795
    num_steps_sampled: 2966528
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2897,6636.21,2966528,48.5078,53.5219,44.1957,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-59-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.71410959555466
  episode_reward_mean: 48.930600995487914
  episode_reward_min: 42.32230610572541
  episodes_this_iter: 16
  episodes_total: 29696
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2969600
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03833092376589775
        max_q: 4.355634689331055
        mean_q: 3.4761714935302734
        mean_td_error: -0.8543887138366699
        min_q: 2.927885055541992
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.019039519131183624
        max_q: 4.459383010864258
        mean_q: 3.999516725540161
        mean_td_error: -0.31030818819999695
        min_q: 3.8307688236236572
    num_steps_sampled: 2969600
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2900,6642.77,2969600,48.9306,56.7141,42.3223,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_14-59-59
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.71410959555466
  episode_reward_mean: 49.346725240792004
  episode_reward_min: 42.32230610572541
  episodes_this_iter: 8
  episodes_total: 29720
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2972672
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005489212926477194
        max_q: 3.543219566345215
        mean_q: 2.8027756214141846
        mean_td_error: 0.012563295662403107
        min_q: 2.467013120651245
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005561061203479767
        max_q: 4.293975830078125
        mean_q: 4.159346580505371
        mean_td_error: -0.06905525922775269
        min_q: 4.108151435852051
    num_steps_sampled: 2972672
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2903,6649.25,2972672,49.3467,56.7141,42.3223,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-00-06
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.71410959555466
  episode_reward_mean: 50.70602087819938
  episode_reward_min: 42.32230610572541
  episodes_this_iter: 8
  episodes_total: 29752
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2975744
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004317126702517271
        max_q: 2.712716817855835
        mean_q: 2.53493070602417
        mean_td_error: 0.08786880970001221
        min_q: 2.3737571239471436
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00699650077149272
        max_q: 3.9551568031311035
        mean_q: 3.351374864578247
        mean_td_error: -0.07578376680612564
        min_q: 3.0948643684387207
    num_steps_sampled: 2975744
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2906,6655.78,2975744,50.706,56.7141,42.3223,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-00-13
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.71410959555466
  episode_reward_mean: 50.28070323962126
  episode_reward_min: 42.32230610572541
  episodes_this_iter: 8
  episodes_total: 29784
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2978816
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004203712102025747
        max_q: 3.3473806381225586
        mean_q: 3.2917792797088623
        mean_td_error: 0.0745701789855957
        min_q: 3.151149272918701
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001458704355172813
        max_q: 3.026237726211548
        mean_q: 2.9239156246185303
        mean_td_error: -0.011727072298526764
        min_q: 2.7691962718963623
    num_steps_sampled: 2978816
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2909,6662.71,2978816,50.2807,56.7141,42.3223,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-00-18
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.15398696484279
  episode_reward_mean: 51.28597522848685
  episode_reward_min: 45.30010800970215
  episodes_this_iter: 16
  episodes_total: 29808
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2980864
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0026956486981362104
        max_q: 3.8305587768554688
        mean_q: 3.7005395889282227
        mean_td_error: 0.057867035269737244
        min_q: 3.559048891067505
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007221321575343609
        max_q: 3.045513868331909
        mean_q: 2.838306188583374
        mean_td_error: -0.10361836850643158
        min_q: 2.7035746574401855
    num_steps_sampled: 2980864
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2911,6667.78,2980864,51.286,54.154,45.3001,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-00-23
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.15398696484279
  episode_reward_mean: 50.76645071227923
  episode_reward_min: 45.30010800970215
  episodes_this_iter: 8
  episodes_total: 29824
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2982912
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00595173379406333
        max_q: 4.165994644165039
        mean_q: 4.1202874183654785
        mean_td_error: 0.11257697641849518
        min_q: 3.9687745571136475
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009660586714744568
        max_q: 2.5921242237091064
        mean_q: 2.352163791656494
        mean_td_error: -0.17237228155136108
        min_q: 2.1051058769226074
    num_steps_sampled: 2982912
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2913,6673,2982912,50.7665,54.154,45.3001,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-00-29
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.15398696484279
  episode_reward_mean: 50.170144939468145
  episode_reward_min: 45.30010800970215
  episodes_this_iter: 16
  episodes_total: 29848
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2984960
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001999524189159274
        max_q: 4.237575054168701
        mean_q: 4.1727190017700195
        mean_td_error: 0.04118689149618149
        min_q: 4.137965202331543
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013274522498250008
        max_q: 2.2595012187957764
        mean_q: 2.0971291065216064
        mean_td_error: -0.21099935472011566
        min_q: 2.0026886463165283
    num_steps_sampled: 2984960
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2915,6678.67,2984960,50.1701,54.154,45.3001,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-00-35
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.663815621275624
  episode_reward_mean: 49.70937013932247
  episode_reward_min: 45.30010800970215
  episodes_this_iter: 8
  episodes_total: 29864
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2987008
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002765168435871601
        max_q: 4.242266654968262
        mean_q: 4.1481451988220215
        mean_td_error: -0.05334000289440155
        min_q: 4.100587844848633
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.04176528751850128
        max_q: 2.0291950702667236
        mean_q: 1.498504638671875
        mean_td_error: -0.7366745471954346
        min_q: 1.297621250152588
    num_steps_sampled: 2987008
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2917,6684.49,2987008,49.7094,53.6638,45.3001,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-00-41
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.48082743595105
  episode_reward_mean: 50.3764737721356
  episode_reward_min: 46.98772508007881
  episodes_this_iter: 8
  episodes_total: 29888
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2989056
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0012942570028826594
        max_q: 4.264825344085693
        mean_q: 4.230581283569336
        mean_td_error: 0.02346496284008026
        min_q: 4.129498481750488
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.030237752944231033
        max_q: 1.5920196771621704
        mean_q: 1.3314096927642822
        mean_td_error: -0.5118385553359985
        min_q: 1.2029860019683838
    num_steps_sampled: 2989056
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2919,6690.31,2989056,50.3765,54.4808,46.9877,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-00-48
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.48082743595105
  episode_reward_mean: 50.206705708303545
  episode_reward_min: 46.98772508007881
  episodes_this_iter: 8
  episodes_total: 29904
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2991104
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00302277528680861
        max_q: 4.282057285308838
        mean_q: 4.196785926818848
        mean_td_error: -0.05210179090499878
        min_q: 4.074231147766113
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0244175773113966
        max_q: 1.5561342239379883
        mean_q: 1.3034778833389282
        mean_td_error: -0.34377577900886536
        min_q: 1.077712893486023
    num_steps_sampled: 2991104
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2921,6696.61,2991104,50.2067,54.4808,46.9877,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-00-54
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.57441011938023
  episode_reward_mean: 51.22622363352899
  episode_reward_min: 47.630015074078635
  episodes_this_iter: 8
  episodes_total: 29928
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2993152
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0019836961291730404
        max_q: 4.227192401885986
        mean_q: 4.133110046386719
        mean_td_error: -0.026005655527114868
        min_q: 4.06291389465332
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.007620656862854958
        max_q: 1.185439944267273
        mean_q: 0.9423309564590454
        mean_td_error: -0.09099467843770981
        min_q: 0.8127413988113403
    num_steps_sampled: 2993152
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2923,6702.54,2993152,51.2262,54.5744,47.63,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-00-59
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.22096014186244
  episode_reward_mean: 52.046766443901745
  episode_reward_min: 48.1153709274849
  episodes_this_iter: 16
  episodes_total: 29952
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2995200
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002902476815506816
        max_q: 4.370419979095459
        mean_q: 4.307165622711182
        mean_td_error: 0.05193820595741272
        min_q: 4.24635648727417
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.016354724764823914
        max_q: 1.4909967184066772
        mean_q: 0.8477861881256104
        mean_td_error: -0.19888901710510254
        min_q: 0.5525792837142944
    num_steps_sampled: 2995200
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2925,6707.72,2995200,52.0468,55.221,48.1154,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-01-06
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.31967616013392
  episode_reward_mean: 52.230073169298336
  episode_reward_min: 49.43953121251913
  episodes_this_iter: 8
  episodes_total: 29976
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2998272
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0016051172278821468
        max_q: 4.527615547180176
        mean_q: 4.364907264709473
        mean_td_error: -0.010973617434501648
        min_q: 4.265986919403076
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03152995556592941
        max_q: 0.9295607805252075
        mean_q: 0.42253637313842773
        mean_td_error: -0.4622150659561157
        min_q: -0.10857969522476196
    num_steps_sampled: 2998272
    num_steps_t

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2928,6714.75,2998272,52.2301,55.3197,49.4395,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-01-13
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.52931551125254
  episode_reward_mean: 52.942873887932464
  episode_reward_min: 50.53648473819334
  episodes_this_iter: 8
  episodes_total: 30008
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 3001344
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00352566409856081
        max_q: 4.731590270996094
        mean_q: 4.601219654083252
        mean_td_error: 0.06800331175327301
        min_q: 4.3848090171813965
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008835292421281338
        max_q: 0.9351001977920532
        mean_q: 0.5274484157562256
        mean_td_error: 0.11865290254354477
        min_q: 0.33979374170303345
    num_steps_sampled: 3001344
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2931,6721.56,3001344,52.9429,55.5293,50.5365,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-01-20
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.56426443663257
  episode_reward_mean: 53.22529756495001
  episode_reward_min: 46.535185487718586
  episodes_this_iter: 8
  episodes_total: 30040
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 3004416
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003882069606333971
        max_q: 4.452213287353516
        mean_q: 4.396418571472168
        mean_td_error: -0.07519413530826569
        min_q: 4.285740852355957
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02038108929991722
        max_q: 1.5620590448379517
        mean_q: 1.1844075918197632
        mean_td_error: -0.20002177357673645
        min_q: 1.019733190536499
    num_steps_sampled: 3004416
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2934,6728.04,3004416,53.2253,56.5643,46.5352,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-01-27
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.56426443663257
  episode_reward_mean: 52.369866023180194
  episode_reward_min: 46.535185487718586
  episodes_this_iter: 8
  episodes_total: 30072
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 3007488
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.003753247205168009
        max_q: 4.616530895233154
        mean_q: 4.531254768371582
        mean_td_error: -0.04345603287220001
        min_q: 4.428168296813965
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013073354959487915
        max_q: 2.0517170429229736
        mean_q: 1.762405276298523
        mean_td_error: -0.12063740193843842
        min_q: 1.563928484916687
    num_steps_sampled: 3007488
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2937,6734.45,3007488,52.3699,56.5643,46.5352,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-01-34
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.56426443663257
  episode_reward_mean: 51.64813267522524
  episode_reward_min: 46.535185487718586
  episodes_this_iter: 16
  episodes_total: 30104
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 3010560
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.001317076850682497
        max_q: 4.725148677825928
        mean_q: 4.62321662902832
        mean_td_error: 0.014635205268859863
        min_q: 4.542229652404785
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.019053282216191292
        max_q: 2.2377779483795166
        mean_q: 1.9843251705169678
        mean_td_error: -0.1813458949327469
        min_q: 1.8147646188735962
    num_steps_sampled: 3010560
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2940,6741.15,3010560,51.6481,56.5643,46.5352,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-01-40
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.41368724298266
  episode_reward_mean: 50.83477114623566
  episode_reward_min: 47.2609473864124
  episodes_this_iter: 16
  episodes_total: 30136
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 3013632
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002151498571038246
        max_q: 4.586451530456543
        mean_q: 4.484699726104736
        mean_td_error: -0.019147351384162903
        min_q: 4.30484676361084
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.017017509788274765
        max_q: 1.8932442665100098
        mean_q: 1.7010364532470703
        mean_td_error: -0.17185015976428986
        min_q: 1.4909143447875977
    num_steps_sampled: 3013632
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2943,6747.5,3013632,50.8348,56.4137,47.2609,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-01-46
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 54.439907676902195
  episode_reward_mean: 50.989551188501785
  episode_reward_min: 47.2609473864124
  episodes_this_iter: 8
  episodes_total: 30160
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 3016704
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00220570620149374
        max_q: 4.2347540855407715
        mean_q: 4.038047790527344
        mean_td_error: -0.02930450439453125
        min_q: 3.859344720840454
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0038933497853577137
        max_q: 2.5031538009643555
        mean_q: 2.2810111045837402
        mean_td_error: -0.03397178649902344
        min_q: 2.124483108520508
    num_steps_sampled: 3016704
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2946,6753.25,3016704,50.9896,54.4399,47.2609,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-01-52
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 53.96675727197403
  episode_reward_mean: 50.97994087574316
  episode_reward_min: 48.42985126035898
  episodes_this_iter: 8
  episodes_total: 30192
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 3019776
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01723395846784115
        max_q: 4.106501579284668
        mean_q: 3.5855932235717773
        mean_td_error: -0.23907673358917236
        min_q: 3.130047082901001
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.017535805702209473
        max_q: 2.5132102966308594
        mean_q: 2.117849588394165
        mean_td_error: -0.18289832770824432
        min_q: 1.8313350677490234
    num_steps_sampled: 3019776
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2949,6758.93,3019776,50.9799,53.9668,48.4299,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-01-58
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.47950379700972
  episode_reward_mean: 52.04969148006208
  episode_reward_min: 48.42985126035898
  episodes_this_iter: 8
  episodes_total: 30224
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 3022848
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.034624338150024414
        max_q: 3.516282796859741
        mean_q: 3.1552834510803223
        mean_td_error: -0.43212947249412537
        min_q: 2.9572670459747314
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005745544098317623
        max_q: 1.8890855312347412
        mean_q: 1.6756376028060913
        mean_td_error: 0.019055228680372238
        min_q: 1.3420263528823853
    num_steps_sampled: 3022848
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2952,6764.51,3022848,52.0497,55.4795,48.4299,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-02-04
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 55.91304198333728
  episode_reward_mean: 52.09497248552167
  episode_reward_min: 45.94313563050181
  episodes_this_iter: 8
  episodes_total: 30256
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 3025920
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005051856394857168
        max_q: 3.122999906539917
        mean_q: 2.488020420074463
        mean_td_error: 0.0591907724738121
        min_q: 2.3100392818450928
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0032846403773874044
        max_q: 2.0641424655914307
        mean_q: 1.8937443494796753
        mean_td_error: -0.0273900143802166
        min_q: 1.771789312362671
    num_steps_sampled: 3025920
    num_steps_trained

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2955,6770.14,3025920,52.095,55.913,45.9431,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-02-10
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.31448129306899
  episode_reward_mean: 53.42256167189486
  episode_reward_min: 45.94313563050181
  episodes_this_iter: 16
  episodes_total: 30288
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 3028992
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02578369341790676
        max_q: 2.566908359527588
        mean_q: 2.1011641025543213
        mean_td_error: -0.340864896774292
        min_q: 1.4817790985107422
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0074210031889379025
        max_q: 1.9057503938674927
        mean_q: 1.626868724822998
        mean_td_error: -0.03804725781083107
        min_q: 1.3522915840148926
    num_steps_sampled: 3028992
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2958,6776.06,3028992,53.4226,57.3145,45.9431,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-02-15
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.31448129306899
  episode_reward_mean: 53.04553542995175
  episode_reward_min: 45.94313563050181
  episodes_this_iter: 16
  episodes_total: 30320
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 3032064
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.019616631790995598
        max_q: 1.6050891876220703
        mean_q: 1.3601856231689453
        mean_td_error: -0.27370479702949524
        min_q: 1.0997045040130615
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010106092318892479
        max_q: 1.506425380706787
        mean_q: 1.3296051025390625
        mean_td_error: -0.08387880027294159
        min_q: 1.2038167715072632
    num_steps_sampled: 3032064
    num_steps_tr

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2961,6781.76,3032064,53.0455,57.3145,45.9431,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-02-21
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.31448129306899
  episode_reward_mean: 53.25688122806495
  episode_reward_min: 45.94313563050181
  episodes_this_iter: 8
  episodes_total: 30344
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 3035136
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0037717295344918966
        max_q: 2.092365264892578
        mean_q: 1.892685890197754
        mean_td_error: 0.031713228672742844
        min_q: 1.7406960725784302
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008511662483215332
        max_q: 1.5989434719085693
        mean_q: 1.2148164510726929
        mean_td_error: 0.006132453680038452
        min_q: 0.8485770225524902
    num_steps_sampled: 3035136
    num_steps_tra

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2964,6787.32,3035136,53.2569,57.3145,45.9431,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-02-27
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.31448129306899
  episode_reward_mean: 53.60611027054659
  episode_reward_min: 49.22619120461786
  episodes_this_iter: 8
  episodes_total: 30376
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 3038208
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011264205910265446
        max_q: 2.3758766651153564
        mean_q: 2.2475829124450684
        mean_td_error: -0.1157471239566803
        min_q: 2.1036770343780518
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005316691007465124
        max_q: 1.510040044784546
        mean_q: 1.3622127771377563
        mean_td_error: 0.025929398834705353
        min_q: 1.0826630592346191
    num_steps_sampled: 3038208
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2967,6792.88,3038208,53.6061,57.3145,49.2262,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-02-33
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.979117846877
  episode_reward_mean: 54.00051657840586
  episode_reward_min: 49.22619120461786
  episodes_this_iter: 8
  episodes_total: 30408
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 3041280
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.008064388297498226
        max_q: 2.7504472732543945
        mean_q: 2.5019683837890625
        mean_td_error: -0.07081665843725204
        min_q: 2.370025157928467
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.03191551938652992
        max_q: 1.98396897315979
        mean_q: 1.8576257228851318
        mean_td_error: -0.30296435952186584
        min_q: 1.6689876317977905
    num_steps_sampled: 3041280
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2970,6798.38,3041280,54.0005,57.9791,49.2262,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-02-38
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.979117846877
  episode_reward_mean: 53.795489319963934
  episode_reward_min: 49.22619120461786
  episodes_this_iter: 8
  episodes_total: 30440
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 3044352
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0033285089302808046
        max_q: 2.4858484268188477
        mean_q: 2.35113787651062
        mean_td_error: 0.020165927708148956
        min_q: 2.1973516941070557
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009486048482358456
        max_q: 2.3455312252044678
        mean_q: 2.2468457221984863
        mean_td_error: -0.08886610716581345
        min_q: 2.096787452697754
    num_steps_sampled: 3044352
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2973,6803.94,3044352,53.7955,57.9791,49.2262,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-02-44
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.979117846877
  episode_reward_mean: 53.924244925336225
  episode_reward_min: 48.8823383115394
  episodes_this_iter: 8
  episodes_total: 30472
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 3047424
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.013907612301409245
        max_q: 2.8814730644226074
        mean_q: 2.7111692428588867
        mean_td_error: -0.1284990906715393
        min_q: 2.5963802337646484
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.009959692135453224
        max_q: 2.685868501663208
        mean_q: 2.408627510070801
        mean_td_error: -0.07286403328180313
        min_q: 2.12707257270813
    num_steps_sampled: 3047424
    num_steps_trained: 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2976,6809.82,3047424,53.9242,57.9791,48.8823,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-02-51
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.362684802513925
  episode_reward_mean: 54.42526436230064
  episode_reward_min: 48.8823383115394
  episodes_this_iter: 16
  episodes_total: 30504
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 3050496
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.012087910436093807
        max_q: 3.1284499168395996
        mean_q: 2.6991143226623535
        mean_td_error: -0.09407690912485123
        min_q: 2.454401969909668
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01352635957300663
        max_q: 3.1951189041137695
        mean_q: 2.825605630874634
        mean_td_error: 0.10926178842782974
        min_q: 2.5897505283355713
    num_steps_sampled: 3050496
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2979,6815.93,3050496,54.4253,60.3627,48.8823,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-02-57
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.362684802513925
  episode_reward_mean: 55.103901784988444
  episode_reward_min: 48.8823383115394
  episodes_this_iter: 8
  episodes_total: 30528
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 3053568
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.02173638343811035
        max_q: 2.6763060092926025
        mean_q: 2.5432636737823486
        mean_td_error: -0.198187917470932
        min_q: 2.3700954914093018
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.011300400830805302
        max_q: 2.857654094696045
        mean_q: 2.6503400802612305
        mean_td_error: -0.09296081960201263
        min_q: 2.411787509918213
    num_steps_sampled: 3053568
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2982,6821.85,3053568,55.1039,60.3627,48.8823,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-03-03
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 60.362684802513925
  episode_reward_mean: 55.14132844608412
  episode_reward_min: 48.8823383115394
  episodes_this_iter: 8
  episodes_total: 30560
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 3056640
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.015330498106777668
        max_q: 3.2318148612976074
        mean_q: 3.11464786529541
        mean_td_error: 0.1681734025478363
        min_q: 2.921381711959839
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.010769660584628582
        max_q: 2.851926803588867
        mean_q: 2.6542437076568604
        mean_td_error: -0.07528433203697205
        min_q: 2.4698359966278076
    num_steps_sampled: 3056640
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2985,6827.64,3056640,55.1413,60.3627,48.8823,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-03-09
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.85909199043259
  episode_reward_mean: 54.227756226980766
  episode_reward_min: 50.62192376517335
  episodes_this_iter: 8
  episodes_total: 30592
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 3059712
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.00397584680467844
        max_q: 3.5643179416656494
        mean_q: 3.365971326828003
        mean_td_error: -0.021029680967330933
        min_q: 3.2299962043762207
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.018753618001937866
        max_q: 2.05012583732605
        mean_q: 1.8233908414840698
        mean_td_error: -0.16220684349536896
        min_q: 1.5414940118789673
    num_steps_sampled: 3059712
    num_steps_trai

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2988,6833.48,3059712,54.2278,58.8591,50.6219,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-03-15
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 57.41186127206987
  episode_reward_mean: 52.96249298872886
  episode_reward_min: 46.20209731234815
  episodes_this_iter: 8
  episodes_total: 30624
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 3062784
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.004431153181940317
        max_q: 3.839207649230957
        mean_q: 3.684866189956665
        mean_td_error: -0.04481832683086395
        min_q: 3.5730714797973633
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.017512589693069458
        max_q: 2.459850311279297
        mean_q: 2.2547221183776855
        mean_td_error: -0.19226154685020447
        min_q: 2.090723991394043
    num_steps_sampled: 3062784
    num_steps_traine

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2991,6839.99,3062784,52.9625,57.4119,46.2021,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-03-22
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.08415547850453
  episode_reward_mean: 53.33926518175187
  episode_reward_min: 46.20209731234815
  episodes_this_iter: 8
  episodes_total: 30656
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 3065856
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.005002638325095177
        max_q: 3.8768653869628906
        mean_q: 3.796351909637451
        mean_td_error: -0.05090460926294327
        min_q: 3.706597089767456
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.017124060541391373
        max_q: 2.513366937637329
        mean_q: 2.401538610458374
        mean_td_error: -0.1779738962650299
        min_q: 2.259510040283203
    num_steps_sampled: 3065856
    num_steps_trained:

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2994,6845.96,3065856,53.3393,58.0842,46.2021,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-03-27
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 58.08415547850453
  episode_reward_mean: 53.09838807201102
  episode_reward_min: 46.20209731234815
  episodes_this_iter: 16
  episodes_total: 30688
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 3068928
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.01667851395905018
        max_q: 4.12650728225708
        mean_q: 3.9022481441497803
        mean_td_error: -0.1808125376701355
        min_q: 3.6650187969207764
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.0074919648468494415
        max_q: 2.8321571350097656
        mean_q: 2.682713031768799
        mean_td_error: -0.08862030506134033
        min_q: 2.5937345027923584
    num_steps_sampled: 3068928
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,2997,6851.62,3068928,53.0984,58.0842,46.2021,100


Result for DQN_diffdemand_3b8c4_00000:
  custom_metrics: {}
  date: 2021-04-12_15-03-34
  done: true
  episode_len_mean: 100.0
  episode_reward_max: 58.08415547850453
  episode_reward_mean: 53.19942952996097
  episode_reward_min: 48.27968720028034
  episodes_this_iter: 16
  episodes_total: 30720
  experiment_id: 2ddaa5d363c444cf9f43042b9151c47f
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 3072000
    learner:
      policy_0:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.04083648696541786
        max_q: 3.4761900901794434
        mean_q: 3.3554346561431885
        mean_td_error: -0.47810208797454834
        min_q: 3.1348912715911865
      policy_1:
        allreduce_latency: 0.0
        cur_lr: 0.15
        grad_gnorm: 0.002265761373564601
        max_q: 3.408682107925415
        mean_q: 3.302133083343506
        mean_td_error: -0.023767247796058655
        min_q: 3.202605962753296
    num_steps_sampled: 3072000
    num_steps_train

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,RUNNING,192.168.1.202:21196,3000,6857.52,3072000,53.1994,58.0842,48.2797,100


Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DQN_diffdemand_3b8c4_00000,TERMINATED,,3000,6857.52,3072000,53.1994,58.0842,48.2797,100


Best checkpont: /Users/matiascovarrubias/ray_results/DQN_test_April9/DQN_diffdemand_3b8c4_00000_0_2021-04-12_13-05-07/checkpoint_3000/checkpoint-3000


## Continuous Space


In [13]:
# DDPG uses its own exploration config, so the "exploration_config" entry of
# training_config is overwritten here.
# Reference implementation of the noise process:
# https://github.com/ray-project/ray/blob/master/rllib/utils/exploration/ornstein_uhlenbeck_noise.py
#
# OrnsteinUhlenbeck (stateful) noise is added to the NN-output actions
# (after a possible pure random phase of n timesteps).
exploration_config = dict(
    type="OrnsteinUhlenbeckNoise",
    final_scale=0.02,
    scale_timesteps=100000,
)
training_config["exploration_config"] = exploration_config

# Switch the market config to the continuous space type, then rebuild the
# environment from the updated config; its per-agent spaces feed the policy
# specs below.
env_config["mkt_config"]["space_type"] = "Continuous"
env = DiffDemand(env_config)
training_config["env_config"] = env_config

# One policy spec per agent: (None, observation space, action space, {}).
training_config["multiagent"]["policies"] = dict(
    (
        policy_ids[agent_idx],
        (
            None,
            env.observation_space[f"agent_{agent_idx}"],
            env.action_space[f"agent_{agent_idx}"],
            {},
        ),
    )
    for agent_idx in range(env.n_agents)
)

# print(env_config)
print(training_config)
print(env.action_space)


{'gamma': 0.95, 'lr': 0.15, 'env': 'diffdemand', 'exploration_config': {'type': 'OrnsteinUhlenbeckNoise', 'final_scale': 0.02, 'scale_timesteps': 100000}, 'env_config': {'mkt_config': {'lower_price': [1.3699999999999999, 1.3699999999999999], 'higher_price': [2.03, 2.03], 'parameteres': {'cost': [1, 1], 'values': [2, 2], 'ext_demand': 0, 'substitution': 0.25}, 'space_type': 'Continuous', 'gridpoints': 16}}, 'horizon': 100, 'soft_horizon': True, 'no_done_at_end': True, 'multiagent': {'policies': {'policy_0': (None, Box(1.3699999999999999, 2.03, (2,), float64), Box(1.3699999999999999, 2.03, (1,), float64), {}), 'policy_1': (None, Box(1.3699999999999999, 2.03, (2,), float64), Box(1.3699999999999999, 2.03, (1,), float64), {})}, 'policy_mapping_fn': <function <lambda> at 0x346583550>}, 'framework': 'torch', 'num_workers': 8, 'num_gpus': 0, 'timesteps_per_iteration': 1000}
{'agent_0': Box(1.3699999999999999, 2.03, (1,), float64), 'agent_1': Box(1.3699999999999999, 2.03, (1,), float64)}


In [14]:
# Train DDPG on the continuous-space environment configured above.
# The same experiment name is used for the Tune run and for the MLflow logger.
exp_name = "DDPG_cont_test_April9"
mlflow_logger = MLflowLoggerCallback(experiment_name=exp_name, save_artifact=True)

results = tune.run(
    run_or_experiment="DDPG",
    name=exp_name,
    config=training_config,
    stop=stop,
    # Only a final checkpoint is written; periodic checkpointing
    # (checkpoint_freq=250) is left disabled.
    checkpoint_at_end=True,
    # metric/mode are the criteria `results.best_checkpoint` relies on below.
    metric="episode_reward_mean",
    mode="max",
    callbacks=[mlflow_logger],
    verbose=3,
)

# Keep the checkpoint path in its own variable and report it.
best_checkpoint_DDPG = results.best_checkpoint
print("Best checkpont:", best_checkpoint_DDPG)

Trial name,status,loc
DDPG_diffdemand_c893c_00000,RUNNING,


estack` has been deprecated. Use `num_framestacks (int)` instead. This will raise an error in the future!
[2m[36m(pid=29734)[0m   torch.from_numpy(self.action_space.low).float())
[2m[36m(pid=29735)[0m   torch.from_numpy(self.action_space.low).float())
[2m[36m(pid=29735)[0m   torch.from_numpy(self.action_space.low).float())
[2m[36m(pid=29727)[0m {'agent_0': array([1.6, 1.6]), 'agent_1': array([1.6, 1.6])} {'agent_0': 0.27249236968976237, 'agent_1': 0.27249236968976237} {'__all__': False} {'agent_0': 1.6, 'agent_1': 1.6}
[2m[36m(pid=29727)[0m {'agent_0': array([1.6, 1.6]), 'agent_1': array([1.6, 1.6])} {'agent_0': 0.27249236968976237, 'agent_1': 0.27249236968976237} {'__all__': False} {'agent_0': 1.6, 'agent_1': 1.6}
[2m[36m(pid=29725)[0m {'agent_0': array([1.6, 1.6]), 'agent_1': array([1.6, 1.6])} {'agent_0': 0.27249236968976237, 'agent_1': 0.27249236968976237} {'__all__': False} {'agent_0': 1.6, 'agent_1': 1.6}
[2m[36m(pid=29725)[0m {'agent_0': array([1.6, 1.6]), '

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DDPG_diffdemand_c893c_00000,RUNNING,192.168.1.202:29717,1,1.50976,1504,54.6995,55.3859,53.3625,100


Result for DDPG_diffdemand_c893c_00000:
  custom_metrics: {}
  date: 2021-04-12_15-03-54
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.0930330727414
  episode_reward_mean: 51.491426373001325
  episode_reward_min: 44.109462260047835
  episodes_this_iter: 16
  episodes_total: 24
  experiment_id: 408a4ebda6c44550bef02b0dd09cf0b5
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 2504
    learner:
      policy_0:
        actor_loss: -0.46607890725135803
        allreduce_latency: 0.0
        critic_loss: 0.0005611776141449809
        max_q: 0.628998339176178
        mean_q: 0.476421058177948
        mean_td_error: 0.0019068008987233043
        min_q: 0.38785067200660706
        td_error: "[ 0.07837963 -0.05513912  0.16240585  0.0634346  -0.11637056  0.03621751\n\
          \  0.06167123  0.07347596  0.07226807  0.09271479 -0.06468573  0.07092172\n\
          \ -0.07754868  0.12444997 -0.13558644  0.13953602 -0.14107156 -0.00931734\n\
          \ -0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DDPG_diffdemand_c893c_00000,RUNNING,192.168.1.202:29717,2,11.858,2504,51.4914,56.093,44.1095,100


Result for DDPG_diffdemand_c893c_00000:
  custom_metrics: {}
  date: 2021-04-12_15-04-04
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.0930330727414
  episode_reward_mean: 48.49166544485884
  episode_reward_min: 39.130900551074205
  episodes_this_iter: 8
  episodes_total: 32
  experiment_id: 408a4ebda6c44550bef02b0dd09cf0b5
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 3504
    learner:
      policy_0:
        actor_loss: -0.5081722736358643
        allreduce_latency: 0.0
        critic_loss: 0.0005460760439746082
        max_q: 0.7106596231460571
        mean_q: 0.5253397822380066
        mean_td_error: 0.0009158630855381489
        min_q: 0.39330145716667175
        td_error: "[ 0.10459054  0.04601893  0.03217191 -0.32544038  0.06285962 -0.04447824\n\
          \  0.02846029  0.14891553  0.10442525  0.02858546  0.06073084  0.02734905\n\
          \  0.05973822  0.02816656 -0.10992944  0.15510905 -0.29908168  0.0365299\n\
          \  0.02

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DDPG_diffdemand_c893c_00000,RUNNING,192.168.1.202:29717,3,22.0013,3504,48.4917,56.093,39.1309,100


Result for DDPG_diffdemand_c893c_00000:
  custom_metrics: {}
  date: 2021-04-12_15-04-15
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.0930330727414
  episode_reward_mean: 46.670727778385604
  episode_reward_min: 38.82628590346042
  episodes_this_iter: 8
  episodes_total: 40
  experiment_id: 408a4ebda6c44550bef02b0dd09cf0b5
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 4504
    learner:
      policy_0:
        actor_loss: -0.5261380672454834
        allreduce_latency: 0.0
        critic_loss: 0.0005984041490592062
        max_q: 0.7574036121368408
        mean_q: 0.5449230074882507
        mean_td_error: -0.008061928674578667
        min_q: 0.390208899974823
        td_error: "[ 0.01960599  0.12740499 -0.24688905  0.05520904  0.01828727  0.01614964\n\
          \ -0.13582867  0.01979306  0.02757034  0.11368519 -0.09061667  0.00877362\n\
          \ -0.20448297  0.02318186 -0.28860694  0.02053252  0.01614964 -0.17366433\n\
          \ -0.163

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DDPG_diffdemand_c893c_00000,RUNNING,192.168.1.202:29717,4,32.5607,4504,46.6707,56.093,38.8263,100


Result for DDPG_diffdemand_c893c_00000:
  custom_metrics: {}
  date: 2021-04-12_15-04-25
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.0930330727414
  episode_reward_mean: 45.476668357230466
  episode_reward_min: 38.82628590346042
  episodes_this_iter: 8
  episodes_total: 48
  experiment_id: 408a4ebda6c44550bef02b0dd09cf0b5
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 5504
    learner:
      policy_0:
        actor_loss: -0.6583638191223145
        allreduce_latency: 0.0
        critic_loss: 0.0006136372103355825
        max_q: 0.7849420309066772
        mean_q: 0.5911955237388611
        mean_td_error: -0.012876739725470543
        min_q: 0.3846973776817322
        td_error: "[-1.22592866e-01 -6.10178113e-02  5.46039939e-02  5.15798926e-02\n\
          \  4.34563756e-02 -2.27396488e-01 -7.97448754e-02 -9.39368606e-02\n -2.43647754e-01\
          \ -2.12333083e-01 -1.58310413e-01 -6.01067543e-02\n -2.26107001e-01  3.75545025e-03\
         

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DDPG_diffdemand_c893c_00000,RUNNING,192.168.1.202:29717,5,42.6381,5504,45.4767,56.093,38.8263,100


Result for DDPG_diffdemand_c893c_00000:
  custom_metrics: {}
  date: 2021-04-12_15-04-35
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.0930330727414
  episode_reward_mean: 43.28311264194534
  episode_reward_min: 36.13192996110427
  episodes_this_iter: 16
  episodes_total: 64
  experiment_id: 408a4ebda6c44550bef02b0dd09cf0b5
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 6504
    learner:
      policy_0:
        actor_loss: -0.6930789947509766
        allreduce_latency: 0.0
        critic_loss: 0.00048227584920823574
        max_q: 0.8263306021690369
        mean_q: 0.6391732692718506
        mean_td_error: -0.005487625487148762
        min_q: 0.4610462188720703
        td_error: "[ 4.26390171e-02  9.40655470e-02  2.13122964e-02  1.56962872e-02\n\
          \  3.87936234e-02  1.05275273e-01 -6.78918362e-02  3.98758650e-02\n  3.87936234e-02\
          \ -2.08104312e-01  1.93914354e-01  3.96276116e-02\n -7.19031692e-03  1.34249330e-02\
        

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DDPG_diffdemand_c893c_00000,RUNNING,192.168.1.202:29717,6,53.0174,6504,43.2831,56.093,36.1319,100


Result for DDPG_diffdemand_c893c_00000:
  custom_metrics: {}
  date: 2021-04-12_15-04-46
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.0930330727414
  episode_reward_mean: 42.50204271212579
  episode_reward_min: 35.93767996260311
  episodes_this_iter: 8
  episodes_total: 72
  experiment_id: 408a4ebda6c44550bef02b0dd09cf0b5
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 7504
    learner:
      policy_0:
        actor_loss: -0.7226266860961914
        allreduce_latency: 0.0
        critic_loss: 0.0004650880873668939
        max_q: 0.893939197063446
        mean_q: 0.6835399866104126
        mean_td_error: -0.00878012366592884
        min_q: 0.5265695452690125
        td_error: "[ 2.41695046e-02  1.03499293e-02 -1.82884574e-01 -2.31153667e-01\n\
          \  2.55886912e-02  2.59917378e-02  2.74375081e-02  1.91146493e-01\n  7.99137354e-03\
          \  2.04629898e-02  7.63852000e-02 -2.74757683e-01\n  2.32144594e-02  1.08035922e-01\
          \ 

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DDPG_diffdemand_c893c_00000,RUNNING,192.168.1.202:29717,7,63.7578,7504,42.502,56.093,35.9377,100


Result for DDPG_diffdemand_c893c_00000:
  custom_metrics: {}
  date: 2021-04-12_15-04-57
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.0930330727414
  episode_reward_mean: 41.88335145121788
  episode_reward_min: 35.93767996260311
  episodes_this_iter: 8
  episodes_total: 80
  experiment_id: 408a4ebda6c44550bef02b0dd09cf0b5
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 8504
    learner:
      policy_0:
        actor_loss: -0.7600094079971313
        allreduce_latency: 0.0
        critic_loss: 0.00048240573960356414
        max_q: 0.9262548089027405
        mean_q: 0.7312755584716797
        mean_td_error: -0.01703658513724804
        min_q: 0.5787982940673828
        td_error: "[ 1.59913301e-02 -2.34491765e-01  1.68766379e-02  1.49891973e-02\n\
          \ -4.79642749e-02  7.18195438e-02  6.99090958e-03 -6.64472580e-04\n  1.46655440e-02\
          \ -1.07059479e-02  2.00505495e-01  1.69336200e-02\n -9.34713483e-02  1.07594132e-02\
          

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DDPG_diffdemand_c893c_00000,RUNNING,192.168.1.202:29717,8,74.4292,8504,41.8834,56.093,35.9377,100


Result for DDPG_diffdemand_c893c_00000:
  custom_metrics: {}
  date: 2021-04-12_15-05-09
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 56.0930330727414
  episode_reward_mean: 41.37042861042514
  episode_reward_min: 35.86383869784273
  episodes_this_iter: 8
  episodes_total: 88
  experiment_id: 408a4ebda6c44550bef02b0dd09cf0b5
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 9504
    learner:
      policy_0:
        actor_loss: -0.8396894335746765
        allreduce_latency: 0.0
        critic_loss: 0.00043896661372855306
        max_q: 0.9795256853103638
        mean_q: 0.7694618701934814
        mean_td_error: -0.013088141568005085
        min_q: 0.6282723546028137
        td_error: "[ 0.01002342 -0.01844865 -0.01597005  0.01163679 -0.17623198  0.00674921\n\
          \ -0.21035123  0.06300437  0.01787001  0.05029774  0.03065401 -0.16085374\n\
          \ -0.01783901  0.00966543  0.10703808 -0.22864491  0.09764826 -0.06599927\n\
          \  0.18

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DDPG_diffdemand_c893c_00000,RUNNING,192.168.1.202:29717,9,86.7147,9504,41.3704,56.093,35.8638,100


Result for DDPG_diffdemand_c893c_00000:
  custom_metrics: {}
  date: 2021-04-12_15-05-23
  done: false
  episode_len_mean: 100.0
  episode_reward_max: 61.91441305833922
  episode_reward_mean: 42.23897485779679
  episode_reward_min: 35.86383869784273
  episodes_this_iter: 16
  episodes_total: 104
  experiment_id: 408a4ebda6c44550bef02b0dd09cf0b5
  hostname: Matiass-MacBook-Pro.local
  info:
    last_target_update_ts: 10504
    learner:
      policy_0:
        actor_loss: -0.9311899542808533
        allreduce_latency: 0.0
        critic_loss: 0.0004345330235082656
        max_q: 1.060697317123413
        mean_q: 0.860946774482727
        mean_td_error: -0.0033799023367464542
        min_q: 0.7040072083473206
        td_error: "[ 0.04939014  0.02821594 -0.02119529  0.01650542  0.00352967 -0.18112224\n\
          \ -0.09040523  0.00188905  0.01383454 -0.0991233   0.02420008  0.02302003\n\
          \  0.02154779  0.0264436   0.02608293  0.02650076  0.0358364  -0.11829305\n\
          \  0.

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
DDPG_diffdemand_c893c_00000,RUNNING,192.168.1.202:29717,10,100.196,10504,42.239,61.9144,35.8638,100


KeyboardInterrupt: 

In [None]:
# Shut down the Ray runtime that was started with init(...) in STEP 0.
shutdown()