In [1]:
import scipy.signal
import sys
import torch
import torch.nn as nn
import numpy as np

In [2]:
from typing import Dict, List, Optional, Tuple
import gym
from PIL import Image
# from pyvirtualdisplay import Display
# Display().start()
from datetime import datetime
from tqdm import tqdm

In [3]:
import math
import random
from copy import deepcopy
import torch
from torch.optim import Adam
from torch.optim import RMSprop
import gym
import time
from collections import namedtuple, deque
import neptune.new as neptune

In [4]:
import robosuite as suite
from robosuite.controllers import load_controller_config
from robosuite.controllers.controller_factory import reset_controllers
from robosuite.utils import observables
from robosuite.utils.input_utils import *
from robosuite.robots import Bimanual
import imageio
import numpy as np
import robosuite.utils.macros as macros
macros.IMAGE_CONVENTION = "opencv"

In [5]:
nep_log = neptune.init(
    project="xhnfirst/DDPG-robosuite",
    api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI1NTg5MDI2OS01MTVmLTQ2YjUtODA1Yy02ZWQyNDgxZDcwN2UifQ==",
)

https://app.neptune.ai/xhnfirst/DDPG-robosuite/e/DDPGROB-214
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#.stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


In [6]:
options = {
    'env_name': 'EElab_test4',
    "robots": "UR5e"
}
controller_name = "JOINT_VELOCITY"
options["controller_configs"] = suite.load_controller_config(default_controller=controller_name)

env = suite.make(
    **options,
    has_renderer=False,
    has_offscreen_renderer=True,
    ignore_done=True,
    use_camera_obs=False,
    gripper_types=None,
    renderer = 'mujoco',

)

test_env = suite.make(
    **options,
    has_renderer=False,
    has_offscreen_renderer=False,
    ignore_done=True,
    use_camera_obs=False,
    gripper_types=None,
    renderer = 'mujoco',
)


video_env = suite.make(
    **options,
    gripper_types=None,
    has_renderer=False,
    has_offscreen_renderer=True,
    ignore_done=True,
    use_camera_obs=True,
    use_object_obs=True, 
    camera_names='Labviewer',
    camera_heights=512,
    camera_widths=512,
    # control_freq=200,
    renderer = 'mujoco',
)

frame = []
device= torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('device = ', device)

device =  cuda


In [7]:
def mlp(sizes, activation, output_activation=nn.Identity):
    layers = []
    for j in range(len(sizes)-1):
        act = activation if j < len(sizes)-2 else output_activation
        layers += [nn.Linear(sizes[j], sizes[j+1]), act()]
    return nn.Sequential(*layers)


class MLPActor(nn.Module):

    def __init__(self, obs_dim, act_dim, hidden_sizes, activation, act_limit):
        super().__init__()
        pi_sizes = [obs_dim] + list(hidden_sizes) + [act_dim]
        self.pi = mlp(pi_sizes, activation, nn.Tanh)
        self.act_limit = act_limit

    def forward(self, obs):
        # Return output from network scaled to action space limits.
        return self.act_limit * self.pi(obs)

class MLPQFunction(nn.Module):

    def __init__(self, obs_dim, act_dim, hidden_sizes, activation):
        super().__init__()
        self.q = mlp([obs_dim + act_dim] + list(hidden_sizes) + [1], activation)

    def forward(self, obs, act):
        q = self.q(torch.cat([obs, act], dim=-1))
        return torch.squeeze(q, -1) # Critical to ensure q has right shape.

class MLPActorCritic(nn.Module):

    def __init__(self, hidden_sizes=(256,256),
                 activation=nn.ReLU, device=torch.device("cuda" if torch.cuda.is_available() else "cpu")):
        super().__init__()

        obs_dim = 35
        act_dim = 6
        act_limit = 1

        # build policy and value functions
        self.pi = MLPActor(obs_dim, act_dim, hidden_sizes, activation, act_limit).to(device)
        self.q = MLPQFunction(obs_dim, act_dim, hidden_sizes, activation).to(device)

    def act(self, obs):
        with torch.no_grad():
            return self.pi(obs)

In [8]:
Transition = namedtuple('Transition',
                        ('obs', 'act', 'rew', 'next_obs', 'done'))

class ReplayMemory(object):

    def __init__(self, capacity):
        self.memory = deque([],maxlen=capacity)

    def push(self, *args):
        """Save a transition"""
        self.memory.append(Transition(*args))

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)

In [9]:

params = {
    "dropout": 0.2,
    "learning_rate": 0.001,
    "optimizer": "Adam",
    "hid": 256,
    "l": 3,
    "seed": 0,
    "steps_per_epoch": 3000,
    "steps_video": 30000,
    "epochs": 1000,
    "replay_size": int(1e8),
    "gamma": 0.99,
    "polyak": 0.995,
    "pi_lr": 1e-4,
    "q_lr": 1e-4,
    "batch_size": 1000,
    "start_steps": 10000, 
    "update_after": 5000,
    "update_every": 100,
    "act_noise": 0.01,
    "num_test_episodes": 5,
    "max_ep_len": 1000,
    "max_video_len": 1000,
    "save_model_len": 10000,
    # "obs_dim": 47,
    # "act_dim": 7,
    # "act_limit": 1
}

ac_kwargs=dict(hidden_sizes=[params["hid"]]*params["l"])

In [10]:
nep_log["parameters"] = params

torch.manual_seed(params["seed"])
np.random.seed(params["seed"])

obs_dim = 35
print('obs_dim = ', obs_dim)
act_dim = 6
print('act_dim = ', act_dim)
# Action limit for clamping: critically, assumes all dimensions share the same bound!
act_limit = 1
print('act_limit = ', act_limit)
# Create actor-critic module and target networks
ac = MLPActorCritic(**ac_kwargs)
ac_targ = deepcopy(ac)

# Freeze target networks with respect to optimizers (only update via polyak averaging)
for p in ac_targ.parameters():
    p.requires_grad = False

memory = ReplayMemory(params["replay_size"])

obs_dim =  35
act_dim =  6
act_limit =  1


In [11]:
# Set up function for computing DDPG Q-loss
def compute_loss_q(data):

    o = torch.cat(data.obs).float()
    a = torch.cat(data.act).float()
    r = torch.cat(data.rew).float()
    o2 =torch.cat(data.next_obs).float()
    d = torch.cat(data.done).float()

    q = ac.q(o,a)


    # Bellman backup for Q function
    with torch.no_grad():
        q_pi_targ = ac_targ.q(o2, ac_targ.pi(o2))
        backup = r + params["gamma"] * (1 - d) * q_pi_targ

    # MSE loss against Bellman backup
    loss_q = ((q - backup)**2).mean()

    return loss_q

# Set up function for computing DDPG pi loss
def compute_loss_pi(data):

    o = torch.cat(data.obs).float()

    q_pi = ac.q(o, ac.pi(o))

    return -q_pi.mean()


In [12]:
pi_optimizer = RMSprop(ac.pi.parameters(), lr=params["pi_lr"])
q_optimizer = RMSprop(ac.q.parameters(), lr=params["q_lr"])

def update(data):
    # First run one gradient descent step for Q.


    q_optimizer.zero_grad()
    loss_q = compute_loss_q(data)

    loss_q.backward()

    q_optimizer.step()


    # Freeze Q-network so you don't waste computational effort 
    # computing gradients for it during the policy learning step.
    for p in ac.q.parameters():
        p.requires_grad = False

    # Next run one gradient descent step for pi.
    pi_optimizer.zero_grad()
    loss_pi = compute_loss_pi(data)
    loss_pi.backward()
    pi_optimizer.step()

    # Unfreeze Q-network so you can optimize it at next DDPG step.
    for p in ac.q.parameters():
        p.requires_grad = True



    # Finally, update target networks by polyak averaging.
    with torch.no_grad():
        for p, p_targ in zip(ac.parameters(), ac_targ.parameters()):
            # NB: We use an in-place operations "mul_", "add_" to update target
            # params, as opposed to "mul" and "add", which would make new tensors.
            p_targ.data.mul_(params["polyak"])
            p_targ.data.add_((1 - params["polyak"]) * p.data)


In [13]:



def get_action(o, noise_scale):
    a = ac.act(torch.as_tensor(o, dtype=torch.float32))
    a += noise_scale * torch.randn(act_dim).to(device)
    return torch.clip(a, -act_limit, act_limit)

def test_agent(epoch):
    test_main = 0
    test_step = 0
    for j in range(params["num_test_episodes"]):
        obs, d, test_ep_ret, test_ep_len = test_env.reset(), False, 0, 0
        o = list(obs['robot0_proprio-state']) + list(obs['object-state'])
        o = torch.tensor([o], dtype=torch.float32, device=device)
        while not(d or (test_ep_len == params["max_ep_len"])):
            a_cpu = get_action(o, 0).cpu().data.numpy()
            obs, r, d, _ = test_env.step(a_cpu[0])
            o = list(obs['robot0_proprio-state']) + list(obs['object-state'])
            o = torch.tensor([o], dtype=torch.float32, device=device)
            test_ep_ret += r
            test_ep_len += 1
        test_ep_main = test_ep_ret/test_ep_len
        test_step +=1
        test_main += test_ep_main
    print('test_rew_main = ', float(test_main/test_step))
    nep_log["test/reward"].log(test_main/test_step)
    
def video_agent(epoch):
    obs, d, test_ep_len = video_env.reset(), False, 0
    o = list(obs['robot0_proprio-state']) + list(obs['object-state'])
    o = torch.tensor([o], dtype=torch.float32, device=device)
    now = datetime.now()
    current_time = str(now.isoformat())
    writer = imageio.get_writer(
        "/home/xhnfly/Cosmic_rays_X/X_Robot/robosuite/robosuite/demos/video/DDPG_UR5_%s_ep_%d.mp4" % (current_time, epoch), fps=100)
    frame = obs["Labviewer_image"]
    writer.append_data(frame)

    while not(d or (test_ep_len == params["max_video_len"])):
        a_cpu = get_action(o, 0).cpu().data.numpy()
        obs, _, d, _ = video_env.step(a_cpu[0])
        o = list(obs['robot0_proprio-state']) + list(obs['object-state'])
        o = torch.tensor([o], dtype=torch.float32, device=device)
        frame = obs["Labviewer_image"]
        writer.append_data(frame)
        test_ep_len += 1
    writer.close()
    nep_log['video'] = neptune.types.File('/home/xhnfly/Cosmic_rays_X/X_Robot/robosuite/robosuite/demos/video/DDPG_UR5_%s_ep_%d.mp4' % (current_time, epoch))





In [14]:
# obs = {
#     'robot0_joint_pos_cos': None,
#     'robot0_joint_pos_sin': None,
#     'robot0_joint_vel': None,
#     'robot0_eef_pos': None,
#     'robot0_eef_quat': None,
#     'robot0_gripper_qpos': None,
#     'robot0_gripper_qvel': None,
#     'cubeA_pos': None,
#     'cubeA_quat': None,
#     'cubeB_pos': None,
#     'cubeB_quat': None,
#     'gripper_to_cubeA': None,
#     'gripper_to_cubeB': None,
#     'cubeA_to_cubeB': None,
# }

obs, ep_ret, ep_len = env.reset(), 0, 0

o = list(obs['robot0_proprio-state']) + list(obs['object-state'])

# env.viewer.set_camera(camera_id=0)


# Define neutral value
neutral = np.zeros(7)

# Keep track of done variable to know when to break loop

# Prepare for interaction with environment
total_steps = params["steps_per_epoch"] * params["epochs"]
start_time = time.time()

o = torch.tensor([o], device=device)


start_time_rec = datetime.now()
r_true = 0
total_main = 0
ep_rew_main = 0
reward_dict={}

In [15]:
# Main loop: collect experience in env and update/log each epoch
low, high = env.action_spec

for t in tqdm(range(total_steps)):
    
    # Until start_steps have elapsed, randomly sample actions
    # from a uniform distribution for better exploration. Afterwards, 
    # use the learned policy (with some noise, via act_noise). 
    if t > params["start_steps"]:
        a = get_action(o, params["act_noise"])      # Tensor
    else:
        a = torch.tensor([np.random.uniform(low, high)], dtype=torch.float32, device=device)
        
    a_cpu = a.cpu().data.numpy()
    # Step the env
    obs2, r, d, _ = env.step(a_cpu[0])
    
    o2 = list(obs2['robot0_proprio-state']) + list(obs2['object-state'])
    # print('len(o2) = ', len(o2))

    ep_len += 1
    total_main += r


    # Ignore the "done" signal if it comes from hitting the time
    # horizon (that is, when it's an artificial terminal signal
    # that isn't based on the agent's state)
    d = False if ep_len==params["max_ep_len"] else d

    o2 = torch.tensor([o2], dtype=torch.float32, device=device)
    r = torch.tensor([r], dtype=torch.float32, device=device)
    d = torch.tensor([d], dtype=torch.float32, device=device)

    # Store experience to replay buffer
    memory.push(o, a, r, o2, d)
    nep_log["train/o"].log(o)
    nep_log["train/a"].log(a)
    nep_log["train/r"].log(r)
    nep_log["train/o2"].log(o2)
    nep_log["train/d"].log(d)

    # Super critical, easy to overlook step: make sure to update 
    # most recent observation!
    o=o2
    ep_ret += r
    
    
    # End of trajectory handling
    if d or (ep_len == params["max_ep_len"]):
        ep_rew = ep_ret/ep_len
        ep_rew_main += ep_rew
        obs, ep_ret, ep_len = env.reset(), 0, 0
        o = list(obs['robot0_proprio-state']) + list(obs['object-state'])
        o = torch.tensor([o], device=device)


    # Update handling
    if t >= params["update_after"] and t % params["update_every"] == 0:
        for i in range(params["update_every"]):

            transitions = memory.sample(params["batch_size"])
            # Transpose the batch (see https://stackoverflow.com/a/19343/3343043 for
            # detailed explanation). This converts batch-array of Transitions
            # to Transition of batch-arrays.
            batch = Transition(*zip(*transitions))
            update(data=batch)

    # End of epoch handling
    if (t+1) % params["steps_per_epoch"] == 0:
        epoch = (t+1) // params["steps_per_epoch"]

        nep_log["train/reward"].log(ep_rew_main)
        print('ep_rew_main = ', ep_rew_main.cpu().data.numpy())
        ep_rew_main = 0
        # Test the performance of the deterministic version of the agent.
        test_agent(epoch)
        

    if (t+1) % params["steps_video"] == 0:
        epoch = (t+1) // params["steps_per_epoch"]
        now = datetime.now()
        current_time = str(now.isoformat())
        print('current_time = ', current_time)
        video_agent(epoch)
        now = datetime.now()
        current_time = str(now.isoformat())
        print('current_time = ', current_time)

    if (t+1) % params["save_model_len"] == 0:
        epoch = (t+1) // params["steps_per_epoch"]
        now = datetime.now()
        current_time = str(now.isoformat())
        torch.save({
                    'model of ac.q': ac.q.state_dict(),
                    'model of ac.pi': ac.pi.state_dict(),
                    'q_optimizer_state_dict': q_optimizer.state_dict(),
                    'pi_optimizer_state_dict': pi_optimizer.state_dict(),
                    
                    }, "model_nn/model_nn_%s%d.pt" % (current_time, epoch))


        

  a = torch.tensor([np.random.uniform(low, high)], dtype=torch.float32, device=device)
  0%|          | 2984/3000000 [00:25<6:41:51, 124.30it/s]

ep_rew_main =  [1.0374437e-06]


  0%|          | 3018/3000000 [00:44<210:26:22,  3.96it/s]

test_rew_main =  1.136665979579532e-06


  0%|          | 5992/3000000 [01:20<8:46:23, 94.80it/s]  

ep_rew_main =  [-0.00207336]


  0%|          | 6000/3000000 [01:41<364:16:08,  2.28it/s]

test_rew_main =  6.543152033114508e-06


  0%|          | 8997/3000000 [02:48<8:31:01, 97.55it/s]  

ep_rew_main =  [-0.00618795]


  0%|          | 9000/3000000 [03:09<386:10:04,  2.15it/s]

test_rew_main =  6.325714723931707e-06


  0%|          | 11992/3000000 [04:18<9:20:25, 88.86it/s] 

ep_rew_main =  [2.8740096e-06]


  0%|          | 12000/3000000 [04:40<440:57:22,  1.88it/s]

test_rew_main =  2.533861136876592e-07


  0%|          | 14998/3000000 [05:53<9:56:32, 83.40it/s]  

ep_rew_main =  [3.3235515e-06]


  0%|          | 15000/3000000 [06:18<590:06:04,  1.41it/s]

test_rew_main =  1.4341476798005014e-06


  1%|          | 17989/3000000 [07:30<10:02:16, 82.52it/s] 

ep_rew_main =  [6.951682e-07]


  1%|          | 18000/3000000 [07:51<381:12:04,  2.17it/s]

test_rew_main =  1.9418623570466975e-06


  1%|          | 20997/3000000 [09:10<9:02:04, 91.59it/s]  

ep_rew_main =  [-0.00138379]


  1%|          | 21000/3000000 [09:32<473:33:02,  1.75it/s]

test_rew_main =  -0.02237486978613418


  1%|          | 23991/3000000 [10:57<10:52:01, 76.07it/s] 

ep_rew_main =  [6.0726995e-07]


  1%|          | 24000/3000000 [11:17<386:57:43,  2.14it/s]

test_rew_main =  6.405818158221255e-06


  1%|          | 26995/3000000 [12:42<11:20:50, 72.78it/s] 

ep_rew_main =  [-0.00829273]


  1%|          | 27000/3000000 [13:03<599:00:28,  1.38it/s]

test_rew_main =  7.0359533741459754e-09


  1%|          | 29992/3000000 [14:29<9:30:14, 86.80it/s]  

ep_rew_main =  [-0.03054759]


  1%|          | 29992/3000000 [14:42<9:30:14, 86.80it/s]

test_rew_main =  9.944978748355206e-09
current_time =  2022-02-14T23:03:01.614334


  1%|          | 30000/3000000 [16:12<2220:04:13,  2.69s/it]

current_time =  2022-02-14T23:04:25.660585


  1%|          | 32993/3000000 [17:32<10:16:48, 80.17it/s]  

ep_rew_main =  [2.1162157e-07]


  1%|          | 33000/3000000 [17:54<457:24:02,  1.80it/s]

test_rew_main =  -0.012339765052387954


  1%|          | 35991/3000000 [19:18<11:13:10, 73.38it/s] 

ep_rew_main =  [-0.00149953]


  1%|          | 36000/3000000 [19:41<450:08:27,  1.83it/s]

test_rew_main =  -7.926271129606944e-05


  1%|▏         | 38993/3000000 [21:11<10:45:46, 76.42it/s] 

ep_rew_main =  [3.2450203e-09]


  1%|▏         | 39000/3000000 [21:28<341:43:16,  2.41it/s]

test_rew_main =  -0.011298388241162733


  1%|▏         | 41990/3000000 [22:31<9:19:23, 88.13it/s]  

ep_rew_main =  [9.954246e-07]


  1%|▏         | 42000/3000000 [22:49<306:06:33,  2.68it/s]

test_rew_main =  -0.0025597242262603106


  1%|▏         | 44994/3000000 [23:57<9:59:47, 82.11it/s]  

ep_rew_main =  [5.3587492e-08]
test_rew_main =  -0.015114511280850456


  2%|▏         | 47985/3000000 [25:08<9:11:45, 89.17it/s]  

ep_rew_main =  [-0.06608254]
test_rew_main =  -0.023468290271429354


  2%|▏         | 50996/3000000 [26:17<8:04:16, 101.49it/s] 

ep_rew_main =  [-0.05719579]
test_rew_main =  -0.00035971216028784283


  2%|▏         | 53986/3000000 [27:28<10:45:06, 76.11it/s] 

ep_rew_main =  [-0.00299389]
test_rew_main =  4.36136671688847e-06


  2%|▏         | 56989/3000000 [28:41<8:12:16, 99.64it/s]  

ep_rew_main =  [-0.01559983]


  2%|▏         | 57000/3000000 [28:57<225:11:37,  3.63it/s]

test_rew_main =  -0.009717477625608314


  2%|▏         | 59992/3000000 [29:51<8:02:15, 101.60it/s] 

ep_rew_main =  [-0.00039112]


  2%|▏         | 59992/3000000 [30:04<8:02:15, 101.60it/s]

test_rew_main =  1.4337225635931538e-06
current_time =  2022-02-14T23:18:18.320015


  2%|▏         | 60000/3000000 [31:08<1063:03:45,  1.30s/it]

current_time =  2022-02-14T23:19:20.834845


  2%|▏         | 62992/3000000 [32:03<9:21:53, 87.12it/s]   

ep_rew_main =  [-0.03009659]


  2%|▏         | 63000/3000000 [32:20<251:31:30,  3.24it/s]

test_rew_main =  -0.0027786204789501806


  2%|▏         | 65991/3000000 [33:19<8:44:46, 93.18it/s]  

ep_rew_main =  [-0.03649898]


  2%|▏         | 66000/3000000 [33:35<262:44:48,  3.10it/s]

test_rew_main =  -0.002535502624302819


  2%|▏         | 68990/3000000 [34:34<9:19:31, 87.31it/s]  

ep_rew_main =  [-0.02909707]


  2%|▏         | 69000/3000000 [34:49<289:15:48,  2.81it/s]

test_rew_main =  -0.000148085601915479


  2%|▏         | 71994/3000000 [35:50<8:57:58, 90.71it/s]  

ep_rew_main =  [-0.00279406]


  2%|▏         | 72000/3000000 [36:05<241:30:42,  3.37it/s]

test_rew_main =  -0.0076991752316346584


  2%|▏         | 74995/3000000 [37:06<8:24:07, 96.70it/s]  

ep_rew_main =  [-0.0008496]
test_rew_main =  -0.00014319581500378472


  3%|▎         | 77986/3000000 [38:20<9:37:02, 84.40it/s]  

ep_rew_main =  [-0.01619669]


  3%|▎         | 78000/3000000 [38:35<218:52:23,  3.71it/s]

test_rew_main =  -0.02579541411683304


  3%|▎         | 80985/3000000 [39:35<11:11:14, 72.48it/s] 

ep_rew_main =  [-0.01129848]
test_rew_main =  -0.0012908610051828161


  3%|▎         | 83992/3000000 [41:00<8:25:10, 96.21it/s]  

ep_rew_main =  [-0.09530536]
test_rew_main =  -0.03578465413818185


  3%|▎         | 86997/3000000 [42:21<9:28:59, 85.33it/s]  

ep_rew_main =  [0.00021317]
test_rew_main =  -0.00921939839922398


  3%|▎         | 89978/3000000 [43:38<10:44:45, 75.22it/s] 

ep_rew_main =  [0.0026291]
test_rew_main =  -0.01995218919031107
current_time =  2022-02-14T23:32:05.702674


  3%|▎         | 90000/3000000 [44:51<859:39:02,  1.06s/it]

current_time =  2022-02-14T23:33:04.195023


  3%|▎         | 92992/3000000 [45:50<8:32:47, 94.48it/s]  

ep_rew_main =  [-0.07116383]
test_rew_main =  0.0003164354098135689


  3%|▎         | 95998/3000000 [47:01<8:10:16, 98.72it/s]  

ep_rew_main =  [-0.01619529]
test_rew_main =  -0.008770172017840791


  3%|▎         | 98988/3000000 [48:11<8:32:20, 94.37it/s]  

ep_rew_main =  [-0.000883]
test_rew_main =  -0.013637861587548362


  3%|▎         | 101992/3000000 [49:20<8:16:13, 97.34it/s] 

ep_rew_main =  [-0.00319907]
test_rew_main =  -9.606262794313166e-05


  3%|▎         | 104997/3000000 [50:29<8:08:29, 98.77it/s]  

ep_rew_main =  [-0.04542682]
test_rew_main =  3.905022522597989e-06


  4%|▎         | 107992/3000000 [51:38<8:02:42, 99.85it/s]  

ep_rew_main =  [-0.09517901]
test_rew_main =  -0.0009333516848063454


  4%|▎         | 110985/3000000 [52:46<9:16:48, 86.47it/s]  

ep_rew_main =  [-0.0016908]


  4%|▎         | 111000/3000000 [52:58<150:50:54,  5.32it/s]

test_rew_main =  -0.009158049694703333


  4%|▍         | 113976/3000000 [53:54<9:58:27, 80.37it/s]  

ep_rew_main =  [2.7506843e-05]


  4%|▍         | 114000/3000000 [54:08<145:00:43,  5.53it/s]

test_rew_main =  -0.025021615006629444


  4%|▍         | 116999/3000000 [55:05<8:12:35, 97.54it/s]  

ep_rew_main =  [-0.00591966]


  4%|▍         | 117000/3000000 [55:17<201:00:32,  3.98it/s]

test_rew_main =  -0.013491619040265352


  4%|▍         | 119998/3000000 [56:16<8:19:26, 96.11it/s]  

ep_rew_main =  [0.00519745]


  4%|▍         | 119998/3000000 [56:28<8:19:26, 96.11it/s]

test_rew_main =  -0.010566185172176303
current_time =  2022-02-14T23:44:42.673368


  4%|▍         | 120000/3000000 [57:30<1092:49:42,  1.37s/it]

current_time =  2022-02-14T23:45:43.106301


  4%|▍         | 122986/3000000 [58:30<9:21:20, 85.42it/s]   

ep_rew_main =  [-0.00687552]
test_rew_main =  -0.02991366662968089


  4%|▍         | 125984/3000000 [59:43<10:11:14, 78.37it/s] 

ep_rew_main =  [-0.08535711]
test_rew_main =  -0.026720679679158803


  4%|▍         | 128989/3000000 [1:00:54<8:37:50, 92.40it/s] 

ep_rew_main =  [-0.04261373]
test_rew_main =  -0.011872787942501047


  4%|▍         | 131986/3000000 [1:02:05<9:58:24, 79.88it/s]  

ep_rew_main =  [-0.01341787]
test_rew_main =  -0.015006189441963243


  4%|▍         | 134990/3000000 [1:03:16<9:53:36, 80.44it/s]  

ep_rew_main =  [-0.01337848]


  4%|▍         | 135000/3000000 [1:03:29<165:01:28,  4.82it/s]

test_rew_main =  -0.0005162554224180145


  5%|▍         | 137990/3000000 [1:04:29<8:44:26, 90.95it/s]  

ep_rew_main =  [-0.00061429]
test_rew_main =  -0.0002565198921277532


  5%|▍         | 140977/3000000 [1:05:41<10:21:02, 76.73it/s] 

ep_rew_main =  [-0.0470621]


  5%|▍         | 141000/3000000 [1:05:53<139:02:10,  5.71it/s]

test_rew_main =  0.0009394500174076441


  5%|▍         | 143999/3000000 [1:07:08<20:04:07, 39.53it/s] 

ep_rew_main =  [-0.00164188]


  5%|▍         | 144000/3000000 [1:08:15<3577:57:42,  4.51s/it]

test_rew_main =  -0.015886686566823932


  5%|▍         | 146999/3000000 [1:13:22<23:20:50, 33.94it/s]  

ep_rew_main =  [8.970105e-05]


  5%|▍         | 147000/3000000 [1:14:22<4447:05:01,  5.61s/it]

test_rew_main =  -0.000501934613643258


  5%|▍         | 148098/3000000 [1:16:18<18:31:18, 42.77it/s]  Experiencing connection interruptions. Will try to reestablish communication with Neptune. Internal exception was: RequestsFutureAdapterConnectionError
  5%|▍         | 148108/3000000 [1:16:26<253:46:59,  3.12it/s]Experiencing connection interruptions. Will try to reestablish communication with Neptune. Internal exception was: RequestsFutureAdapterConnectionError
  5%|▍         | 149998/3000000 [1:19:25<17:42:08, 44.72it/s] 

ep_rew_main =  [1.4302327e-05]


  5%|▍         | 149998/3000000 [1:19:44<17:42:08, 44.72it/s]

test_rew_main =  0.0001341205976349081
current_time =  2022-02-15T00:08:36.965687


  5%|▌         | 150000/3000000 [1:25:06<19799:40:41, 25.01s/it]

current_time =  2022-02-15T00:13:19.487011


  5%|▌         | 152800/3000000 [1:29:44<20:25:12, 38.73it/s]   Communication with Neptune restored!
Communication with Neptune restored!
  5%|▌         | 152998/3000000 [1:30:06<27:22:26, 28.89it/s] 

ep_rew_main =  [0.00976202]


  5%|▌         | 153000/3000000 [1:31:12<3844:30:25,  4.86s/it]

test_rew_main =  0.0007987377103440003


  5%|▌         | 155986/3000000 [1:33:00<10:30:47, 75.14it/s]  

ep_rew_main =  [-0.01021615]


  5%|▌         | 156000/3000000 [1:33:14<172:28:44,  4.58it/s]

test_rew_main =  -0.01931581401324991


  5%|▌         | 158984/3000000 [1:34:24<11:18:20, 69.80it/s] 

ep_rew_main =  [-0.00250949]
test_rew_main =  -0.006204798991003567


  5%|▌         | 161980/3000000 [1:35:43<11:44:16, 67.16it/s] 

ep_rew_main =  [0.00075933]


  5%|▌         | 162000/3000000 [1:35:56<153:09:17,  5.15it/s]

test_rew_main =  0.002486605292372879


  5%|▌         | 164986/3000000 [1:37:02<9:36:57, 81.90it/s]  

ep_rew_main =  [0.00029026]
test_rew_main =  0.0003368519286184925


  6%|▌         | 167978/3000000 [1:38:22<11:45:02, 66.95it/s] 

ep_rew_main =  [0.00064655]


  6%|▌         | 168000/3000000 [1:38:34<146:24:12,  5.37it/s]

test_rew_main =  -1.6714001407681446e-05


  6%|▌         | 170991/3000000 [1:39:47<9:38:21, 81.52it/s]  

ep_rew_main =  [6.6941844e-05]
test_rew_main =  -0.009304537028844002


  6%|▌         | 173997/3000000 [1:41:28<9:52:46, 79.46it/s]  

ep_rew_main =  [-0.00346092]


  6%|▌         | 174000/3000000 [1:41:45<245:36:50,  3.20it/s]

test_rew_main =  9.031090124409413e-05


  6%|▌         | 176994/3000000 [1:43:06<10:47:53, 72.62it/s] 

ep_rew_main =  [-0.00649178]
test_rew_main =  0.009597068509568603


  6%|▌         | 179996/3000000 [1:44:31<9:41:38, 80.80it/s]  

ep_rew_main =  [-0.00557503]


  6%|▌         | 179996/3000000 [1:44:43<9:41:38, 80.80it/s]

test_rew_main =  0.002481555091801794
current_time =  2022-02-15T00:32:58.180489


  6%|▌         | 180000/3000000 [1:46:00<1295:53:51,  1.65s/it]

current_time =  2022-02-15T00:34:13.147256


  6%|▌         | 182998/3000000 [1:47:31<9:38:24, 81.17it/s]   

ep_rew_main =  [0.00036387]


  6%|▌         | 183000/3000000 [1:47:51<415:24:32,  1.88it/s]

test_rew_main =  -4.692099208462204e-05


  6%|▌         | 185991/3000000 [1:49:24<11:45:59, 66.43it/s] 

ep_rew_main =  [-0.01714531]


  6%|▌         | 186000/3000000 [1:49:40<237:13:45,  3.29it/s]

test_rew_main =  -0.02045440945326966


  6%|▋         | 188990/3000000 [1:51:06<12:29:00, 62.55it/s] 

ep_rew_main =  [-0.00545657]
test_rew_main =  -0.034820437997729284


  6%|▋         | 191981/3000000 [1:52:47<11:43:06, 66.56it/s] 

ep_rew_main =  [-0.07858454]


  6%|▋         | 192000/3000000 [1:53:04<212:25:31,  3.67it/s]

test_rew_main =  0.015244079630922367


  6%|▋         | 194988/3000000 [1:54:24<10:24:40, 74.84it/s] 

ep_rew_main =  [-0.00299044]


  6%|▋         | 195000/3000000 [1:54:41<289:49:23,  2.69it/s]

test_rew_main =  0.0014736206419899566


  7%|▋         | 197983/3000000 [1:56:00<10:51:11, 71.71it/s] 

ep_rew_main =  [0.01197644]
test_rew_main =  -0.0025510666321562


  7%|▋         | 200981/3000000 [1:57:28<11:17:17, 68.88it/s] 

ep_rew_main =  [-0.0591599]
test_rew_main =  0.023657213667407608


  7%|▋         | 203980/3000000 [1:58:49<11:10:01, 69.55it/s] 

ep_rew_main =  [-0.04289677]
test_rew_main =  0.04185770847875915


  7%|▋         | 206979/3000000 [2:00:12<11:34:33, 67.02it/s] 

ep_rew_main =  [0.15254454]


  7%|▋         | 207000/3000000 [2:00:23<129:16:49,  6.00it/s]

test_rew_main =  0.08168089300015517


  7%|▋         | 209993/3000000 [2:01:34<8:58:25, 86.36it/s]  

ep_rew_main =  [0.22694981]


  7%|▋         | 209993/3000000 [2:01:46<8:58:25, 86.36it/s]

test_rew_main =  0.053056181911886414
current_time =  2022-02-15T00:49:59.392603


  7%|▋         | 210000/3000000 [2:02:47<943:14:17,  1.22s/it]

current_time =  2022-02-15T00:51:00.198513


  7%|▋         | 212991/3000000 [2:03:57<11:18:32, 68.46it/s] 

ep_rew_main =  [0.12524384]
test_rew_main =  0.1082801821381322


  7%|▋         | 215985/3000000 [2:05:21<11:08:48, 69.38it/s] 

ep_rew_main =  [-0.02676793]
test_rew_main =  0.11132159223933828


  7%|▋         | 218985/3000000 [2:06:44<11:37:09, 66.49it/s] 

ep_rew_main =  [0.2623248]
test_rew_main =  0.06492424144559453


  7%|▋         | 221997/3000000 [2:08:07<9:36:49, 80.27it/s]  

ep_rew_main =  [0.3198747]
test_rew_main =  0.0012547622122847074


  7%|▋         | 224983/3000000 [2:09:32<11:27:44, 67.25it/s] 

ep_rew_main =  [0.3109276]
test_rew_main =  0.033563925989066455


  8%|▊         | 227998/3000000 [2:10:56<9:01:55, 85.25it/s]  

ep_rew_main =  [0.19566615]


  8%|▊         | 228000/3000000 [2:11:08<177:45:51,  4.33it/s]

test_rew_main =  0.1703012681536699


  8%|▊         | 230987/3000000 [2:12:21<11:27:40, 67.11it/s] 

ep_rew_main =  [0.79348963]
test_rew_main =  0.17100193216305742


  8%|▊         | 233995/3000000 [2:13:46<9:42:44, 79.11it/s]  

ep_rew_main =  [0.68388176]


  8%|▊         | 233995/3000000 [2:13:58<9:42:44, 79.11it/s]

test_rew_main =  0.28591838621589927


  8%|▊         | 236994/3000000 [2:15:12<9:38:25, 79.61it/s]  

ep_rew_main =  [0.57896984]
test_rew_main =  0.19793049724144718


  8%|▊         | 239976/3000000 [2:16:38<11:43:59, 65.34it/s] 

ep_rew_main =  [0.59476405]


  8%|▊         | 239976/3000000 [2:16:49<11:43:59, 65.34it/s]

test_rew_main =  0.22845094100566712
current_time =  2022-02-15T01:05:02.959882


  8%|▊         | 240000/3000000 [2:17:51<725:23:40,  1.06it/s]

current_time =  2022-02-15T01:06:04.220796


  8%|▊         | 242990/3000000 [2:19:07<11:50:09, 64.70it/s] 

ep_rew_main =  [0.15416697]


  8%|▊         | 242990/3000000 [2:19:19<11:50:09, 64.70it/s]

test_rew_main =  0.06963885460435901


  8%|▊         | 245986/3000000 [2:20:34<11:42:04, 65.38it/s] 

ep_rew_main =  [0.5680504]
test_rew_main =  0.21547873478452595


  8%|▊         | 248980/3000000 [2:22:03<11:59:43, 63.70it/s] 

ep_rew_main =  [0.42968366]
test_rew_main =  0.18932448041122424


  8%|▊         | 251983/3000000 [2:23:32<12:25:03, 61.47it/s] 

ep_rew_main =  [0.47045195]
test_rew_main =  0.28281707832057934


  8%|▊         | 254994/3000000 [2:25:01<9:54:48, 76.92it/s]  

ep_rew_main =  [0.45286548]
test_rew_main =  0.24580591806123592


  9%|▊         | 257990/3000000 [2:26:30<12:12:51, 62.36it/s] 

ep_rew_main =  [0.60414577]
test_rew_main =  0.29325927696858595


  9%|▊         | 260992/3000000 [2:28:01<11:57:58, 63.58it/s] 

ep_rew_main =  [0.45237258]
test_rew_main =  0.3286226912042203


  9%|▉         | 263989/3000000 [2:29:32<12:04:22, 62.95it/s] 

ep_rew_main =  [0.982454]
test_rew_main =  0.19053219796078047


  9%|▉         | 266979/3000000 [2:31:04<12:30:33, 60.69it/s] 

ep_rew_main =  [0.58780175]


  9%|▉         | 267000/3000000 [2:31:17<135:05:42,  5.62it/s]

test_rew_main =  0.24702344877239493


  9%|▉         | 269987/3000000 [2:32:36<12:06:56, 62.59it/s] 

ep_rew_main =  [0.758387]
test_rew_main =  0.03179930866677063
current_time =  2022-02-15T01:21:01.072118


  9%|▉         | 270000/3000000 [2:33:48<829:25:14,  1.09s/it]

current_time =  2022-02-15T01:22:01.377401


  9%|▉         | 272978/3000000 [2:35:09<13:00:07, 58.26it/s] 

ep_rew_main =  [0.99881184]


  9%|▉         | 273000/3000000 [2:35:21<128:03:11,  5.92it/s]

test_rew_main =  0.24939540158114165


  9%|▉         | 275995/3000000 [2:36:42<10:03:09, 75.27it/s] 

ep_rew_main =  [0.7234632]
test_rew_main =  0.17207748392301353


  9%|▉         | 278989/3000000 [2:38:16<14:11:25, 53.26it/s] 

ep_rew_main =  [0.32938004]
test_rew_main =  0.3490007150008564


  9%|▉         | 281987/3000000 [2:39:49<12:18:12, 61.37it/s] 

ep_rew_main =  [0.60385746]


  9%|▉         | 282000/3000000 [2:40:02<145:05:39,  5.20it/s]

test_rew_main =  0.1515938656602494


  9%|▉         | 284987/3000000 [2:41:24<12:04:31, 62.46it/s] 

ep_rew_main =  [1.0842923]
test_rew_main =  0.5066304113928527


 10%|▉         | 287998/3000000 [2:42:59<10:24:22, 72.39it/s] 

ep_rew_main =  [1.0359206]


 10%|▉         | 288000/3000000 [2:43:11<173:05:56,  4.35it/s]

test_rew_main =  0.40252585476053576


 10%|▉         | 290984/3000000 [2:44:34<12:49:54, 58.64it/s] 

ep_rew_main =  [1.1785657]
test_rew_main =  0.3155573945172927


 10%|▉         | 293987/3000000 [2:46:11<12:49:24, 58.62it/s] 

ep_rew_main =  [1.0334737]


 10%|▉         | 294000/3000000 [2:46:23<150:08:10,  5.01it/s]

test_rew_main =  0.30078012959759354


 10%|▉         | 296978/3000000 [2:47:48<13:06:45, 57.26it/s] 

ep_rew_main =  [1.183047]


 10%|▉         | 297000/3000000 [2:48:00<127:08:43,  5.91it/s]

test_rew_main =  0.1841296846938057


 10%|▉         | 299994/3000000 [2:49:25<10:43:40, 69.91it/s] 

ep_rew_main =  [0.8548708]
test_rew_main =  0.27985161206397074
current_time =  2022-02-15T01:37:49.611869


 10%|█         | 300000/3000000 [2:50:37<932:48:17,  1.24s/it]

current_time =  2022-02-15T01:38:50.025161


 10%|█         | 302998/3000000 [2:52:03<10:19:06, 72.60it/s] 

ep_rew_main =  [1.0109262]
test_rew_main =  0.3467203725133453


 10%|█         | 305980/3000000 [2:53:41<13:12:54, 56.63it/s] 

ep_rew_main =  [1.0982099]


 10%|█         | 306000/3000000 [2:53:53<134:21:10,  5.57it/s]

test_rew_main =  0.18165387460177293


 10%|█         | 308991/3000000 [2:55:19<10:48:38, 69.14it/s] 

ep_rew_main =  [0.74488044]
test_rew_main =  0.26736338519986513


 10%|█         | 311997/3000000 [2:56:58<10:23:08, 71.89it/s] 

ep_rew_main =  [1.0355728]
test_rew_main =  0.18761801747403717


 10%|█         | 314978/3000000 [2:58:38<13:18:27, 56.05it/s] 

ep_rew_main =  [1.0593033]


 10%|█         | 315000/3000000 [2:58:50<125:24:20,  5.95it/s]

test_rew_main =  0.36098746678236393


 11%|█         | 317987/3000000 [3:00:18<13:08:05, 56.72it/s] 

ep_rew_main =  [1.4482638]
test_rew_main =  0.29687237906407093


 11%|█         | 320986/3000000 [3:01:58<13:33:12, 54.91it/s] 

ep_rew_main =  [0.8779402]
test_rew_main =  0.39648568038011806


 11%|█         | 323997/3000000 [3:03:38<10:35:40, 70.16it/s] 

ep_rew_main =  [1.4226637]
test_rew_main =  0.2894008397415283


 11%|█         | 326980/3000000 [3:05:20<13:50:56, 53.61it/s] 

ep_rew_main =  [1.2214856]
test_rew_main =  0.5319165205370837


 11%|█         | 329998/3000000 [3:07:02<10:22:17, 71.51it/s] 

ep_rew_main =  [0.84312785]


 11%|█         | 329998/3000000 [3:07:13<10:22:17, 71.51it/s]

test_rew_main =  0.41821472784256997
current_time =  2022-02-15T01:55:27.160693


 11%|█         | 330000/3000000 [3:08:15<935:32:47,  1.26s/it]

current_time =  2022-02-15T01:56:28.027770


 11%|█         | 332981/3000000 [3:09:44<13:52:13, 53.41it/s] 

ep_rew_main =  [1.5247394]
test_rew_main =  0.36785079158065975


 11%|█         | 335988/3000000 [3:11:28<13:22:27, 55.33it/s] 

ep_rew_main =  [1.1967537]
test_rew_main =  0.3644308597178939


 11%|█▏        | 338991/3000000 [3:13:12<13:11:59, 56.00it/s] 

ep_rew_main =  [0.40393317]
test_rew_main =  0.3115954818871108


 11%|█▏        | 341987/3000000 [3:14:55<13:19:13, 55.43it/s] 

ep_rew_main =  [1.7850356]
test_rew_main =  0.3920550260359713


 11%|█▏        | 344979/3000000 [3:16:40<13:52:16, 53.17it/s] 

ep_rew_main =  [1.1493037]
test_rew_main =  0.49407031928903794


 12%|█▏        | 347979/3000000 [3:18:24<14:18:06, 51.51it/s] 

ep_rew_main =  [1.1929367]


 12%|█▏        | 348000/3000000 [3:18:36<124:41:33,  5.91it/s]

test_rew_main =  0.3855028726528224


 12%|█▏        | 350991/3000000 [3:20:10<13:39:20, 53.89it/s] 

ep_rew_main =  [1.1035165]
test_rew_main =  0.2898144585092391


 12%|█▏        | 353999/3000000 [3:21:57<11:01:24, 66.68it/s] 

ep_rew_main =  [1.7589958]
test_rew_main =  0.48073982704113616


 12%|█▏        | 356980/3000000 [3:23:43<13:57:04, 52.62it/s] 

ep_rew_main =  [1.7086915]


 12%|█▏        | 357000/3000000 [3:23:55<125:28:48,  5.85it/s]

test_rew_main =  0.5456758489069021


 12%|█▏        | 359982/3000000 [3:25:30<14:03:25, 52.17it/s] 

ep_rew_main =  [1.5420706]
test_rew_main =  0.41984560633266055
current_time =  2022-02-15T02:13:54.731696


 12%|█▏        | 360000/3000000 [3:26:41<716:07:14,  1.02it/s]

current_time =  2022-02-15T02:14:54.605276


 12%|█▏        | 362983/3000000 [3:28:18<14:32:19, 50.38it/s] 

ep_rew_main =  [1.6582181]
test_rew_main =  0.42025544079463034


 12%|█▏        | 365998/3000000 [3:30:07<11:22:19, 64.34it/s] 

ep_rew_main =  [1.8178585]
test_rew_main =  0.4001724229075144


 12%|█▏        | 368990/3000000 [3:31:55<14:07:15, 51.76it/s] 

ep_rew_main =  [1.7270024]


 12%|█▏        | 369000/3000000 [3:32:08<158:01:30,  4.62it/s]

test_rew_main =  0.46092391451089443


 12%|█▏        | 371978/3000000 [3:33:45<14:34:44, 50.07it/s] 

ep_rew_main =  [1.7498497]


 12%|█▏        | 372000/3000000 [3:33:57<124:49:27,  5.85it/s]

test_rew_main =  0.4442391857587908


 12%|█▏        | 374998/3000000 [3:35:35<11:07:51, 65.51it/s] 

ep_rew_main =  [1.4885246]


 12%|█▎        | 375000/3000000 [3:35:47<167:02:53,  4.37it/s]

test_rew_main =  0.32344807321706665


 13%|█▎        | 377978/3000000 [3:37:26<14:13:50, 51.18it/s] 

ep_rew_main =  [1.4948378]


 13%|█▎        | 378000/3000000 [3:37:38<119:06:48,  6.11it/s]

test_rew_main =  0.4061637642825307


 13%|█▎        | 380984/3000000 [3:39:17<14:41:24, 49.52it/s] 

ep_rew_main =  [1.8320445]


 13%|█▎        | 381000/3000000 [3:39:29<139:46:19,  5.20it/s]

test_rew_main =  0.44865877962198875


 13%|█▎        | 383984/3000000 [3:41:09<15:25:33, 47.11it/s] 

ep_rew_main =  [1.2034104]
test_rew_main =  0.5255202154446972


 13%|█▎        | 386986/3000000 [3:43:02<15:07:56, 47.97it/s] 

ep_rew_main =  [1.4533582]
test_rew_main =  0.5054430233252005


 13%|█▎        | 389993/3000000 [3:44:55<14:06:51, 51.37it/s] 

ep_rew_main =  [1.3479922]
test_rew_main =  0.5980317670598883
current_time =  2022-02-15T02:33:20.124858


 13%|█▎        | 390000/3000000 [3:46:07<859:53:10,  1.19s/it]

current_time =  2022-02-15T02:34:20.554976


 13%|█▎        | 392984/3000000 [3:47:48<15:15:12, 47.48it/s] 

ep_rew_main =  [1.5068293]


 13%|█▎        | 393000/3000000 [3:48:01<141:57:27,  5.10it/s]

test_rew_main =  0.48321560687237647


 13%|█▎        | 395988/3000000 [3:49:43<14:54:31, 48.52it/s] 

ep_rew_main =  [1.8004005]
test_rew_main =  0.5392291425763149


 13%|█▎        | 398984/3000000 [3:51:38<17:30:14, 41.28it/s] 

ep_rew_main =  [1.7768568]


 13%|█▎        | 399000/3000000 [3:51:50<134:28:57,  5.37it/s]

test_rew_main =  0.5639609552758427


 13%|█▎        | 401997/3000000 [3:53:32<11:47:37, 61.19it/s] 

ep_rew_main =  [1.7628746]
test_rew_main =  0.49819690171616376


 13%|█▎        | 404991/3000000 [3:55:27<14:46:56, 48.76it/s] 

ep_rew_main =  [1.3602122]
test_rew_main =  0.5104633089631202


 14%|█▎        | 407980/3000000 [3:57:23<15:39:05, 46.00it/s] 

ep_rew_main =  [1.5083401]
test_rew_main =  0.5456349359711444


 14%|█▎        | 410991/3000000 [3:59:20<14:35:01, 49.31it/s] 

ep_rew_main =  [1.6180148]


 14%|█▎        | 411000/3000000 [3:59:32<146:08:06,  4.92it/s]

test_rew_main =  0.47619084860526845


 14%|█▍        | 413982/3000000 [4:01:17<15:06:28, 47.55it/s] 

ep_rew_main =  [1.3677952]
test_rew_main =  0.4014940524127154


 14%|█▍        | 416998/3000000 [4:03:15<12:26:06, 57.70it/s] 

ep_rew_main =  [1.4661186]
test_rew_main =  0.34441205699921157


 14%|█▍        | 419998/3000000 [4:05:12<12:26:36, 57.59it/s] 

ep_rew_main =  [1.845304]
test_rew_main =  0.3616801962746351
current_time =  2022-02-15T02:53:37.372904


 14%|█▍        | 420000/3000000 [4:06:25<953:01:38,  1.33s/it]

current_time =  2022-02-15T02:54:38.053864


 14%|█▍        | 422982/3000000 [4:08:12<15:14:37, 46.96it/s] 

ep_rew_main =  [1.7824576]
test_rew_main =  0.5911651795960289


 14%|█▍        | 425980/3000000 [4:10:11<15:32:41, 46.00it/s] 

ep_rew_main =  [1.6854012]


 14%|█▍        | 426000/3000000 [4:10:23<122:28:57,  5.84it/s]

test_rew_main =  0.4310286517718975


 14%|█▍        | 428980/3000000 [4:12:10<15:28:38, 46.14it/s] 

ep_rew_main =  [1.8687649]


 14%|█▍        | 429000/3000000 [4:12:22<122:38:41,  5.82it/s]

test_rew_main =  0.6382396088815883


 14%|█▍        | 431997/3000000 [4:14:11<12:47:03, 55.80it/s] 

ep_rew_main =  [1.699933]


 14%|█▍        | 432000/3000000 [4:14:23<165:36:47,  4.31it/s]

test_rew_main =  0.5828027197719461


 14%|█▍        | 434998/3000000 [4:16:12<12:23:33, 57.49it/s] 

ep_rew_main =  [1.6510714]


 14%|█▍        | 435000/3000000 [4:16:24<159:25:26,  4.47it/s]

test_rew_main =  0.5181932507097787


 15%|█▍        | 437984/3000000 [4:18:13<15:17:34, 46.54it/s] 

ep_rew_main =  [0.9808247]
test_rew_main =  0.5732983395437641


 15%|█▍        | 440989/3000000 [4:20:15<15:40:49, 45.33it/s] 

ep_rew_main =  [1.6398842]
test_rew_main =  0.3939752289840174


 15%|█▍        | 443998/3000000 [4:22:19<12:42:57, 55.84it/s] 

ep_rew_main =  [1.9290984]
test_rew_main =  0.5338068147640899


 15%|█▍        | 446978/3000000 [4:24:21<16:18:49, 43.47it/s] 

ep_rew_main =  [2.0342245]


 15%|█▍        | 447000/3000000 [4:24:33<124:03:59,  5.72it/s]

test_rew_main =  0.580807576988861


 15%|█▍        | 449988/3000000 [4:26:25<15:42:26, 45.10it/s] 

ep_rew_main =  [1.7181424]
test_rew_main =  0.6739403670358943
current_time =  2022-02-15T03:14:50.110575


 15%|█▌        | 450000/3000000 [4:27:37<767:00:22,  1.08s/it]

current_time =  2022-02-15T03:15:50.613694


 15%|█▌        | 452994/3000000 [4:29:31<12:38:22, 55.98it/s] 

ep_rew_main =  [1.9336782]
test_rew_main =  0.5312464861688186


 15%|█▌        | 455998/3000000 [4:31:36<13:20:13, 52.99it/s] 

ep_rew_main =  [1.7611499]
test_rew_main =  0.5957620204123556


 15%|█▌        | 458997/3000000 [4:33:41<12:46:33, 55.25it/s] 

ep_rew_main =  [2.0722299]
test_rew_main =  0.511982749892903


 15%|█▌        | 461982/3000000 [4:35:47<15:48:48, 44.58it/s] 

ep_rew_main =  [1.9771321]
test_rew_main =  0.6462593242052957


 15%|█▌        | 464982/3000000 [4:37:53<16:41:14, 42.20it/s] 

ep_rew_main =  [1.7191801]


 16%|█▌        | 465000/3000000 [4:38:06<135:29:17,  5.20it/s]

test_rew_main =  0.6739594651385172


 16%|█▌        | 467997/3000000 [4:40:01<12:50:16, 54.79it/s] 

ep_rew_main =  [1.5958121]
test_rew_main =  0.3763620599744468


 16%|█▌        | 470984/3000000 [4:42:09<16:02:54, 43.77it/s] 

ep_rew_main =  [1.5130348]
test_rew_main =  0.6570716935301654


 16%|█▌        | 473984/3000000 [4:44:17<16:24:14, 42.77it/s] 

ep_rew_main =  [1.7931268]
test_rew_main =  0.5693041975306781


 16%|█▌        | 476985/3000000 [4:46:25<16:06:23, 43.51it/s] 

ep_rew_main =  [1.23382]


 16%|█▌        | 477000/3000000 [4:46:37<132:49:17,  5.28it/s]

test_rew_main =  0.48243705023090416


 16%|█▌        | 479997/3000000 [4:48:35<12:52:16, 54.39it/s] 

ep_rew_main =  [1.9742124]


 16%|█▌        | 479997/3000000 [4:48:46<12:52:16, 54.39it/s]

test_rew_main =  0.6071783971515897
current_time =  2022-02-15T03:36:59.360385


 16%|█▌        | 480000/3000000 [4:49:47<882:00:54,  1.26s/it]

current_time =  2022-02-15T03:38:00.259850


 16%|█▌        | 482998/3000000 [4:51:45<12:33:48, 55.65it/s] 

ep_rew_main =  [1.7908856]


 16%|█▌        | 483000/3000000 [4:51:58<170:30:54,  4.10it/s]

test_rew_main =  0.6240745599370419


 16%|█▌        | 485992/3000000 [4:53:56<15:35:15, 44.80it/s] 

ep_rew_main =  [1.9997199]


 16%|█▌        | 486000/3000000 [4:54:08<146:26:06,  4.77it/s]

test_rew_main =  0.5122035446561618


 16%|█▋        | 488992/3000000 [4:56:07<16:03:36, 43.43it/s] 

ep_rew_main =  [1.9543431]
test_rew_main =  0.6382822843621158


 16%|█▋        | 491990/3000000 [4:58:19<15:53:50, 43.82it/s] 

ep_rew_main =  [1.7265868]
test_rew_main =  0.562567596056407


 16%|█▋        | 494998/3000000 [5:00:29<13:05:46, 53.13it/s] 

ep_rew_main =  [0.99659497]
test_rew_main =  0.45699477074102407


 17%|█▋        | 497988/3000000 [5:02:42<16:06:27, 43.15it/s] 

ep_rew_main =  [1.7822094]
test_rew_main =  0.5070624574888546


 17%|█▋        | 500980/3000000 [5:04:55<16:28:02, 42.15it/s] 

ep_rew_main =  [1.3555617]


 17%|█▋        | 500980/3000000 [5:05:08<16:28:02, 42.15it/s]

test_rew_main =  0.6267644714768449


 17%|█▋        | 503989/3000000 [5:07:10<16:34:39, 41.82it/s] 

ep_rew_main =  [1.5511441]
test_rew_main =  0.6499771330596127


 17%|█▋        | 506987/3000000 [5:09:23<16:21:40, 42.33it/s] 

ep_rew_main =  [1.908678]
test_rew_main =  0.5434914192743204


 17%|█▋        | 509991/3000000 [5:11:39<16:11:34, 42.71it/s] 

ep_rew_main =  [1.6115983]
test_rew_main =  0.496146731597556
current_time =  2022-02-15T04:00:03.525304


 17%|█▋        | 510000/3000000 [5:12:52<790:16:46,  1.14s/it]

current_time =  2022-02-15T04:01:04.959034


 17%|█▋        | 512991/3000000 [5:14:54<16:19:02, 42.34it/s] 

ep_rew_main =  [1.4874852]
test_rew_main =  0.687110260082293


 17%|█▋        | 515979/3000000 [5:17:12<17:48:11, 38.76it/s] 

ep_rew_main =  [1.6935453]


 17%|█▋        | 516000/3000000 [5:17:23<115:16:28,  5.99it/s]

test_rew_main =  0.5801408832307928


 17%|█▋        | 518977/3000000 [5:19:28<17:28:30, 39.44it/s] 

ep_rew_main =  [1.8968024]


 17%|█▋        | 519000/3000000 [5:19:41<123:13:31,  5.59it/s]

test_rew_main =  0.6389481615709192


 17%|█▋        | 521986/3000000 [5:21:46<17:21:55, 39.64it/s] 

ep_rew_main =  [1.7068291]
test_rew_main =  0.6183621209522284


 17%|█▋        | 524983/3000000 [5:24:03<17:02:00, 40.36it/s] 

ep_rew_main =  [1.9864516]
test_rew_main =  0.6531212593713682


 18%|█▊        | 527983/3000000 [5:26:21<17:30:23, 39.22it/s] 

ep_rew_main =  [1.9728727]
test_rew_main =  0.4067386453937637


 18%|█▊        | 530993/3000000 [5:28:41<16:44:58, 40.95it/s] 

ep_rew_main =  [0.97061324]


 18%|█▊        | 531000/3000000 [5:28:53<149:54:11,  4.58it/s]

test_rew_main =  0.6107702176871513


 18%|█▊        | 533980/3000000 [5:30:59<17:46:24, 38.54it/s] 

ep_rew_main =  [1.756505]


 18%|█▊        | 533980/3000000 [5:31:11<17:46:24, 38.54it/s]

test_rew_main =  0.5245422015387884


 18%|█▊        | 536998/3000000 [5:33:19<13:24:12, 51.04it/s] 

ep_rew_main =  [1.9407964]
test_rew_main =  0.5553706198992986


 18%|█▊        | 539977/3000000 [5:35:40<17:50:01, 38.32it/s] 

ep_rew_main =  [1.5931422]


 18%|█▊        | 539977/3000000 [5:35:51<17:50:01, 38.32it/s]

test_rew_main =  0.5202599791017122
current_time =  2022-02-15T04:24:05.195444


 18%|█▊        | 540000/3000000 [5:36:52<642:46:01,  1.06it/s]

current_time =  2022-02-15T04:25:05.306799


 18%|█▊        | 542983/3000000 [5:39:01<18:05:43, 37.72it/s] 

ep_rew_main =  [2.172163]


 18%|█▊        | 543000/3000000 [5:39:13<136:21:35,  5.01it/s]

test_rew_main =  0.5046468607611694


 18%|█▊        | 545979/3000000 [5:41:23<17:45:08, 38.40it/s] 

ep_rew_main =  [1.3003297]


 18%|█▊        | 546000/3000000 [5:41:35<121:44:15,  5.60it/s]

test_rew_main =  0.5944894722765841


 18%|█▊        | 548987/3000000 [5:43:44<17:10:03, 39.66it/s] 

ep_rew_main =  [1.8783867]
test_rew_main =  0.6408438694084568


 18%|█▊        | 551993/3000000 [5:46:08<14:01:52, 48.46it/s] 

ep_rew_main =  [1.9257803]
test_rew_main =  0.5873360376751672


 18%|█▊        | 554983/3000000 [5:48:30<18:02:32, 37.64it/s] 

ep_rew_main =  [1.8016745]
test_rew_main =  0.6177179200957619


 19%|█▊        | 557989/3000000 [5:50:53<17:40:43, 38.37it/s] 

ep_rew_main =  [1.2217878]


 19%|█▊        | 558000/3000000 [5:51:05<142:45:13,  4.75it/s]

test_rew_main =  0.5426703487105529


 19%|█▊        | 560977/3000000 [5:53:17<17:46:27, 38.12it/s] 

ep_rew_main =  [2.1405323]


 19%|█▊        | 561000/3000000 [5:53:28<109:39:20,  6.18it/s]

test_rew_main =  0.558927525234234


 19%|█▉        | 563987/3000000 [5:55:40<17:08:10, 39.49it/s] 

ep_rew_main =  [1.8099842]
test_rew_main =  0.5128760622457345


 19%|█▉        | 566987/3000000 [5:58:05<17:35:14, 38.43it/s] 

ep_rew_main =  [1.8240627]
test_rew_main =  0.6154132712869923


 19%|█▉        | 569980/3000000 [6:00:31<17:53:18, 37.73it/s] 

ep_rew_main =  [1.8562168]
test_rew_main =  0.5517113942391146
current_time =  2022-02-15T04:48:55.549991


 19%|█▉        | 570000/3000000 [6:01:44<662:58:52,  1.02it/s]

current_time =  2022-02-15T04:49:57.171480


 19%|█▉        | 572998/3000000 [6:03:58<14:10:53, 47.54it/s] 

ep_rew_main =  [1.7120452]
test_rew_main =  0.32491253862324454


 19%|█▉        | 575997/3000000 [6:06:25<13:35:18, 49.55it/s] 

ep_rew_main =  [1.8595018]
test_rew_main =  0.6061372446006177


 19%|█▉        | 578982/3000000 [6:08:53<18:25:10, 36.51it/s] 

ep_rew_main =  [1.5390043]
test_rew_main =  0.5661081510504333


 19%|█▉        | 581983/3000000 [6:11:20<18:33:22, 36.20it/s] 

ep_rew_main =  [1.7989475]
test_rew_main =  0.5366219009343303


 19%|█▉        | 584997/3000000 [6:13:48<13:58:34, 48.00it/s] 

ep_rew_main =  [2.0090642]


 20%|█▉        | 585000/3000000 [6:14:00<152:16:59,  4.41it/s]

test_rew_main =  0.5520804641521916


 20%|█▉        | 587990/3000000 [6:16:16<17:31:15, 38.24it/s] 

ep_rew_main =  [1.8731126]
test_rew_main =  0.6292144775961167


 20%|█▉        | 590987/3000000 [6:18:46<17:56:08, 37.31it/s] 

ep_rew_main =  [2.045567]
test_rew_main =  0.494838546088192


 20%|█▉        | 593994/3000000 [6:21:15<14:40:00, 45.57it/s] 

ep_rew_main =  [2.1068745]
test_rew_main =  0.6313138457195869


 20%|█▉        | 596994/3000000 [6:23:46<17:16:33, 38.64it/s] 

ep_rew_main =  [1.9613693]
test_rew_main =  0.46781137864839833


 20%|█▉        | 599984/3000000 [6:26:16<18:07:33, 36.78it/s] 

ep_rew_main =  [1.2779878]
test_rew_main =  0.5664128677627216
current_time =  2022-02-15T05:14:41.555533


 20%|██        | 600000/3000000 [6:27:28<676:47:38,  1.02s/it]

current_time =  2022-02-15T05:15:41.316840


 20%|██        | 602989/3000000 [6:29:47<17:54:36, 37.18it/s] 

ep_rew_main =  [1.8588221]


 20%|██        | 603000/3000000 [6:30:00<147:28:57,  4.51it/s]

test_rew_main =  0.5236501223090534


 20%|██        | 605983/3000000 [6:32:20<18:07:55, 36.68it/s] 

ep_rew_main =  [1.715456]


 20%|██        | 606000/3000000 [6:32:32<129:26:24,  5.14it/s]

test_rew_main =  0.4864920114422168


 20%|██        | 608998/3000000 [6:34:53<14:13:31, 46.69it/s] 

ep_rew_main =  [1.9450186]
test_rew_main =  0.5958680061987718


 20%|██        | 611997/3000000 [6:37:27<14:42:42, 45.09it/s] 

ep_rew_main =  [1.5418591]
test_rew_main =  0.6047941815380243


 20%|██        | 614997/3000000 [6:40:02<14:44:31, 44.94it/s] 

ep_rew_main =  [1.8451422]
test_rew_main =  0.546229758738042


 21%|██        | 617990/3000000 [6:42:38<18:16:51, 36.19it/s] 

ep_rew_main =  [1.9113681]
test_rew_main =  0.6358084895919871


 21%|██        | 620980/3000000 [6:45:12<18:51:34, 35.04it/s] 

ep_rew_main =  [2.0160296]
test_rew_main =  0.5132627276627746


 21%|██        | 623990/3000000 [6:47:48<15:07:35, 43.63it/s] 

ep_rew_main =  [1.6374199]


 21%|██        | 623990/3000000 [6:48:00<15:07:35, 43.63it/s]

test_rew_main =  0.7344311686072247


 21%|██        | 626992/3000000 [6:50:23<17:29:27, 37.69it/s] 

ep_rew_main =  [1.9328148]
test_rew_main =  0.5609077830006782


 21%|██        | 629994/3000000 [6:52:59<17:25:13, 37.79it/s] 

ep_rew_main =  [1.9713078]


 21%|██        | 629994/3000000 [6:53:11<17:25:13, 37.79it/s]

test_rew_main =  0.52436994297048
current_time =  2022-02-15T05:41:24.663924


 21%|██        | 630000/3000000 [6:54:12<794:08:45,  1.21s/it]

current_time =  2022-02-15T05:42:25.037439


 21%|██        | 632998/3000000 [6:56:38<15:02:21, 43.72it/s] 

ep_rew_main =  [1.9722129]
test_rew_main =  0.45139464502805815


 21%|██        | 635979/3000000 [6:59:16<19:11:07, 34.23it/s] 

ep_rew_main =  [1.5651364]


 21%|██        | 636000/3000000 [6:59:29<122:42:33,  5.35it/s]

test_rew_main =  0.5270887708744906


 21%|██▏       | 638976/3000000 [7:01:54<19:13:54, 34.10it/s] 

ep_rew_main =  [1.6641865]


 21%|██▏       | 639000/3000000 [7:02:06<117:54:11,  5.56it/s]

test_rew_main =  0.6079493467104344


 21%|██▏       | 641994/3000000 [7:04:32<17:58:24, 36.44it/s] 

ep_rew_main =  [1.6240934]


 21%|██▏       | 642000/3000000 [7:04:45<149:22:52,  4.38it/s]

test_rew_main =  0.6283629930846218


 21%|██▏       | 644992/3000000 [7:07:12<14:56:32, 43.78it/s] 

ep_rew_main =  [1.0617728]
test_rew_main =  0.5175797698748799


 22%|██▏       | 647986/3000000 [7:09:52<18:56:25, 34.49it/s] 

ep_rew_main =  [1.9203165]


 22%|██▏       | 648000/3000000 [7:10:05<136:05:05,  4.80it/s]

test_rew_main =  0.4483614684869405


 22%|██▏       | 650979/3000000 [7:12:33<19:37:46, 33.24it/s] 

ep_rew_main =  [2.0112376]


 22%|██▏       | 651000/3000000 [7:12:46<123:35:57,  5.28it/s]

test_rew_main =  0.4814212469965488


 22%|██▏       | 653987/3000000 [7:15:15<18:42:51, 34.82it/s] 

ep_rew_main =  [1.7218965]
test_rew_main =  0.4851666215549638


 22%|██▏       | 656976/3000000 [7:17:57<18:50:42, 34.54it/s] 

ep_rew_main =  [1.073827]


 22%|██▏       | 657000/3000000 [7:18:10<116:38:16,  5.58it/s]

test_rew_main =  0.5752332842654544


 22%|██▏       | 659989/3000000 [7:20:41<18:19:31, 35.47it/s] 

ep_rew_main =  [1.3563375]
test_rew_main =  0.3076416529380973
current_time =  2022-02-15T06:09:05.542658


 22%|██▏       | 660000/3000000 [7:21:54<747:23:27,  1.15s/it]

current_time =  2022-02-15T06:10:07.539189


 22%|██▏       | 662989/3000000 [7:24:25<19:36:07, 33.12it/s] 

ep_rew_main =  [0.5507611]


 22%|██▏       | 663000/3000000 [7:24:38<150:08:55,  4.32it/s]

test_rew_main =  0.3408411137742541


 22%|██▏       | 665994/3000000 [7:27:09<15:11:33, 42.67it/s] 

ep_rew_main =  [0.7282465]
test_rew_main =  0.0985369115272546


 22%|██▏       | 668979/3000000 [7:29:53<19:15:16, 33.63it/s] 

ep_rew_main =  [1.9707463]


 22%|██▏       | 669000/3000000 [7:30:07<132:07:31,  4.90it/s]

test_rew_main =  0.1046569870130433


 22%|██▏       | 671980/3000000 [7:32:39<19:22:27, 33.38it/s] 

ep_rew_main =  [0.30813736]
test_rew_main =  0.3802192782743076


 22%|██▏       | 674999/3000000 [7:35:26<15:14:28, 42.37it/s] 

ep_rew_main =  [0.3909796]


 22%|██▎       | 675000/3000000 [7:35:38<161:37:51,  4.00it/s]

test_rew_main =  0.26949155549354514


 23%|██▎       | 677998/3000000 [7:38:13<15:09:29, 42.55it/s] 

ep_rew_main =  [0.7222539]
test_rew_main =  0.6262325856075313


 23%|██▎       | 680997/3000000 [7:41:00<15:31:49, 41.48it/s] 

ep_rew_main =  [0.65914]
test_rew_main =  0.3984756176659032


 23%|██▎       | 683999/3000000 [7:43:46<15:04:28, 42.68it/s] 

ep_rew_main =  [1.3858263]


 23%|██▎       | 684000/3000000 [7:43:59<161:47:40,  3.98it/s]

test_rew_main =  0.3470542646677557


 23%|██▎       | 686987/3000000 [7:46:33<19:08:35, 33.56it/s] 

ep_rew_main =  [1.2613097]
test_rew_main =  0.4592377033955565


 23%|██▎       | 689976/3000000 [7:49:30<20:57:02, 30.63it/s] 

ep_rew_main =  [1.2727591]
test_rew_main =  0.26075135803188815
current_time =  2022-02-15T06:37:57.372772


 23%|██▎       | 690000/3000000 [7:50:46<647:59:18,  1.01s/it]

current_time =  2022-02-15T06:38:59.432057


 23%|██▎       | 692987/3000000 [7:53:30<20:31:59, 31.21it/s] 

ep_rew_main =  [1.3739623]
test_rew_main =  0.4282979106282415


 23%|██▎       | 695987/3000000 [7:56:26<20:30:21, 31.21it/s] 

ep_rew_main =  [1.7699449]
test_rew_main =  0.6570850496062024


 23%|██▎       | 698998/3000000 [7:59:23<16:26:04, 38.89it/s] 

ep_rew_main =  [1.4388335]
test_rew_main =  0.6176621103706312


 23%|██▎       | 701986/3000000 [8:02:20<20:15:44, 31.50it/s] 

ep_rew_main =  [0.6641275]
test_rew_main =  0.40238402514283944


 23%|██▎       | 704993/3000000 [8:05:18<15:49:02, 40.30it/s] 

ep_rew_main =  [1.3856558]


 24%|██▎       | 705000/3000000 [8:05:31<152:32:45,  4.18it/s]

test_rew_main =  0.5705193625302962


 24%|██▎       | 707989/3000000 [8:08:16<20:13:33, 31.48it/s] 

ep_rew_main =  [0.61674243]


 24%|██▎       | 708000/3000000 [8:08:29<144:44:16,  4.40it/s]

test_rew_main =  0.6680049702514042


 24%|██▎       | 710993/3000000 [8:11:15<16:15:48, 39.10it/s] 

ep_rew_main =  [0.8970235]
test_rew_main =  0.42212940005412725


 24%|██▍       | 713997/3000000 [8:14:14<16:20:35, 38.85it/s] 

ep_rew_main =  [1.1703898]
test_rew_main =  0.34613630816962215


 24%|██▍       | 716985/3000000 [8:17:14<20:33:27, 30.85it/s] 

ep_rew_main =  [1.993183]
test_rew_main =  0.6553609644260744


 24%|██▍       | 719985/3000000 [8:20:14<21:22:04, 29.64it/s] 

ep_rew_main =  [1.742775]
test_rew_main =  0.44067195989830826
current_time =  2022-02-15T07:08:39.493894


 24%|██▍       | 720000/3000000 [8:21:30<728:01:02,  1.15s/it]

current_time =  2022-02-15T07:09:42.783876


 24%|██▍       | 722985/3000000 [8:24:18<17:51:02, 35.43it/s] 

ep_rew_main =  [1.2517645]


 24%|██▍       | 722985/3000000 [8:24:32<17:51:02, 35.43it/s]

test_rew_main =  0.6775567021605549


 24%|██▍       | 725987/3000000 [8:27:20<20:58:37, 30.11it/s] 

ep_rew_main =  [1.9728084]


 24%|██▍       | 726000/3000000 [8:27:34<144:23:44,  4.37it/s]

test_rew_main =  0.6568143307517685


 24%|██▍       | 728996/3000000 [8:30:22<16:51:12, 37.43it/s] 

ep_rew_main =  [2.0420017]


 24%|██▍       | 729000/3000000 [8:30:35<174:15:53,  3.62it/s]

test_rew_main =  0.45408767877278705


 24%|██▍       | 731986/3000000 [8:33:25<21:10:49, 29.74it/s] 

ep_rew_main =  [1.3714138]
test_rew_main =  0.5748365330578827


 24%|██▍       | 734997/3000000 [8:36:27<16:39:27, 37.77it/s] 

ep_rew_main =  [2.093937]
test_rew_main =  0.6630803072039608


 25%|██▍       | 737993/3000000 [8:39:31<16:22:40, 38.36it/s] 

ep_rew_main =  [2.1213257]


 25%|██▍       | 738000/3000000 [8:39:44<151:35:06,  4.15it/s]

test_rew_main =  0.6520303121956281


 25%|██▍       | 740999/3000000 [8:42:34<16:59:02, 36.95it/s] 

ep_rew_main =  [0.9876577]
test_rew_main =  0.5032741503880646


 25%|██▍       | 743999/3000000 [8:45:40<16:31:42, 37.91it/s] 

ep_rew_main =  [1.8813283]
test_rew_main =  0.6590933894354665


 25%|██▍       | 746996/3000000 [8:48:45<16:55:21, 36.98it/s] 

ep_rew_main =  [1.2749488]
test_rew_main =  0.46346289008515845


 25%|██▍       | 749997/3000000 [8:51:53<15:57:40, 39.16it/s] 

ep_rew_main =  [1.723299]


 25%|██▍       | 749997/3000000 [8:52:04<15:57:40, 39.16it/s]

test_rew_main =  0.2035355620929878
current_time =  2022-02-15T07:40:19.096284


 25%|██▌       | 750000/3000000 [8:53:06<831:14:13,  1.33s/it]

current_time =  2022-02-15T07:41:18.998329


 25%|██▌       | 752997/3000000 [8:55:47<18:21:59, 33.98it/s] 

ep_rew_main =  [1.0433288]
test_rew_main =  0.44301424178728277


 25%|██▌       | 755994/3000000 [8:58:39<18:11:12, 34.27it/s] 

ep_rew_main =  [1.6100668]
test_rew_main =  0.6330471280018519


 25%|██▌       | 758996/3000000 [9:01:32<19:24:18, 32.08it/s] 

ep_rew_main =  [1.846154]
test_rew_main =  0.5398930708209531


 25%|██▌       | 761982/3000000 [9:04:29<20:05:22, 30.95it/s] 

ep_rew_main =  [1.4154122]
test_rew_main =  0.6316680075240128


 25%|██▌       | 764998/3000000 [9:07:29<15:48:46, 39.26it/s] 

ep_rew_main =  [1.1053976]
test_rew_main =  0.4626292911301449


 26%|██▌       | 767989/3000000 [9:10:40<17:33:03, 35.33it/s] 

ep_rew_main =  [1.5296676]
test_rew_main =  0.48681936368673817


 26%|██▌       | 770994/3000000 [9:13:59<18:02:26, 34.32it/s] 

ep_rew_main =  [2.0500865]
test_rew_main =  0.654400464771481


 26%|██▌       | 773995/3000000 [9:17:31<17:44:28, 34.85it/s] 

ep_rew_main =  [0.6349703]
test_rew_main =  0.5393587509373414


 26%|██▌       | 776996/3000000 [9:20:57<15:38:14, 39.49it/s] 

ep_rew_main =  [1.2462664]
test_rew_main =  0.31042188676038396


 26%|██▌       | 779997/3000000 [9:24:52<17:29:22, 35.26it/s] 

ep_rew_main =  [1.9450471]
test_rew_main =  0.4890487352556037
current_time =  2022-02-15T08:13:20.367770


 26%|██▌       | 780000/3000000 [9:26:19<1426:43:54,  2.31s/it]

current_time =  2022-02-15T08:14:32.265117


 26%|██▌       | 782985/3000000 [9:29:24<20:04:00, 30.69it/s]  

ep_rew_main =  [1.1187896]


 26%|██▌       | 783000/3000000 [9:29:40<172:25:07,  3.57it/s]

test_rew_main =  0.19895875302148464


 26%|██▌       | 785988/3000000 [9:33:21<18:25:00, 33.39it/s] 

ep_rew_main =  [1.3380864]
test_rew_main =  0.4579150353663672


 26%|██▋       | 788986/3000000 [9:37:31<22:41:58, 27.06it/s] 

ep_rew_main =  [1.3956491]


 26%|██▋       | 789000/3000000 [9:37:49<223:20:20,  2.75it/s]

test_rew_main =  0.2362156282909539


 26%|██▋       | 791992/3000000 [9:41:26<17:30:08, 35.04it/s] 

ep_rew_main =  [0.06225055]


 26%|██▋       | 792000/3000000 [9:41:43<235:12:06,  2.61it/s]

test_rew_main =  0.3339510675271601


 26%|██▋       | 794999/3000000 [9:45:10<15:31:28, 39.45it/s] 

ep_rew_main =  [0.45683137]
test_rew_main =  0.38764800612955175


 27%|██▋       | 797987/3000000 [9:49:01<23:29:23, 26.04it/s] 

ep_rew_main =  [2.0535655]
test_rew_main =  0.13713366260036736


 27%|██▋       | 800994/3000000 [9:53:01<17:17:32, 35.32it/s] 

ep_rew_main =  [1.3840544]
test_rew_main =  0.2775053577531347


 27%|██▋       | 803992/3000000 [9:57:28<19:35:32, 31.13it/s] 

ep_rew_main =  [-0.05450818]


 27%|██▋       | 804000/3000000 [9:57:43<190:58:43,  3.19it/s]

test_rew_main =  0.3272991711475322


 27%|██▋       | 806989/3000000 [10:01:33<18:10:38, 33.51it/s] 

ep_rew_main =  [2.012985]


 27%|██▋       | 807000/3000000 [10:01:51<231:00:42,  2.64it/s]

test_rew_main =  0.028353751452008184


 27%|██▋       | 809987/3000000 [10:05:52<17:03:07, 35.68it/s] 

ep_rew_main =  [-0.03518998]
test_rew_main =  0.2839695036270082
current_time =  2022-02-15T08:54:21.894861


 27%|██▋       | 810000/3000000 [10:07:31<1323:28:38,  2.18s/it]

current_time =  2022-02-15T08:55:44.507623


 27%|██▋       | 812996/3000000 [10:11:46<16:08:24, 37.64it/s]  

ep_rew_main =  [0.9055127]


 27%|██▋       | 813000/3000000 [10:12:03<279:35:07,  2.17it/s]

test_rew_main =  0.13396663739045828


 27%|██▋       | 815997/3000000 [10:15:59<13:13:06, 45.90it/s] 

ep_rew_main =  [1.0717069]


 27%|██▋       | 816000/3000000 [10:16:16<313:40:38,  1.93it/s]

test_rew_main =  0.14839634864647228


 27%|██▋       | 818992/3000000 [10:20:29<17:09:32, 35.31it/s] 

ep_rew_main =  [0.505827]


 27%|██▋       | 819000/3000000 [10:20:48<295:49:26,  2.05it/s]

test_rew_main =  0.2590480666031785


 27%|██▋       | 821984/3000000 [10:25:11<18:02:50, 33.52it/s] 

ep_rew_main =  [1.1583691]


 27%|██▋       | 822000/3000000 [10:25:34<267:29:56,  2.26it/s]

test_rew_main =  0.22765255013852262


 27%|██▋       | 824995/3000000 [10:29:32<16:55:55, 35.68it/s] 

ep_rew_main =  [-0.00079829]


 28%|██▊       | 825000/3000000 [10:29:47<233:05:27,  2.59it/s]

test_rew_main =  0.2273750173767628


 28%|██▊       | 827986/3000000 [10:33:26<20:35:21, 29.30it/s] 

ep_rew_main =  [1.3270663]


 28%|██▊       | 828000/3000000 [10:33:43<228:48:58,  2.64it/s]

test_rew_main =  0.5593208724497007


 28%|██▊       | 830999/3000000 [10:37:40<17:48:44, 33.82it/s] 

ep_rew_main =  [0.36156327]


 28%|██▊       | 831000/3000000 [10:37:57<269:31:32,  2.24it/s]

test_rew_main =  0.35633603504285005


 28%|██▊       | 833990/3000000 [10:41:57<17:58:55, 33.46it/s] 

ep_rew_main =  [1.5688033]


 28%|██▊       | 834000/3000000 [10:42:13<213:57:46,  2.81it/s]

test_rew_main =  0.44094879818843336


 28%|██▊       | 836996/3000000 [10:50:19<15:43:15, 38.22it/s]  

ep_rew_main =  [1.0720062]


 28%|██▊       | 837000/3000000 [10:51:36<3300:21:15,  5.49s/it]

test_rew_main =  0.11152401113537118


 28%|██▊       | 839298/3000000 [11:02:52<20:03:22, 29.93it/s]  Experiencing connection interruptions. Will try to reestablish communication with Neptune. Internal exception was: RequestsFutureAdapterConnectionError
 28%|██▊       | 839996/3000000 [11:03:44<17:02:38, 35.20it/s]

ep_rew_main =  [1.2681094]
test_rew_main =  0.5706804917285723
current_time =  2022-02-15T09:52:10.223721


 28%|██▊       | 840000/3000000 [11:05:15<1272:35:30,  2.12s/it]

current_time =  2022-02-15T09:53:28.689026


 28%|██▊       | 840392/3000000 [11:05:53<19:10:53, 31.27it/s]  Communication with Neptune restored!
 28%|██▊       | 842991/3000000 [11:09:50<20:23:58, 29.37it/s] 

ep_rew_main =  [0.5693207]
test_rew_main =  0.551027399975651


 28%|██▊       | 845996/3000000 [11:14:39<14:30:11, 41.26it/s] 

ep_rew_main =  [1.3357797]


 28%|██▊       | 846000/3000000 [11:14:59<347:08:17,  1.72it/s]

test_rew_main =  0.5271909263281376


 28%|██▊       | 847999/3000000 [11:18:00<14:45:39, 40.50it/s] Experiencing connection interruptions. Will try to reestablish communication with Neptune. Internal exception was: RequestsFutureAdapterConnectionError
Communication with Neptune restored!
 28%|██▊       | 848991/3000000 [11:19:39<16:45:27, 35.66it/s] 

ep_rew_main =  [1.3520378]


 28%|██▊       | 849000/3000000 [11:19:56<254:56:37,  2.34it/s]

test_rew_main =  0.4063797468835695


 28%|██▊       | 851987/3000000 [11:24:25<17:33:34, 33.98it/s] 

ep_rew_main =  [1.2747407]


 28%|██▊       | 852000/3000000 [11:24:43<243:52:32,  2.45it/s]

test_rew_main =  0.3479211381970148


 28%|██▊       | 854986/3000000 [11:29:23<21:32:32, 27.66it/s] 

ep_rew_main =  [0.68424934]


 28%|██▊       | 855000/3000000 [11:29:42<231:34:12,  2.57it/s]

test_rew_main =  0.17007523699644939


 29%|██▊       | 857984/3000000 [11:33:51<18:45:02, 31.73it/s] 

ep_rew_main =  [0.50874895]


 29%|██▊       | 858000/3000000 [11:34:07<189:21:08,  3.14it/s]

test_rew_main =  0.5267774408644991


 29%|██▊       | 860990/3000000 [11:38:10<19:04:42, 31.14it/s] 

ep_rew_main =  [1.3928388]


 29%|██▊       | 861000/3000000 [11:38:28<239:43:04,  2.48it/s]

test_rew_main =  0.2775441802449005


 29%|██▉       | 863993/3000000 [11:42:25<18:24:27, 32.23it/s] 

ep_rew_main =  [1.1643187]


 29%|██▉       | 864000/3000000 [11:42:42<254:57:26,  2.33it/s]

test_rew_main =  0.08541674348920232


 29%|██▉       | 866995/3000000 [11:46:44<18:59:50, 31.19it/s] 

ep_rew_main =  [0.4460629]


 29%|██▉       | 867000/3000000 [11:47:03<263:29:44,  2.25it/s]

test_rew_main =  0.24491435930932398


 29%|██▉       | 869998/3000000 [11:51:10<17:39:26, 33.51it/s] 

ep_rew_main =  [0.961455]


 29%|██▉       | 869998/3000000 [11:51:23<17:39:26, 33.51it/s]

test_rew_main =  0.12963810918850455
current_time =  2022-02-15T10:39:40.452585


 29%|██▉       | 870000/3000000 [11:52:41<1333:57:10,  2.25s/it]

current_time =  2022-02-15T10:40:54.681452


 29%|██▉       | 872986/3000000 [11:56:40<19:55:26, 29.65it/s]  

ep_rew_main =  [-0.10596314]


 29%|██▉       | 873000/3000000 [11:56:59<231:00:38,  2.56it/s]

test_rew_main =  0.2543280170838553


 29%|██▉       | 875989/3000000 [12:00:54<19:02:52, 30.97it/s] 

ep_rew_main =  [-0.11560945]
test_rew_main =  0.20073400551290405


 29%|██▉       | 878985/3000000 [12:05:15<23:27:38, 25.11it/s] 

ep_rew_main =  [0.6770805]
test_rew_main =  0.2913829331943168


 29%|██▉       | 881990/3000000 [12:09:38<18:54:18, 31.12it/s] 

ep_rew_main =  [1.3455987]


 29%|██▉       | 882000/3000000 [12:09:55<236:31:15,  2.49it/s]

test_rew_main =  0.24556531814287422


 29%|██▉       | 884990/3000000 [12:13:57<18:52:29, 31.13it/s] 

ep_rew_main =  [0.7379332]


 30%|██▉       | 885000/3000000 [12:14:16<242:45:28,  2.42it/s]

test_rew_main =  0.25123464375783316


 30%|██▉       | 887986/3000000 [12:18:17<23:57:28, 24.49it/s] 

ep_rew_main =  [1.3038131]
test_rew_main =  0.374488272599326


 30%|██▉       | 890994/3000000 [12:22:31<14:59:10, 39.09it/s] 

ep_rew_main =  [0.5129724]


 30%|██▉       | 891000/3000000 [12:22:49<272:25:58,  2.15it/s]

test_rew_main =  0.20679011700799527


 30%|██▉       | 893997/3000000 [12:27:05<21:05:26, 27.74it/s] 

ep_rew_main =  [1.2068152]


 30%|██▉       | 894000/3000000 [12:27:21<249:10:47,  2.35it/s]

test_rew_main =  0.4393253951055082


 30%|██▉       | 896997/3000000 [12:31:34<18:39:23, 31.31it/s] 

ep_rew_main =  [1.4412369]


 30%|██▉       | 897000/3000000 [12:31:51<238:41:34,  2.45it/s]

test_rew_main =  0.21527828640702942


 30%|██▉       | 899992/3000000 [12:35:56<17:19:34, 33.67it/s] 

ep_rew_main =  [1.1325167]


 30%|██▉       | 899992/3000000 [12:36:08<17:19:34, 33.67it/s]

test_rew_main =  0.5017221177207165
current_time =  2022-02-15T11:24:23.128304


 30%|███       | 900000/3000000 [12:37:22<1076:41:36,  1.85s/it]

current_time =  2022-02-15T11:25:35.382469


 30%|███       | 902993/3000000 [12:41:55<22:15:55, 26.16it/s]  

ep_rew_main =  [2.0420914]


 30%|███       | 903000/3000000 [12:42:16<378:49:09,  1.54it/s]

test_rew_main =  0.4751929667108076


 30%|███       | 905991/3000000 [12:48:09<17:49:10, 32.64it/s] 

ep_rew_main =  [0.69121015]


 30%|███       | 906000/3000000 [12:48:30<355:04:41,  1.64it/s]

test_rew_main =  0.3734401553012927


 30%|███       | 908995/3000000 [12:54:26<18:57:39, 30.63it/s] 

ep_rew_main =  [1.2638068]


 30%|███       | 909000/3000000 [12:54:45<300:29:01,  1.93it/s]

test_rew_main =  0.1337012219220113


 30%|███       | 911100/3000000 [12:57:48<29:43:17, 19.52it/s] 


KeyboardInterrupt: 

Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 26 operations to synchronize with Neptune. Do not kill this process.


All 26 operations synced, thanks for waiting!


In [None]:
model = ac.q
print("Model_q's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())

model = ac.pi
print("Model_pi's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())

Model_q's state_dict:
q.0.weight 	 torch.Size([256, 41])
q.0.bias 	 torch.Size([256])
q.2.weight 	 torch.Size([256, 256])
q.2.bias 	 torch.Size([256])
q.4.weight 	 torch.Size([256, 256])
q.4.bias 	 torch.Size([256])
q.6.weight 	 torch.Size([1, 256])
q.6.bias 	 torch.Size([1])
Model_pi's state_dict:
pi.0.weight 	 torch.Size([256, 35])
pi.0.bias 	 torch.Size([256])
pi.2.weight 	 torch.Size([256, 256])
pi.2.bias 	 torch.Size([256])
pi.4.weight 	 torch.Size([256, 256])
pi.4.bias 	 torch.Size([256])
pi.6.weight 	 torch.Size([6, 256])
pi.6.bias 	 torch.Size([6])


In [None]:
print("pi_optimizer's state_dict:")
for var_name in pi_optimizer.state_dict():
    print(var_name, "\t", pi_optimizer.state_dict()[var_name])

print("q_optimizer's state_dict:")
for var_name in q_optimizer.state_dict():
    print(var_name, "\t", q_optimizer.state_dict()[var_name])



pi_optimizer's state_dict:
state 	 {0: {'step': 295000, 'square_avg': tensor([[1.5294e-07, 4.3574e-07, 1.7078e-07,  ..., 4.6295e-08, 1.3153e-07,
         3.2821e-08],
        [1.1260e-06, 1.0042e-06, 9.5667e-07,  ..., 3.2371e-07, 2.5242e-06,
         2.0768e-07],
        [3.3766e-07, 1.8660e-07, 2.4575e-07,  ..., 6.6838e-08, 3.6149e-07,
         3.6968e-08],
        ...,
        [3.4772e-07, 6.0496e-07, 3.3804e-07,  ..., 8.6878e-08, 3.6086e-07,
         6.7247e-08],
        [1.1408e-06, 7.1713e-07, 6.1176e-07,  ..., 3.4519e-07, 1.2215e-06,
         1.1788e-07],
        [2.8419e-07, 7.1120e-07, 3.0715e-07,  ..., 6.3061e-08, 4.3581e-07,
         7.5016e-08]], device='cuda:0')}, 1: {'step': 295000, 'square_avg': tensor([5.5312e-07, 3.3224e-06, 6.3727e-07, 5.1333e-07, 8.2843e-07, 6.8615e-07,
        8.8244e-07, 1.9534e-06, 6.2446e-07, 1.3387e-06, 4.0938e-07, 1.2642e-06,
        6.8603e-07, 9.9440e-07, 1.2369e-06, 7.1836e-07, 1.1587e-06, 1.1835e-06,
        9.5166e-07, 5.0816e-07, 1.7202e-0

In [None]:
now = datetime.now()

current_time = str(now.isoformat())



torch.save({
            'model of ac.q': ac.q.state_dict(),
            'model of ac.pi': ac.pi.state_dict(),
            'q_optimizer_state_dict': q_optimizer.state_dict(),
            'pi_optimizer_state_dict': pi_optimizer.state_dict(),
            
            }, "model_nn/model_nn_%s.pt" % current_time)



In [None]:
nep_log.stop()

Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 140 operations to synchronize with Neptune. Do not kill this process.


All 140 operations synced, thanks for waiting!
