## Sloving Needle Master with Twin Delayed DDPG (TD3)
Code modified from https://github.com/nikhilbarhate99/TD3-PyTorch-BipedalWalker-v2 <br>


In [1]:
import sys
import numpy as np
import torch
import argparse
import os
import random
from environment import Environment
from environment import PID
import utils
import TD3_priorized
import TD3
import math
import matplotlib.pyplot as plt

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [2]:
class Args:
    policy_name = "TD3"
    env_name = "Needle Master"
    seed = 1e6
    eval_freq = 5e3 # How often (time steps) we evaluate
    max_timesteps = 1e5  # Max time steps to run environment for
    save_models = "store"
    expl_noise = 1    # Std of Gaussian exploration noise
    batch_size = 100
    discount = 0.99   # Discount factor
    tau = 0.005         # Target network update rate
    policy_noise = 0.2   # Noise added to target policy during critic update
    noise_clip = 0.5
    policy_freq = 2  # Frequency of delayed policy updates
    max_size = 1e6
    pid_freq = 9e2    # How often purely random policy is run for
    pid_interval = 5e2   # How many time steps purely random policy is run for
    filename = 'environment_17'
    
args=Args()

In [3]:
# Setup
random.seed(args.seed)
torch.manual_seed(random.randint(1, 10000))
if torch.cuda.is_available():
    args.device = torch.device('cuda')
    torch.cuda.manual_seed(random.randint(1, 10000))
    torch.backends.cudnn.enabled = False  # Disable nondeterministic ops (not sure if critical but better safe than sorry)
else:
    args.device = torch.device('cpu')

In [4]:
sys.path.insert(0, '/home/lifan/workspace/RL/needle_master_tools/data')

### Model evaluation

In [5]:
def evaluate_policy(policy, log_f):
    eval_path = './evaluate/'
    evaluation_time = 3
    if not os.path.exists(eval_path):
        os.mkdir(eval_path)

    state = env.reset(log_f)
    done = False
    env.episode_num += 1
    env.episode_reward = 0
    episode_timesteps = 0
    average_reward = 0


    while not done:
        action = policy.select_action(state)
        # print("state: " + str(state))
        # print("action: " + str(action))
        new_state, reward, done = env.step(action, log_f)
        # print("next state: " + str(next_state))
        # print("done: " +str(done))
        env.episode_reward += reward
        state = new_state
        episode_timesteps += 1
        env.total_timesteps += 1

    env.render(save_image=True, save_path=eval_path)

    print ("---------------------------------------")
    print ("Episode_num: %d: %f" % (env.episode_num, env.episode_reward))
    print ("---------------------------------------")
    return env.episode_reward



In [6]:
file_name = "%s_%s" % (args.filename, args.policy_name)
print ("---------------------------------------")
print ("Settings: %s" % (file_name))
print ("---------------------------------------")

if not os.path.exists("./results"):
    os.makedirs("./results")
if not os.path.exists("./pytorch_models"):
    os.makedirs("./pytorch_models")

## environment set up
action_dim = 2

""" Adding the log file """
logfile = "%s_%s" % (args.filename, args.policy_name)
log_f = open("log_"+logfile+".txt","w+")
env_path = '/home/lifan/workspace/RL/needle_master_tools/data/'+ args.filename + '.txt'
env = Environment(action_dim,log_f, filename = env_path)

state_dim = len(env.gates) + 9


""""  for PID controller """
action_constrain = [10, np.pi/20]
parameter = [0.1,0.0009]
pid = PID( parameter, env.width, env.height )

""" [lower bound],[higher bound] """
# env.action_bound = np.array((-1,1)) ## for one dimension action
env.action_bound = np.array(([0, -1],[1, 1]))   ## for two dimension action
max_action = 1


""" parameters for epsilon declay """
epsilon_start = 1
epsilon_final = 0.01
decay_rate = 25000
ep_decay = []

""" beta Prioritized Experience Replay"""
beta_start = 0.4
beta_frames = 25000


### for plotting
Reward = []
save_path = './out/'
""" start straightly """
evaluations = []

# Initialize policy
# policy = TD3.TD3(state_dim, action_dim, max_action)
policy = TD3_priorized.TD3(state_dim, action_dim, max_action)
# replay_buffer = utils.ReplayBuffer(args.max_size)
replay_buffer = utils.NaivePrioritizedBuffer(int(args.max_size))

# Evaluate untrained policy
# evaluations = [evaluate_policy(policy)]


env.total_timesteps = 0
timesteps_since_eval = 0
done = True

while env.total_timesteps < args.max_timesteps:

    # Evaluate episode
    if timesteps_since_eval >= args.eval_freq:
        timesteps_since_eval %= args.eval_freq
        evaluations.append(evaluate_policy(policy, log_f))
        
        if env.last_reward > 100 and env.episode_num > 100: 
            policy.save(file_name, directory="./pytorch_models")
            np.save("./results/%s" % (file_name), evaluations)

        continue


    ## finish one episode, and train episode_times
    if done:
#         log_f.write('~~~~~~~~~~~~~~~~~~~~~~~~ iteration {} ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n'.format(env.episode_num))


        ## load model
        # policy.load(file_name,"./pytorch_models")

        ## training as usual
            # if env.total_timesteps != 0 and env.episode_reward > 500:
        if env.total_timesteps != 0:
            log_f.write('Total:{}, Episode Num:{}, Eposide:{}, Reward:{}\n'.format(env.total_timesteps, env.episode_num, episode_timesteps, env.episode_reward))
            log_f.flush()
            
            if env.episode_num % 1 == 0:
                print (("Total T: %d Episode Num: %d Episode T: %d Reward: %f") % (
                env.total_timesteps, env.episode_num, episode_timesteps, env.episode_reward))
                env.render( save_image=True, save_path=save_path)

        if env.total_timesteps != 0:
            beta = min(1.0, beta_start + env.total_timesteps * (1.0 - beta_start) / beta_frames)
            policy.train(replay_buffer, episode_timesteps, beta, args.batch_size, 
                             args.discount, args.tau, args.policy_noise, args.noise_clip, args.policy_freq)

        Reward.append(env.episode_reward)

        # Reset environment
        state = env.reset(log_f)

        done = False

        env.episode_num += 1
        env.episode_reward = 0
        episode_timesteps = 0

    """ exploration rate decay """
    args.expl_noise = (epsilon_start - epsilon_final) * math.exp(-1. * env.total_timesteps / decay_rate)
    ep_decay.append(args.expl_noise)
#     log_f.write('epsilon decay:{}\n'.format(args.expl_noise))
#     if env.total_timesteps % 500 == 0 and args.expl_noise > 0:
#         args.expl_noise *= 0.9

    """ alternative between random selected action and policy selected action """
#     if env.total_timesteps % args.pid_freq < args.pid_interval:
# #     if env.total_timesteps < args.pid_interval:
#         state_pid = state[0:3]
#         action = pid.PIDcontroller( state_pid, env.next_gate, env.gates)
# #         log_f.write('PID Action:{}\n'.format(action))
# #         action = env.sample_action()
#         # log_f.write('~~~~~~~~~~~random action~~~~~~~~~~\n')
#         # log_f.write('random selected action:{}\n'.format(action))

#     else:
#         # print("state: " +str(state))
#         action = policy.select_action(state)
#         # print("select")
#         # log_f.write('~~~~~~~~~~~selected action~~~~~~~~~~\n')
#         log_f.write('Action based on policy:{}\n'.format(action))
#         # print("action based on policy:" + str(action))
#         # print("action selected: " +str(action))
        
#         if args.expl_noise != 0:
#             noise = np.random.normal(0, args.expl_noise, size=action_dim)
#             # print("noise: " + str(noise))
#             action = (action + noise).clip(-1, 1)


    """ using PID controller """
    # state_pid = state[0:3]
    # action = pid.PIDcontroller( state_pid, env.next_gate, env.gates)
    # print("action based on PID: " + str(action))

    """ action selected based on pure policy """
    action = policy.select_action(state)
    log_f.write('action based on policy:{}\n'.format(action))
    # print("action based on policy:" + str(action))
    if args.expl_noise != 0:
#         state_pid = state[0:3]
#         guidance = pid.PIDcontroller( state_pid, env.next_gate, env.gates, env.total_timesteps)
        noise = np.random.normal(0, args.expl_noise, size=action_dim)
        # print("noise: " + str(noise))
#         action = ((1 - args.expl_noise) * action + args.expl_noise * guidance)
        action = action + noise
        action[0] = np.clip(action[0],0,1)
        action[1] = np.clip(action[1],-1,1)


    ### select action only based on pure RL
    # action = policy.select_action(state)
    # print("action selected: " +str(action))


    # Perform action
    new_state, reward, done = env.step(action, log_f)

    done_bool = 0 if episode_timesteps + 1 == env.max_time else float(done)
    env.episode_reward += reward

    # Store data in replay buffer
    replay_buffer.add(state, new_state, action, reward, done_bool)
    # print("state: " + str(state))
    state = new_state

    episode_timesteps += 1
    env.total_timesteps += 1
    timesteps_since_eval += 1

plt.plot(range(len(Reward)), np.array(Reward), 'b')
plt.savefig('./results/episode reward.png')

plt.plot(range(len(policy.actor_loss)), policy.actor_loss, 'b')
plt.savefig('./results/actor loss.png')

plt.plot(range(len(policy.critic_loss)), policy.critic_loss, 'b')
plt.savefig('./results/critic loss.png')

plt.plot(range(len(evaluations)), np.array(evaluations), 'b')
plt.savefig('./results/evaluation reward.png')
print(evaluations)



---------------------------------------
Settings: environment_17_TD3
---------------------------------------
Total T: 26 Episode Num: 1 Episode T: 26 Reward: -0.329997
Total T: 35 Episode Num: 2 Episode T: 9 Reward: -2.585800
Total T: 42 Episode Num: 3 Episode T: 7 Reward: -1.220544
Total T: 48 Episode Num: 4 Episode T: 6 Reward: -1.396407
Total T: 58 Episode Num: 5 Episode T: 10 Reward: -2.632894
Total T: 64 Episode Num: 6 Episode T: 6 Reward: -1.457051
Total T: 75 Episode Num: 7 Episode T: 11 Reward: -650.943841
Total T: 77 Episode Num: 8 Episode T: 2 Reward: 0.138274
Total T: 79 Episode Num: 9 Episode T: 2 Reward: 0.340333
Total T: 81 Episode Num: 10 Episode T: 2 Reward: 0.162302
Total T: 84 Episode Num: 11 Episode T: 3 Reward: -0.083332
Total T: 87 Episode Num: 12 Episode T: 3 Reward: -1.047234
Total T: 94 Episode Num: 13 Episode T: 7 Reward: 0.115451
Total T: 99 Episode Num: 14 Episode T: 5 Reward: 0.198091
Total T: 102 Episode Num: 15 Episode T: 3 Reward: 0.646840
Total T: 105 Ep

Total T: 1161 Episode Num: 136 Episode T: 9 Reward: -0.850153
Total T: 1181 Episode Num: 137 Episode T: 20 Reward: 0.529217
Total T: 1188 Episode Num: 138 Episode T: 7 Reward: 0.549100
Total T: 1197 Episode Num: 139 Episode T: 9 Reward: 0.281984
Total T: 1201 Episode Num: 140 Episode T: 4 Reward: -1.093275
Total T: 1207 Episode Num: 141 Episode T: 6 Reward: 0.603933
Total T: 1221 Episode Num: 142 Episode T: 14 Reward: 0.063113
Total T: 1225 Episode Num: 143 Episode T: 4 Reward: -0.477397
Total T: 1241 Episode Num: 144 Episode T: 16 Reward: 0.257275
Total T: 1255 Episode Num: 145 Episode T: 14 Reward: 0.580093
Total T: 1258 Episode Num: 146 Episode T: 3 Reward: 0.699445
Total T: 1273 Episode Num: 147 Episode T: 15 Reward: 0.313145
Total T: 1285 Episode Num: 148 Episode T: 12 Reward: 0.215128
Total T: 1315 Episode Num: 149 Episode T: 30 Reward: -836.815421
Total T: 1332 Episode Num: 150 Episode T: 17 Reward: 0.312634
Total T: 1344 Episode Num: 151 Episode T: 12 Reward: -0.043743
Total T:

Total T: 2750 Episode Num: 271 Episode T: 5 Reward: -0.516726
Total T: 2781 Episode Num: 272 Episode T: 31 Reward: -1653.785711
Total T: 2786 Episode Num: 273 Episode T: 5 Reward: 0.457869
Total T: 2790 Episode Num: 274 Episode T: 4 Reward: 0.189332
Total T: 2793 Episode Num: 275 Episode T: 3 Reward: 0.039504
Total T: 2796 Episode Num: 276 Episode T: 3 Reward: 0.065313
Total T: 2810 Episode Num: 277 Episode T: 14 Reward: 0.392177
Total T: 2828 Episode Num: 278 Episode T: 18 Reward: 0.276140
Total T: 2839 Episode Num: 279 Episode T: 11 Reward: 0.641553
Total T: 2845 Episode Num: 280 Episode T: 6 Reward: 0.608130
Total T: 2850 Episode Num: 281 Episode T: 5 Reward: -0.649057
Total T: 2861 Episode Num: 282 Episode T: 11 Reward: -0.033460
Total T: 2881 Episode Num: 283 Episode T: 20 Reward: 0.383196
Total T: 2892 Episode Num: 284 Episode T: 11 Reward: 0.627929
Total T: 2895 Episode Num: 285 Episode T: 3 Reward: 0.268028
Total T: 2927 Episode Num: 286 Episode T: 32 Reward: -755.716013
Total 

Total T: 5532 Episode Num: 401 Episode T: 32 Reward: -161.932029
Total T: 5563 Episode Num: 402 Episode T: 31 Reward: -190.993923
Total T: 5592 Episode Num: 403 Episode T: 29 Reward: -0.326877
Total T: 5622 Episode Num: 404 Episode T: 30 Reward: -1.278485
Total T: 5650 Episode Num: 405 Episode T: 28 Reward: -0.682383
Total T: 5663 Episode Num: 406 Episode T: 13 Reward: -474.837012
Total T: 5693 Episode Num: 407 Episode T: 30 Reward: -0.782129
Total T: 5730 Episode Num: 408 Episode T: 37 Reward: -125.730090
Total T: 5760 Episode Num: 409 Episode T: 30 Reward: -0.108514
Total T: 5782 Episode Num: 410 Episode T: 22 Reward: -0.414647
Total T: 5809 Episode Num: 411 Episode T: 27 Reward: -0.457292
Total T: 5828 Episode Num: 412 Episode T: 19 Reward: -162.411859
Total T: 5860 Episode Num: 413 Episode T: 32 Reward: -0.512249
Total T: 5892 Episode Num: 414 Episode T: 32 Reward: -1.007282
Total T: 5924 Episode Num: 415 Episode T: 32 Reward: -689.376963
Total T: 5949 Episode Num: 416 Episode T: 2

Total T: 9259 Episode Num: 532 Episode T: 29 Reward: -1.217757
Total T: 9294 Episode Num: 533 Episode T: 35 Reward: -0.825465
Total T: 9327 Episode Num: 534 Episode T: 33 Reward: -0.260423
Total T: 9360 Episode Num: 535 Episode T: 33 Reward: -0.314374
Total T: 9395 Episode Num: 536 Episode T: 35 Reward: -0.458416
Total T: 9420 Episode Num: 537 Episode T: 25 Reward: 237.562053
Total T: 9443 Episode Num: 538 Episode T: 23 Reward: -635.933831
Total T: 9462 Episode Num: 539 Episode T: 19 Reward: 237.703825
Total T: 9495 Episode Num: 540 Episode T: 33 Reward: -0.895485
Total T: 9527 Episode Num: 541 Episode T: 32 Reward: 0.012006
Total T: 9563 Episode Num: 542 Episode T: 36 Reward: -0.833548
Total T: 9598 Episode Num: 543 Episode T: 35 Reward: -0.290061
Total T: 9622 Episode Num: 544 Episode T: 24 Reward: 236.903250
Total T: 9656 Episode Num: 545 Episode T: 34 Reward: -1.225889
Total T: 9690 Episode Num: 546 Episode T: 34 Reward: -0.308359
Total T: 9720 Episode Num: 547 Episode T: 30 Reward

Total T: 13679 Episode Num: 660 Episode T: 43 Reward: -196.292262
Total T: 13717 Episode Num: 661 Episode T: 38 Reward: -0.133623
Total T: 13755 Episode Num: 662 Episode T: 38 Reward: 237.541007
Total T: 13798 Episode Num: 663 Episode T: 43 Reward: -0.661252
Total T: 13831 Episode Num: 664 Episode T: 33 Reward: 237.340279
Total T: 13871 Episode Num: 665 Episode T: 40 Reward: -1.148520
Total T: 13911 Episode Num: 666 Episode T: 40 Reward: 0.031945
Total T: 13946 Episode Num: 667 Episode T: 35 Reward: -0.033897
Total T: 13992 Episode Num: 668 Episode T: 46 Reward: 0.040253
Total T: 14032 Episode Num: 669 Episode T: 40 Reward: -0.056585
Total T: 14068 Episode Num: 670 Episode T: 36 Reward: -0.141316
Total T: 14112 Episode Num: 671 Episode T: 44 Reward: -0.155854
Total T: 14152 Episode Num: 672 Episode T: 40 Reward: -0.812469
Total T: 14196 Episode Num: 673 Episode T: 44 Reward: -0.042029
Total T: 14242 Episode Num: 674 Episode T: 46 Reward: -0.055881
Total T: 14285 Episode Num: 675 Episod

Total T: 19010 Episode Num: 787 Episode T: 49 Reward: -0.384414
Total T: 19050 Episode Num: 788 Episode T: 40 Reward: 237.483694
Total T: 19098 Episode Num: 789 Episode T: 48 Reward: -0.054637
Total T: 19133 Episode Num: 790 Episode T: 35 Reward: 237.411580
Total T: 19176 Episode Num: 791 Episode T: 43 Reward: 237.491612
Total T: 19216 Episode Num: 792 Episode T: 40 Reward: 236.896638
Total T: 19257 Episode Num: 793 Episode T: 41 Reward: 237.598085
Total T: 19303 Episode Num: 794 Episode T: 46 Reward: -163.053947
Total T: 19347 Episode Num: 795 Episode T: 44 Reward: -162.645169
Total T: 19386 Episode Num: 796 Episode T: 39 Reward: 237.600569
Total T: 19425 Episode Num: 797 Episode T: 39 Reward: 236.996563
Total T: 19466 Episode Num: 798 Episode T: 41 Reward: 236.755552
Total T: 19511 Episode Num: 799 Episode T: 45 Reward: 237.962397
Total T: 19557 Episode Num: 800 Episode T: 46 Reward: 237.156875
Total T: 19598 Episode Num: 801 Episode T: 41 Reward: 237.282425
Total T: 19647 Episode Nu

Total T: 24834 Episode Num: 913 Episode T: 48 Reward: -0.050265
Total T: 24874 Episode Num: 914 Episode T: 40 Reward: 237.207248
Total T: 24933 Episode Num: 915 Episode T: 59 Reward: 236.994948
Total T: 24984 Episode Num: 916 Episode T: 51 Reward: 238.092467
Total T: 25035 Episode Num: 917 Episode T: 51 Reward: 236.692658
Total T: 25078 Episode Num: 918 Episode T: 43 Reward: 237.263355
Total T: 25130 Episode Num: 919 Episode T: 52 Reward: -0.089353
---------------------------------------
Episode_num: 921: -162.710495
---------------------------------------
Total T: 25215 Episode Num: 921 Episode T: 32 Reward: -163.087909
Total T: 25263 Episode Num: 922 Episode T: 48 Reward: 236.828373
Total T: 25324 Episode Num: 923 Episode T: 61 Reward: -162.034483
Total T: 25368 Episode Num: 924 Episode T: 44 Reward: 237.605413
Total T: 25419 Episode Num: 925 Episode T: 51 Reward: 237.087238
Total T: 25461 Episode Num: 926 Episode T: 42 Reward: -0.638592
Total T: 25505 Episode Num: 927 Episode T: 44 

Total T: 31008 Episode Num: 1038 Episode T: 60 Reward: -0.349350
Total T: 31029 Episode Num: 1039 Episode T: 21 Reward: -0.078924
Total T: 31067 Episode Num: 1040 Episode T: 38 Reward: 237.304912
Total T: 31118 Episode Num: 1041 Episode T: 51 Reward: -162.683135
Total T: 31180 Episode Num: 1042 Episode T: 62 Reward: 0.019890
Total T: 31230 Episode Num: 1043 Episode T: 50 Reward: 238.070682
Total T: 31286 Episode Num: 1044 Episode T: 56 Reward: -0.470667
Total T: 31336 Episode Num: 1045 Episode T: 50 Reward: 238.135223
Total T: 31392 Episode Num: 1046 Episode T: 56 Reward: -161.820689
Total T: 31437 Episode Num: 1047 Episode T: 45 Reward: 238.142872
Total T: 31480 Episode Num: 1048 Episode T: 43 Reward: 238.189318
Total T: 31537 Episode Num: 1049 Episode T: 57 Reward: -162.442519
Total T: 31598 Episode Num: 1050 Episode T: 61 Reward: -0.670408
Total T: 31645 Episode Num: 1051 Episode T: 47 Reward: 237.558081
Total T: 31706 Episode Num: 1052 Episode T: 61 Reward: -0.184111
Total T: 31774

Total T: 38225 Episode Num: 1163 Episode T: 58 Reward: -162.641862
Total T: 38281 Episode Num: 1164 Episode T: 56 Reward: -0.433999
Total T: 38337 Episode Num: 1165 Episode T: 56 Reward: -0.391411
Total T: 38380 Episode Num: 1166 Episode T: 43 Reward: 237.823612
Total T: 38428 Episode Num: 1167 Episode T: 48 Reward: 238.093829
Total T: 38476 Episode Num: 1168 Episode T: 48 Reward: 238.012988
Total T: 38524 Episode Num: 1169 Episode T: 48 Reward: 237.811116
Total T: 38573 Episode Num: 1170 Episode T: 49 Reward: 237.338169
Total T: 38623 Episode Num: 1171 Episode T: 50 Reward: -0.513294
Total T: 38665 Episode Num: 1172 Episode T: 42 Reward: 237.094741
Total T: 38721 Episode Num: 1173 Episode T: 56 Reward: -0.230562
Total T: 38773 Episode Num: 1174 Episode T: 52 Reward: -162.183320
Total T: 38827 Episode Num: 1175 Episode T: 54 Reward: -162.505387
Total T: 38875 Episode Num: 1176 Episode T: 48 Reward: -0.578907
Total T: 38919 Episode Num: 1177 Episode T: 44 Reward: 237.266744
Total T: 389

Total T: 44829 Episode Num: 1287 Episode T: 62 Reward: -162.761163
Total T: 44886 Episode Num: 1288 Episode T: 57 Reward: 237.310263
Total T: 44926 Episode Num: 1289 Episode T: 40 Reward: 237.194904
Total T: 44973 Episode Num: 1290 Episode T: 47 Reward: 238.019129
Total T: 45030 Episode Num: 1291 Episode T: 57 Reward: 237.477401
Total T: 45088 Episode Num: 1292 Episode T: 58 Reward: 237.789625
Total T: 45167 Episode Num: 1293 Episode T: 79 Reward: -162.785584
Total T: 45233 Episode Num: 1294 Episode T: 66 Reward: -162.545397
Total T: 45291 Episode Num: 1295 Episode T: 58 Reward: 0.066633
Total T: 45356 Episode Num: 1296 Episode T: 65 Reward: -0.650584
---------------------------------------
Episode_num: 1298: 237.745992
---------------------------------------
Total T: 45469 Episode Num: 1298 Episode T: 51 Reward: 238.164043
Total T: 45511 Episode Num: 1299 Episode T: 42 Reward: 237.564458
Total T: 45562 Episode Num: 1300 Episode T: 51 Reward: 237.679373
Total T: 45631 Episode Num: 1301

Total T: 50316 Episode Num: 1411 Episode T: 42 Reward: -161.566786
Total T: 50332 Episode Num: 1412 Episode T: 16 Reward: 237.572223
Total T: 50367 Episode Num: 1413 Episode T: 35 Reward: -161.692971
Total T: 50382 Episode Num: 1414 Episode T: 15 Reward: 237.662258
Total T: 50398 Episode Num: 1415 Episode T: 16 Reward: 238.111485
Total T: 50418 Episode Num: 1416 Episode T: 20 Reward: 238.125107
Total T: 50436 Episode Num: 1417 Episode T: 18 Reward: 238.304942
Total T: 50451 Episode Num: 1418 Episode T: 15 Reward: 238.329832
Total T: 50466 Episode Num: 1419 Episode T: 15 Reward: 237.587334
---------------------------------------
Episode_num: 1421: 237.765294
---------------------------------------
Total T: 50484 Episode Num: 1421 Episode T: 3 Reward: 237.985127
Total T: 50501 Episode Num: 1422 Episode T: 17 Reward: 238.243766
Total T: 50523 Episode Num: 1423 Episode T: 22 Reward: -162.323277
Total T: 50537 Episode Num: 1424 Episode T: 14 Reward: 237.890815
Total T: 50558 Episode Num: 14

Total T: 52809 Episode Num: 1536 Episode T: 18 Reward: 237.708811
Total T: 52842 Episode Num: 1537 Episode T: 33 Reward: 0.009746
Total T: 52857 Episode Num: 1538 Episode T: 15 Reward: 238.058085
Total T: 52891 Episode Num: 1539 Episode T: 34 Reward: -162.527316
Total T: 52907 Episode Num: 1540 Episode T: 16 Reward: 238.264982
Total T: 52921 Episode Num: 1541 Episode T: 14 Reward: 237.627133
Total T: 52935 Episode Num: 1542 Episode T: 14 Reward: 237.994851
Total T: 52958 Episode Num: 1543 Episode T: 23 Reward: -161.929625
Total T: 52976 Episode Num: 1544 Episode T: 18 Reward: 237.702767
Total T: 52992 Episode Num: 1545 Episode T: 16 Reward: 238.397394
Total T: 53009 Episode Num: 1546 Episode T: 17 Reward: 237.971750
Total T: 53028 Episode Num: 1547 Episode T: 19 Reward: -162.078540
Total T: 53055 Episode Num: 1548 Episode T: 27 Reward: -0.105050
Total T: 53071 Episode Num: 1549 Episode T: 16 Reward: 238.350536
Total T: 53090 Episode Num: 1550 Episode T: 19 Reward: 237.527519
Total T: 5

Total T: 55195 Episode Num: 1662 Episode T: 15 Reward: 237.751834
Total T: 55210 Episode Num: 1663 Episode T: 15 Reward: 237.674919
Total T: 55226 Episode Num: 1664 Episode T: 16 Reward: 237.847257
Total T: 55240 Episode Num: 1665 Episode T: 14 Reward: 238.203824
Total T: 55255 Episode Num: 1666 Episode T: 15 Reward: 238.001755
Total T: 55270 Episode Num: 1667 Episode T: 15 Reward: 238.088542
Total T: 55285 Episode Num: 1668 Episode T: 15 Reward: 238.229466
Total T: 55299 Episode Num: 1669 Episode T: 14 Reward: 238.050393
Total T: 55328 Episode Num: 1670 Episode T: 29 Reward: -0.323103
Total T: 55343 Episode Num: 1671 Episode T: 15 Reward: 238.381432
Total T: 55359 Episode Num: 1672 Episode T: 16 Reward: 238.145917
Total T: 55374 Episode Num: 1673 Episode T: 15 Reward: 237.441499
Total T: 55388 Episode Num: 1674 Episode T: 14 Reward: 237.725703
Total T: 55404 Episode Num: 1675 Episode T: 16 Reward: 238.433000
Total T: 55423 Episode Num: 1676 Episode T: 19 Reward: 237.741049
Total T: 55

Total T: 57368 Episode Num: 1786 Episode T: 17 Reward: 238.129785
Total T: 57386 Episode Num: 1787 Episode T: 18 Reward: 237.762270
Total T: 57405 Episode Num: 1788 Episode T: 19 Reward: 237.890354
Total T: 57422 Episode Num: 1789 Episode T: 17 Reward: 237.796850
Total T: 57440 Episode Num: 1790 Episode T: 18 Reward: -161.952423
Total T: 57458 Episode Num: 1791 Episode T: 18 Reward: 237.724753
Total T: 57474 Episode Num: 1792 Episode T: 16 Reward: 238.043705
Total T: 57492 Episode Num: 1793 Episode T: 18 Reward: 237.887374
Total T: 57507 Episode Num: 1794 Episode T: 15 Reward: 238.094821
Total T: 57523 Episode Num: 1795 Episode T: 16 Reward: 237.930631
Total T: 57539 Episode Num: 1796 Episode T: 16 Reward: 237.988356
Total T: 57555 Episode Num: 1797 Episode T: 16 Reward: 238.002849
Total T: 57573 Episode Num: 1798 Episode T: 18 Reward: 238.076892
Total T: 57588 Episode Num: 1799 Episode T: 15 Reward: 238.027199
Total T: 57603 Episode Num: 1800 Episode T: 15 Reward: 237.792956
Total T: 

Total T: 59617 Episode Num: 1910 Episode T: 21 Reward: 237.570983
Total T: 59635 Episode Num: 1911 Episode T: 18 Reward: 237.841598
Total T: 59653 Episode Num: 1912 Episode T: 18 Reward: 238.049414
Total T: 59667 Episode Num: 1913 Episode T: 14 Reward: 238.014612
Total T: 59688 Episode Num: 1914 Episode T: 21 Reward: -161.651946
Total T: 59708 Episode Num: 1915 Episode T: 20 Reward: -161.993387
Total T: 59727 Episode Num: 1916 Episode T: 19 Reward: 237.719197
Total T: 59743 Episode Num: 1917 Episode T: 16 Reward: 238.235487
Total T: 59760 Episode Num: 1918 Episode T: 17 Reward: 238.325985
Total T: 59781 Episode Num: 1919 Episode T: 21 Reward: -162.350059
Total T: 59798 Episode Num: 1920 Episode T: 17 Reward: 238.383273
Total T: 59817 Episode Num: 1921 Episode T: 19 Reward: 237.927138
Total T: 59836 Episode Num: 1922 Episode T: 19 Reward: -162.155770
Total T: 59864 Episode Num: 1923 Episode T: 28 Reward: -0.327114
Total T: 59882 Episode Num: 1924 Episode T: 18 Reward: -161.611120
Total 

Total T: 61998 Episode Num: 2035 Episode T: 19 Reward: 238.265458
Total T: 62019 Episode Num: 2036 Episode T: 21 Reward: -161.678593
Total T: 62034 Episode Num: 2037 Episode T: 15 Reward: 238.088053
Total T: 62055 Episode Num: 2038 Episode T: 21 Reward: -161.834875
Total T: 62078 Episode Num: 2039 Episode T: 23 Reward: 238.124678
Total T: 62118 Episode Num: 2040 Episode T: 40 Reward: -0.301501
Total T: 62139 Episode Num: 2041 Episode T: 21 Reward: 237.912893
Total T: 62158 Episode Num: 2042 Episode T: 19 Reward: 237.640178
Total T: 62177 Episode Num: 2043 Episode T: 19 Reward: 237.666178
Total T: 62198 Episode Num: 2044 Episode T: 21 Reward: 238.466245
Total T: 62226 Episode Num: 2045 Episode T: 28 Reward: -161.985881
Total T: 62244 Episode Num: 2046 Episode T: 18 Reward: 238.136803
Total T: 62265 Episode Num: 2047 Episode T: 21 Reward: 237.763651
Total T: 62283 Episode Num: 2048 Episode T: 18 Reward: 238.315084
Total T: 62301 Episode Num: 2049 Episode T: 18 Reward: -161.890499
Total T

Total T: 64443 Episode Num: 2159 Episode T: 18 Reward: 238.407572
Total T: 64459 Episode Num: 2160 Episode T: 16 Reward: 237.735343
Total T: 64486 Episode Num: 2161 Episode T: 27 Reward: -0.377978
Total T: 64502 Episode Num: 2162 Episode T: 16 Reward: -162.122087
Total T: 64519 Episode Num: 2163 Episode T: 17 Reward: 238.252220
Total T: 64535 Episode Num: 2164 Episode T: 16 Reward: 237.838705
Total T: 64551 Episode Num: 2165 Episode T: 16 Reward: 237.923633
Total T: 64576 Episode Num: 2166 Episode T: 25 Reward: -0.360351
Total T: 64593 Episode Num: 2167 Episode T: 17 Reward: -161.765504
Total T: 64610 Episode Num: 2168 Episode T: 17 Reward: 238.484892
Total T: 64638 Episode Num: 2169 Episode T: 28 Reward: 0.289879
Total T: 64655 Episode Num: 2170 Episode T: 17 Reward: 237.616491
Total T: 64681 Episode Num: 2171 Episode T: 26 Reward: -0.346200
Total T: 64709 Episode Num: 2172 Episode T: 28 Reward: -0.262565
Total T: 64725 Episode Num: 2173 Episode T: 16 Reward: 238.170581
Total T: 64745

Total T: 66825 Episode Num: 2284 Episode T: 18 Reward: 238.197112
Total T: 66842 Episode Num: 2285 Episode T: 17 Reward: 237.547890
Total T: 66875 Episode Num: 2286 Episode T: 33 Reward: -0.067160
Total T: 66892 Episode Num: 2287 Episode T: 17 Reward: 237.512622
Total T: 66910 Episode Num: 2288 Episode T: 18 Reward: -162.153479
Total T: 66926 Episode Num: 2289 Episode T: 16 Reward: 237.627911
Total T: 66943 Episode Num: 2290 Episode T: 17 Reward: 238.237934
Total T: 66960 Episode Num: 2291 Episode T: 17 Reward: 238.099793
Total T: 66977 Episode Num: 2292 Episode T: 17 Reward: 238.079787
Total T: 66994 Episode Num: 2293 Episode T: 17 Reward: 237.588927
Total T: 67012 Episode Num: 2294 Episode T: 18 Reward: 238.309684
Total T: 67029 Episode Num: 2295 Episode T: 17 Reward: 238.330015
Total T: 67046 Episode Num: 2296 Episode T: 17 Reward: 237.591292
Total T: 67061 Episode Num: 2297 Episode T: 15 Reward: -161.785541
Total T: 67079 Episode Num: 2298 Episode T: 18 Reward: 238.510229
Total T: 

Total T: 69410 Episode Num: 2409 Episode T: 18 Reward: -161.925476
Total T: 69429 Episode Num: 2410 Episode T: 19 Reward: -162.113436
Total T: 69447 Episode Num: 2411 Episode T: 18 Reward: -161.955172
Total T: 69467 Episode Num: 2412 Episode T: 20 Reward: -162.056155
Total T: 69492 Episode Num: 2413 Episode T: 25 Reward: 237.855870
Total T: 69515 Episode Num: 2414 Episode T: 23 Reward: 238.416273
Total T: 69541 Episode Num: 2415 Episode T: 26 Reward: 238.383146
Total T: 69559 Episode Num: 2416 Episode T: 18 Reward: 238.270152
Total T: 69585 Episode Num: 2417 Episode T: 26 Reward: 238.464274
Total T: 69608 Episode Num: 2418 Episode T: 23 Reward: 238.154391
Total T: 69623 Episode Num: 2419 Episode T: 15 Reward: -161.832677
Total T: 69643 Episode Num: 2420 Episode T: 20 Reward: 237.796270
Total T: 69666 Episode Num: 2421 Episode T: 23 Reward: 238.277619
Total T: 69688 Episode Num: 2422 Episode T: 22 Reward: 238.327482
Total T: 69710 Episode Num: 2423 Episode T: 22 Reward: 238.476585
Total

Total T: 73772 Episode Num: 2533 Episode T: 43 Reward: 238.146352
Total T: 73805 Episode Num: 2534 Episode T: 33 Reward: 237.913062
Total T: 73838 Episode Num: 2535 Episode T: 33 Reward: 238.546096
Total T: 73869 Episode Num: 2536 Episode T: 31 Reward: 238.351113
Total T: 73901 Episode Num: 2537 Episode T: 32 Reward: 238.008162
Total T: 73948 Episode Num: 2538 Episode T: 47 Reward: 238.293226
Total T: 73989 Episode Num: 2539 Episode T: 41 Reward: -161.760987
Total T: 74138 Episode Num: 2540 Episode T: 149 Reward: 238.150587
Total T: 74167 Episode Num: 2541 Episode T: 29 Reward: -1.482749
Total T: 74215 Episode Num: 2542 Episode T: 48 Reward: 238.096105
Total T: 74270 Episode Num: 2543 Episode T: 55 Reward: 238.411401
Total T: 74336 Episode Num: 2544 Episode T: 66 Reward: 237.505472
Total T: 74431 Episode Num: 2545 Episode T: 95 Reward: 238.151253
Total T: 74563 Episode Num: 2546 Episode T: 132 Reward: 238.241073
Total T: 74626 Episode Num: 2547 Episode T: 63 Reward: 238.321187
Total T:

Total T: 79877 Episode Num: 2657 Episode T: 47 Reward: 238.010430
Total T: 79930 Episode Num: 2658 Episode T: 53 Reward: 237.567454
Total T: 79961 Episode Num: 2659 Episode T: 31 Reward: -0.034514
Total T: 80016 Episode Num: 2660 Episode T: 55 Reward: 237.651054
Total T: 80042 Episode Num: 2661 Episode T: 26 Reward: -161.825453
Total T: 80070 Episode Num: 2662 Episode T: 28 Reward: 0.167316
Total T: 80122 Episode Num: 2663 Episode T: 52 Reward: 237.645974
Total T: 80178 Episode Num: 2664 Episode T: 56 Reward: 237.522034
Total T: 80198 Episode Num: 2665 Episode T: 20 Reward: -162.138564
Total T: 80251 Episode Num: 2666 Episode T: 53 Reward: 237.897822
Total T: 80305 Episode Num: 2667 Episode T: 54 Reward: 237.515109
Total T: 80355 Episode Num: 2668 Episode T: 50 Reward: 237.521231
Total T: 80409 Episode Num: 2669 Episode T: 54 Reward: 237.558061
Total T: 80435 Episode Num: 2670 Episode T: 26 Reward: -162.061982
Total T: 80500 Episode Num: 2671 Episode T: 65 Reward: 237.719567
Total T: 8

---------------------------------------
Episode_num: 2782: 238.135780
---------------------------------------
Total T: 85732 Episode Num: 2782 Episode T: 31 Reward: 238.097536
Total T: 85780 Episode Num: 2783 Episode T: 48 Reward: 237.539198
Total T: 85808 Episode Num: 2784 Episode T: 28 Reward: -0.122428
Total T: 85840 Episode Num: 2785 Episode T: 32 Reward: -161.774340
Total T: 85856 Episode Num: 2786 Episode T: 16 Reward: -161.536512
Total T: 85903 Episode Num: 2787 Episode T: 47 Reward: 237.565660
Total T: 85952 Episode Num: 2788 Episode T: 49 Reward: 237.558390
Total T: 85981 Episode Num: 2789 Episode T: 29 Reward: -0.247757
Total T: 86010 Episode Num: 2790 Episode T: 29 Reward: -161.451287
Total T: 86057 Episode Num: 2791 Episode T: 47 Reward: 237.586122
Total T: 86103 Episode Num: 2792 Episode T: 46 Reward: 237.606041
Total T: 86139 Episode Num: 2793 Episode T: 36 Reward: 0.330207
Total T: 86168 Episode Num: 2794 Episode T: 29 Reward: 0.359286
Total T: 86196 Episode Num: 2795 Ep

Total T: 91314 Episode Num: 2904 Episode T: 56 Reward: 0.211778
Total T: 91388 Episode Num: 2905 Episode T: 74 Reward: 238.348434
Total T: 91455 Episode Num: 2906 Episode T: 67 Reward: 238.015007
Total T: 91507 Episode Num: 2907 Episode T: 52 Reward: 238.501608
Total T: 91534 Episode Num: 2908 Episode T: 27 Reward: -162.055562
Total T: 91586 Episode Num: 2909 Episode T: 52 Reward: 237.823503
Total T: 91616 Episode Num: 2910 Episode T: 30 Reward: 0.292424
Total T: 91667 Episode Num: 2911 Episode T: 51 Reward: 238.043642
Total T: 91730 Episode Num: 2912 Episode T: 63 Reward: -0.465529
Total T: 91786 Episode Num: 2913 Episode T: 56 Reward: 237.821345
Total T: 91850 Episode Num: 2914 Episode T: 64 Reward: 238.413491
Total T: 91905 Episode Num: 2915 Episode T: 55 Reward: -0.446674
Total T: 91964 Episode Num: 2916 Episode T: 59 Reward: -0.389190
Total T: 92017 Episode Num: 2917 Episode T: 53 Reward: 238.223043
Total T: 92073 Episode Num: 2918 Episode T: 56 Reward: 237.678918
Total T: 92127 E

Total T: 97894 Episode Num: 3028 Episode T: 54 Reward: 237.984908
Total T: 97923 Episode Num: 3029 Episode T: 29 Reward: 0.013392
Total T: 97983 Episode Num: 3030 Episode T: 60 Reward: 237.715408
Total T: 98043 Episode Num: 3031 Episode T: 60 Reward: 238.387990
Total T: 98104 Episode Num: 3032 Episode T: 61 Reward: 237.948323
Total T: 98166 Episode Num: 3033 Episode T: 62 Reward: 237.961782
Total T: 98222 Episode Num: 3034 Episode T: 56 Reward: 237.925731
Total T: 98283 Episode Num: 3035 Episode T: 61 Reward: 237.891841
Total T: 98333 Episode Num: 3036 Episode T: 50 Reward: 238.369112
Total T: 98383 Episode Num: 3037 Episode T: 50 Reward: -0.317985
Total T: 98405 Episode Num: 3038 Episode T: 22 Reward: -161.977244
Total T: 98466 Episode Num: 3039 Episode T: 61 Reward: 238.557951
Total T: 98524 Episode Num: 3040 Episode T: 58 Reward: 238.274121
Total T: 98540 Episode Num: 3041 Episode T: 16 Reward: -162.242393
Total T: 98598 Episode Num: 3042 Episode T: 58 Reward: 237.659905
Total T: 98