# ARS with ADT Continuous Env

In [1]:
import datetime,gym,time,os,psutil,ray
import numpy as np
import tensorflow as tf
from util import gpu_sess,suppress_tf_warning,tic,toc,open_txt,write_txt,OnlineMeanVariance
from ars import create_ars_model,get_noises_from_weights,save_ars_model,restore_ars_model
np.set_printoptions(precision=2)
suppress_tf_warning() # suppress warning 
gym.logger.set_level(40) # gym logger 

from episci.environment_wrappers.tactical_action_adt_env_continuous import CustomADTEnvContinuous
from episci.agents.utils.constants import Agents,RewardType,StateInfo
print ("Packaged loaded. TF version is [%s]."%(tf.__version__))

Packaged loaded. TF version is [1.15.0].


### Hyperparameters

In [2]:
# Experiment tag (used for the log and checkpoint directories)
exp_name = 'ars_adt_cont'
# One Ray rollout actor per perturbation direction, plus one extra CPU slot
n_workers = 100
n_cpu = n_workers + 1

In [3]:
action_length = 5 # 50/5 = 10HZ
total_steps,evaluate_every,print_every = 5000,5,1
ep_len_rollout = 15000 # upper bound on steps per episode; rollouts stop earlier when the env reports done
hdims,actv,out_actv = [64,64],tf.nn.relu,tf.nn.tanh
# alpha:stepsize / nu:exploration std / b: elite set size
# NOTE: builtin int() is used instead of '.astype(np.int)'; the 'np.int' alias is
# deprecated since NumPy 1.20 and removed in 1.24.
alpha,nu,b = 0.01,0.01,int(np.ceil(n_workers/10))
USE_NZD_OBS = False # whether workers whiten observations with the running mean/std
seed = 0
# Train: opponents each worker faces in one rollout (list repeated twice => 6 episodes)
red_list_train = [
    Agents.ZOMBIE,
    Agents.SPOT_RANDOM,
    Agents.EXPERT_SYSTEM
]*2
# Evaluation: cycle through the opponents so that every worker gets exactly one
red_list_eval = [
    Agents.ZOMBIE,
    Agents.ROSIE, 
    Agents.BUD, 
    Agents.BUD_FSM, 
    Agents.EXPERT_SYSTEM
]*n_workers
red_list_eval = red_list_eval[:n_workers]
num_eval = len(red_list_eval) # evaluation
# Restore
npz_path = ''

### Logger

In [4]:
# Build a timestamped log path under the experiment's log directory and open it
log_stamp = datetime.datetime.now().strftime("%b-%d-%Y-%H:%M:%S")
txt_path = '../report/log/%s/log_%s.txt'%(exp_name,log_stamp)
f = open_txt(txt_path)
print ("[%s] created."%(txt_path))
time.sleep(1) # wait 

[../report/log/ars_adt_cont/log_Aug-23-2020-04:13:37.txt] created.


### Environment

In [5]:
def get_env(red_distribution=None):
    """
    Create a continuous-action ADT environment with shaped rewards.

    Args:
        red_distribution: forwarded unchanged to the environment config under
            the key "red_distribution" (None by default).
    Returns:
        A CustomADTEnvContinuous instance built with the notebook-level
        `action_length`.
    """
    # Imports are kept local to the function; presumably so that it stays
    # self-contained when executed inside Ray worker processes -- TODO confirm.
    from episci.environment_wrappers.tactical_action_adt_env_continuous \
        import CustomADTEnvContinuous
    from episci.agents.utils.constants import RewardType
    env_config = {
        "red_distribution": red_distribution,
        "reward_type": RewardType.SHAPED
    }
    return CustomADTEnvContinuous(env_config,action_length=action_length)

### Worker

In [6]:
class RolloutWorkerClass(object):
    """
    Worker without RAY (for update purposes)

    Holds the central copy of the ARS policy: it builds its own environment
    (only used here to read observation/action dimensions), the TF model and
    session, and exposes get/set access to the policy weights.
    """
    def __init__(self,
                 hdims=[64]*2,actv=tf.nn.relu,out_actv=tf.nn.tanh,
                 seed=1,USE_NZD_OBS=True):
        """
        Args:
            hdims: hidden layer sizes passed to create_ars_model
            actv: hidden activation passed to create_ars_model
            out_actv: output activation passed to create_ars_model
            seed: seed for TensorFlow (graph-level) and NumPy
            USE_NZD_OBS: if True, get_action() whitens observations
        """
        self.seed = seed
        from util import suppress_tf_warning
        suppress_tf_warning() # suppress TF warnings
        self.env = get_env()
        odim,adim = self.env.observation_space.shape[0],self.env.action_space.shape[0]
        self.odim,self.adim = odim,adim
        # Observation normalization
        self.obs_mu = np.zeros(self.odim)
        self.obs_std = np.ones(self.odim)
        self.USE_NZD_OBS = USE_NZD_OBS
        # Seed BEFORE building the model: TF derives each op's seed from the
        # graph-level seed at op-creation time, so seeding after the variables
        # (and their initializers) exist does not make initialization reproducible.
        tf.set_random_seed(self.seed)
        np.random.seed(self.seed)
        # ARS model 
        self.model,self.sess = create_ars_model(
            odim=self.odim,adim=self.adim,hdims=hdims,
            actv=actv,out_actv=out_actv)
        # Initialize model 
        self.sess.run(tf.global_variables_initializer())
        # Flag to initialize assign operations for 'set_weights()'
        self.FIRST_SET_FLAG = True
    def set_observation_stats(self,obs_mu,obs_std):
        """
        Store observation mean/std used for whitening.
        Copies are stored (call by value) so the caller's arrays are never
        aliased by this worker.
        """
        self.obs_mu = np.copy(obs_mu)
        self.obs_std = np.copy(obs_std)
    def get_action(self,o):
        """
        Compute the policy output for a single observation.

        Args:
            o: observation array; reshaped to (1,-1) before being fed
        Returns:
            First row of the model's 'mu' output.
        """
        if self.USE_NZD_OBS:
            # Dimensions with (near) zero std are divided by inf, i.e. mapped to 0.
            # A local array is used so that the stored statistics are not modified.
            obs_std = np.asarray(self.obs_std)
            safe_std = np.where(obs_std<1e-6,np.inf,obs_std)
            nzd_o = (o-self.obs_mu)/safe_std
        else:
            nzd_o = o
        return self.sess.run(
            self.model['mu'],feed_dict={self.model['o_ph']:nzd_o.reshape(1,-1)})[0]
    def get_weights(self):
        """
        Return the current values of model['main_vars'] (one array per variable).
        """
        weight_vals = self.sess.run(self.model['main_vars'])
        return weight_vals
    def set_weights(self,weight_vals):
        """
        Assign `weight_vals` (same order as model['main_vars']) to the model.
        The placeholders / assign ops are created once, on the first call, so
        repeated calls do not keep adding nodes to the graph.
        """
        if self.FIRST_SET_FLAG:
            self.FIRST_SET_FLAG = False
            self.assign_placeholders = []
            self.assign_ops = []
            for weight_tf_var in self.model['main_vars']:
                assign_placeholder = tf.placeholder(
                    weight_tf_var.dtype, shape=weight_tf_var.get_shape())
                self.assign_placeholders.append(assign_placeholder)
                self.assign_ops.append(weight_tf_var.assign(assign_placeholder))
        # Run every assignment in a single session call instead of one call per variable
        feed_dict = {ph:val for ph,val in zip(self.assign_placeholders,weight_vals)}
        self.sess.run(self.assign_ops,feed_dict=feed_dict)
            
@ray.remote
class RayRolloutWorkerClass(object):
    """
    Rollout Worker with RAY

    Each actor owns an environment and a copy of the ARS model. The driver
    pushes (weights + sign*noise) with set_weights() and then calls rollout()
    or evaluate().
    """
    def __init__(self,worker_id=0,
                 hdims=[128],actv=tf.nn.relu,out_actv=tf.nn.tanh,
                 ep_len_rollout=15000,USE_NZD_OBS=True):
        """
        Args:
            worker_id: integer id, only used in the ready message
            hdims: hidden layer sizes passed to create_ars_model
            actv: hidden activation passed to create_ars_model
            out_actv: output activation passed to create_ars_model
            ep_len_rollout: maximum number of steps per episode
            USE_NZD_OBS: if True, get_action() whitens observations
        """
        self.worker_id = worker_id
        self.ep_len_rollout = ep_len_rollout
        from util import suppress_tf_warning
        suppress_tf_warning() # suppress TF warnings
        self.env = get_env()
        odim,adim = self.env.observation_space.shape[0],self.env.action_space.shape[0]
        self.odim,self.adim = odim,adim
        self.USE_NZD_OBS = USE_NZD_OBS
        # Observation normalization
        self.obs_mu = np.zeros(self.odim)
        self.obs_std = np.ones(self.odim)
        # ARS model 
        self.model,self.sess = create_ars_model(
            odim=self.odim,adim=self.adim,hdims=hdims,
            actv=actv,out_actv=out_actv)
        # Flag to initialize assign operations for 'set_weights()'
        self.FIRST_SET_FLAG = True        
        print ("Ray Worker [%d] Ready."%(self.worker_id))
        
    def set_observation_stats(self,obs_mu,obs_std):
        """
        Store copies of the observation mean/std used for whitening.
        """
        self.obs_mu = np.copy(obs_mu) # call by value
        self.obs_std = np.copy(obs_std) # call by value
    def get_action(self,o):
        """
        Compute the policy output for a single observation.

        Args:
            o: observation array; reshaped to (1,-1) before being fed
        Returns:
            First row of the model's 'mu' output.
        """
        if self.USE_NZD_OBS:
            # Dimensions with (near) zero std are divided by inf, i.e. mapped to 0.
            # A local array is used so that the stored statistics are not modified.
            obs_std = np.asarray(self.obs_std)
            safe_std = np.where(obs_std<1e-6,np.inf,obs_std)
            nzd_o = (o-self.obs_mu)/safe_std # use whitened observation 
        else:
            nzd_o = o
        return self.sess.run(
            self.model['mu'],feed_dict={self.model['o_ph']:nzd_o.reshape(1,-1)})[0]
    def set_weights(self,weight_vals,noise_vals,noise_sign=+1):
        """
        Assign (weight_vals + noise_sign*noise_vals) to model['main_vars'].

        Args:
            weight_vals: one array per variable, same order as model['main_vars']
            noise_vals: perturbations, same structure as weight_vals
            noise_sign: multiplier on the noise (+1 / -1 for the two rollout
                directions, 0 for the unperturbed policy)
        """
        if self.FIRST_SET_FLAG:
            # Create placeholders / assign ops once so the graph does not keep growing
            self.FIRST_SET_FLAG = False
            self.assign_placeholders = []
            self.assign_ops = []
            for weight_tf_var in self.model['main_vars']:
                assign_placeholder = tf.placeholder(
                    weight_tf_var.dtype, shape=weight_tf_var.get_shape())
                self.assign_placeholders.append(assign_placeholder)
                self.assign_ops.append(weight_tf_var.assign(assign_placeholder))
        # Run every assignment in a single session call instead of one call per variable
        feed_dict = {}
        for w_idx,assign_placeholder in enumerate(self.assign_placeholders):
            feed_dict[assign_placeholder] = weight_vals[w_idx]+noise_sign*noise_vals[w_idx]
        self.sess.run(self.assign_ops,feed_dict=feed_dict)
    def rollout(self,
                red_list=[Agents.SPOT_RANDOM,Agents.EXPERT_SYSTEM]):
        """
        Rollout

        Plays one episode against every red policy in `red_list` (each episode
        is capped at ep_len_rollout steps) with the current weights.

        Returns:
            r_avg: total reward divided by len(red_list)
            n_step_avg: total number of steps divided by len(red_list)
            obs_buffer: array [n_step x odim] with the observation returned by
                every env.step() call (the reset observation is not included)
        """
        obs_buffer,obs_cnt = np.zeros((len(red_list)*self.ep_len_rollout,self.odim)),0
        r_sum,n_step = 0,0 # reward sum and total steps
        for r_idx,red in enumerate(red_list): # for each red policy
            # Specify red policy
            self.o = self.env.reset(red=red) # reset env 
            for t in range(self.ep_len_rollout):
                self.a = self.get_action(self.o) 
                self.o2,self.r,self.d,_ = self.env.step(self.a)
                # Save next state 
                self.o = self.o2
                # Accumulate reward
                r_sum += self.r
                n_step += 1
                # Stack observation
                obs_buffer[obs_cnt,:] = self.o
                obs_cnt += 1
                if self.d: 
                    break 
        # Compute the average return and steps 
        r_avg = r_sum / len(red_list)
        n_step_avg = n_step / len(red_list)
        obs_buffer = obs_buffer[:obs_cnt,:] # trim observation buffer 
        return r_avg,n_step_avg,obs_buffer
    def evaluate(self,red=None):
        """
        Run a single episode against `red` with the current weights.

        Returns:
            [ep_ret, ep_len, blue_health, red_health, blue_height, red_height]
            where the heights are read from element 0 of the blue/red state
            (presumably altitude -- TODO confirm against the env).
        """
        o,d,ep_ret,ep_len = self.env.reset(red=red),False,0,0
        while not(d or (ep_len == self.ep_len_rollout)):
            a = self.get_action(o)
            o,r,d,_ = self.env.step(a)
            ep_ret += r # compute return 
            ep_len += 1
        blue_health,red_health = self.env.blue_health,self.env.red_health
        
        # Other infos
        blue_height = self.env.manager._blue.state[0]
        red_height = self.env.manager._red.state[0]
        
        # return / length / blue health / red health / blue height / red height
        eval_res = [ep_ret,ep_len,blue_health,red_health,blue_height,red_height]
        return eval_res

### Initialize

In [7]:
# Instantiate one environment on the driver just to read the space dimensions
env = get_env()
odim = env.observation_space.shape[0]
adim = env.action_space.shape[0]
print ("Environment Ready. odim:[%d] adim:[%d]."%(odim,adim))

Environment Ready. odim:[55] adim:[4].


### Observation normalizer

In [8]:
# Running observation statistics: the training loop feeds every rollout observation
# through mv.include(obs) and later reads mv.mean / mv.std to update the workers.
mv = OnlineMeanVariance()

### Initialize Workers

In [9]:
# Start Ray, then build the central (non-Ray) worker and the remote rollout actors
ray.init(num_cpus=n_cpu)
tf.reset_default_graph()
R = RolloutWorkerClass(hdims=hdims,actv=actv,out_actv=out_actv,seed=seed,USE_NZD_OBS=USE_NZD_OBS)
# Arguments shared by every remote rollout worker
worker_kwargs = dict(hdims=hdims,actv=actv,out_actv=out_actv,
                     ep_len_rollout=ep_len_rollout,USE_NZD_OBS=USE_NZD_OBS)
workers = []
for worker_id in range(n_workers):
    workers.append(RayRolloutWorkerClass.remote(worker_id=worker_id,**worker_kwargs))
print ("RAY initialized with [%d] cpus and [%d] workers."%
       (n_cpu,n_workers))

2020-08-23 04:13:38,462	INFO resource_spec.py:212 -- Starting Ray with 165.62 GiB memory available for workers and up to 74.99 GiB for objects. You can adjust these settings with ray.init(memory=<bytes>, object_store_memory=<bytes>).
2020-08-23 04:13:38,915	INFO services.py:1165 -- View the Ray dashboard at [1m[32mlocalhost:8265[39m[22m


RAY initialized with [101] cpus and [100] workers.


### Loop

In [None]:
SIGMA_R_EPS = 1e-8 # lower bound on the return std used to scale the update

def elapsed_str(since):
    """
    Format the wall-clock time elapsed since `since` (a time.time() value).
    NOTE: the 'day' field is the day-of-month of gmtime(elapsed), so it reads 01
    during the first 24 hours.
    """
    return time.strftime("day:[%d] %H:%M:%S", time.gmtime(time.time()-since))

def run_perturbed_rollouts(weights,noises_list,noise_sign):
    """
    Run one rollout per Ray worker with weights perturbed by noise_sign*noises.

    Args:
        weights: central weights (list of arrays)
        noises_list: one noise set per worker (same structure as weights)
        noise_sign: +1 for positive rollouts, -1 for negative rollouts
    Returns:
        rets: array [n_workers] with the return reported by each worker
        n_step: sum of the step counts reported by the workers
    Side effect:
        every returned observation is included in the running statistics `mv`.
    """
    # Calls on the same actor run in submission order, so every rollout below
    # uses the weights set here without an explicit ray.get on set_weights.
    for worker,noises in zip(workers,noises_list):
        worker.set_weights.remote(weights,noises,noise_sign=noise_sign)
    rollout_ops = [worker.rollout.remote(red_list=red_list_train)
                   for worker in workers]
    rets,n_step = np.zeros(n_workers),0
    for r_idx,(ret,ep_len,obs_buffer) in enumerate(ray.get(rollout_ops)):
        rets[r_idx] = ret # return
        n_step += ep_len # accumulate episode length
        for obs in obs_buffer: mv.include(obs) # update observation mean and std
    return rets,n_step

npz_path_list,ep_ret_avg,ep_ret_avg_list = [],0.0,[]
start_time = time.time()
# Number of environment steps.
# NOTE(review): rollout() reports steps averaged over its red_list, so this counter
# grows by the per-opponent average rather than the raw step count -- confirm intent.
n_env_step = 0
for t in range(int(total_steps)): # for all steps 
    
    # 1. Sample one noise set per worker around the central weights
    weights = R.get_weights() # weights of the central worker 
    noises_list = [get_noises_from_weights(weights,nu=nu) for _ in range(n_workers)]

    # 2. Positive rollouts using distributed workers
    pos_rets,n_step_pos = run_perturbed_rollouts(weights,noises_list,noise_sign=+1)
    n_env_step += n_step_pos

    # 3. Negative rollouts using distributed workers
    neg_rets,n_step_neg = run_perturbed_rollouts(weights,noises_list,noise_sign=-1)
    n_env_step += n_step_neg

    # 4. Compute return statistics and Update
    #  'pos_rets' 'neg_rets'
    concat_rets = np.concatenate((pos_rets,neg_rets)) # concatenated returns [2*n_workers]
    ret_deltas = pos_rets - neg_rets # return difference [n_workers]
    max_rets = np.maximum(pos_rets,neg_rets) # maximum returns [n_workers]
    max_ret = np.max(max_rets) # maximum return [1]
    avg_ret = np.mean(max_rets) # average of the per-direction maximum returns [1]
    sort_idx = np.argsort(-max_rets) # best directions first
    sigma_R = np.std(concat_rets)
    # If every return is identical sigma_R is 0; clamping keeps the step finite
    # (all ret_deltas are 0 in that case, so the update is simply zero) instead of
    # writing NaNs into the central weights.
    step_scale = alpha/(b*max(sigma_R,SIGMA_R_EPS))
    elite_idx = sort_idx[:b] # top-b directions
    weights_updated = []
    for w_idx,weight in enumerate(weights): # for each weight 
        delta_weight_sum = np.zeros_like(weight)
        for idx_k in elite_idx:
            noise_k = (1/nu)*noises_list[idx_k][w_idx] # unit-scale noise for current weight
            delta_weight_sum += ret_deltas[idx_k]*noise_k # weighted sum
        weights_updated.append(weight + step_scale*delta_weight_sum)

    # 5. Set weights of the central worker 
    R.set_weights(weights_updated)

    # Print
    if (t == 0) or (((t+1)%print_every) == 0):
        print ("[%d/%d] time:[%s] max_ret:[%.2f] avg_ret:[%.2f] sigma_R:[%.2f] "%
               (t,total_steps,elapsed_str(start_time),
               max_ret,avg_ret,sigma_R))
        write_txt(f,"%.2f, r_train:%.4f, ret_eval:%.4f"%(time.time()-start_time,avg_ret,ep_ret_avg),
                  ADD_NEWLINE=True,DO_PRINT=False)
        
    # 6. Distribute the central weights to the distributed workers
    #    (noise_sign=0, so the workers hold the unperturbed updated policy)
    weights = R.get_weights() # get the updated weights from the central worker
    zero_noises_list = [get_noises_from_weights(weights,nu=0) for _ in range(n_workers)]
    for worker,zero_noises in zip(workers,zero_noises_list):
        worker.set_weights.remote(weights,zero_noises,noise_sign=0)
    
    # Evaluate
    if (t == 0) or (((t+1)%evaluate_every) == 0): 
        ram_percent = psutil.virtual_memory().percent # memory usage
        print ("[Eval. start] step:[%d/%d][%.1f%%] #step:[%.1e] time:[%s] ram:[%.1f%%]."%
               (t+1,total_steps,t/total_steps*100,n_env_step,
                elapsed_str(start_time),
                ram_percent)
              )
        # One evaluation episode per worker, each against its assigned opponent
        ops = [workers[i_idx].evaluate.remote(red=red_list_eval[i_idx])
               for i_idx in range(num_eval)]
        eval_vals = ray.get(ops)
        
        ep_ret_sum = 0
        for i_idx,(red,eval_val) in enumerate(zip(red_list_eval,eval_vals)):
            ep_ret,ep_len,blue_health,red_health,blue_height,red_height = eval_val[:6]
            ep_ret_sum += ep_ret
            print (" [%d/%d][%s] Ret:[%.2f] Len:[%d]. Health B:[%.2f] R:[%.2f] Height B:[%.1e] R:[%.1e]"
                %(i_idx,len(eval_vals),red,ep_ret,ep_len,blue_health,red_health,blue_height,red_height))
        ep_ret_avg = ep_ret_sum / num_eval
        print ("[Eval. done] Time:[%s] Ret_Avg:[%.3f].\n"%
               (elapsed_str(start_time),
                ep_ret_avg)
              )
        
        # Save
        npz_path = '../report/net/%s/model_%d.npz'%(exp_name,t+1)
        save_ars_model(npz_path,R,mv=mv,VERBOSE=False)
        
        # If something went bad, restore the best checkpoint so far
        npz_path_list.append(npz_path)
        ep_ret_avg_list.append(ep_ret_avg)
        ep_ret_avg_array = np.asanyarray(ep_ret_avg_list)
        ep_ret_avg_max = np.max(ep_ret_avg_array)
        if (ep_ret_avg < 0.5*ep_ret_avg_max) and (len(npz_path_list) >= 2) and (ep_ret_avg_max > 0):
            npz_path = npz_path_list[np.argmax(ep_ret_avg_array)] 
            restore_ars_model(npz_path=npz_path,R=R,VERBOSE=False)
            print ("Restoring [%s] as current[%.2f] is way lower than max[%.2f]"%
                   (npz_path,ep_ret_avg,ep_ret_avg_max))
            
    # 7. Distribute observation mean and std to workers (after evaluation)
    obs_mean,obs_std = mv.mean,mv.std
    for worker in workers:
        worker.set_observation_stats.remote(obs_mean,obs_std) # set observation mean and std
    
    # Loop 
    # break # for debugging 
    
print ("Done.")

[2m[36m(pid=102148)[0m 
[2m[36m(pid=102148)[0m 
[2m[36m(pid=102148)[0m      JSBSim Flight Dynamics Model v1.1.0.dev1 Jul 11 2020 05:35:14
[2m[36m(pid=102159)[0m 
[2m[36m(pid=102159)[0m 
[2m[36m(pid=102159)[0m      JSBSim Flight Dynamics Model v1.1.0.dev1 Jul 11 2020 05:35:14
[2m[36m(pid=102130)[0m 
[2m[36m(pid=102130)[0m 
[2m[36m(pid=102130)[0m      JSBSim Flight Dynamics Model v1.1.0.dev1 Jul 11 2020 05:35:14
[2m[36m(pid=102139)[0m 
[2m[36m(pid=102139)[0m 
[2m[36m(pid=102139)[0m      JSBSim Flight Dynamics Model v1.1.0.dev1 Jul 11 2020 05:35:14
[2m[36m(pid=102149)[0m 
[2m[36m(pid=102149)[0m 
[2m[36m(pid=102149)[0m      JSBSim Flight Dynamics Model v1.1.0.dev1 Jul 11 2020 05:35:14
[2m[36m(pid=102081)[0m 
[2m[36m(pid=102081)[0m 
[2m[36m(pid=102081)[0m      JSBSim Flight Dynamics Model v1.1.0.dev1 Jul 11 2020 05:35:14
[2m[36m(pid=102114)[0m 
[2m[36m(pid=102114)[0m 
[2m[36m(pid=102114)[0m      JSBSim Flight Dynamics Model v1.1

[2m[36m(pid=102164)[0m 
[2m[36m(pid=102164)[0m 
[2m[36m(pid=102164)[0m      JSBSim Flight Dynamics Model v1.1.0.dev1 Jul 11 2020 05:35:14
[2m[36m(pid=102160)[0m 
[2m[36m(pid=102160)[0m 
[2m[36m(pid=102160)[0m      JSBSim Flight Dynamics Model v1.1.0.dev1 Jul 11 2020 05:35:14
[2m[36m(pid=102085)[0m Ray Worker [28] Ready.
[2m[36m(pid=102113)[0m 
[2m[36m(pid=102113)[0m 
[2m[36m(pid=102113)[0m      JSBSim Flight Dynamics Model v1.1.0.dev1 Jul 11 2020 05:35:14
[2m[36m(pid=102094)[0m 
[2m[36m(pid=102094)[0m 
[2m[36m(pid=102094)[0m      JSBSim Flight Dynamics Model v1.1.0.dev1 Jul 11 2020 05:35:14
[2m[36m(pid=102087)[0m 
[2m[36m(pid=102087)[0m 
[2m[36m(pid=102087)[0m      JSBSim Flight Dynamics Model v1.1.0.dev1 Jul 11 2020 05:35:14
[2m[36m(pid=102118)[0m 
[2m[36m(pid=102118)[0m 
[2m[36m(pid=102118)[0m      JSBSim Flight Dynamics Model v1.1.0.dev1 Jul 11 2020 05:35:14
[2m[36m(pid=102165)[0m 
[2m[36m(pid=102165)[0m 
[2m[36m(pid=1

[2m[36m(pid=102136)[0m Ray Worker [56] Ready.
[0/5000] time:[day:[01] 00:34:48] max_ret:[54.20] avg_ret:[15.96] sigma_R:[20.43] 
[Eval. start] step:[1/5000][0.0%] #step:[5.1e+05] time:[day:[01] 00:34:48] ram:[13.7%].
 [0/100][zombie] Ret:[1.88] Len:[3001]. Health B:[1.00] R:[1.00] Height B:[1.8e+04] R:[1.8e+04]
 [1/100][rosie] Ret:[1.14] Len:[3001]. Health B:[1.00] R:[1.00] Height B:[1.4e+04] R:[1.4e+04]
 [2/100][bud] Ret:[-0.04] Len:[3001]. Health B:[1.00] R:[1.00] Height B:[1.1e+04] R:[1.1e+04]
 [3/100][bud_fsm] Ret:[100.76] Len:[2459]. Health B:[1.00] R:[0.00] Height B:[6.6e+03] R:[7.2e+03]
 [4/100][expert_system] Ret:[-100.24] Len:[1409]. Health B:[0.00] R:[1.00] Height B:[3.0e+03] R:[2.1e+03]
 [5/100][zombie] Ret:[-0.21] Len:[3001]. Health B:[1.00] R:[1.00] Height B:[1.8e+04] R:[1.6e+04]
 [6/100][rosie] Ret:[7.50] Len:[3001]. Health B:[1.00] R:[0.94] Height B:[7.3e+03] R:[7.2e+03]
 [7/100][bud] Ret:[8.94] Len:[3001]. Health B:[1.00] R:[0.92] Height B:[1.5e+04] R:[1.4e+04]
 [8/1

[1/5000] time:[day:[01] 01:11:59] max_ret:[63.66] avg_ret:[18.64] sigma_R:[21.79] 
[2/5000] time:[day:[01] 01:44:32] max_ret:[78.15] avg_ret:[24.03] sigma_R:[22.06] 
[3/5000] time:[day:[01] 02:15:38] max_ret:[93.60] avg_ret:[30.22] sigma_R:[25.31] 
[4/5000] time:[day:[01] 02:45:35] max_ret:[99.74] avg_ret:[34.33] sigma_R:[24.09] 
[Eval. start] step:[5/5000][0.1%] #step:[2.4e+06] time:[day:[01] 02:45:35] ram:[14.4%].
 [0/100][zombie] Ret:[100.12] Len:[91]. Health B:[1.00] R:[0.00] Height B:[5.7e+03] R:[5.7e+03]
 [1/100][rosie] Ret:[100.60] Len:[1456]. Health B:[1.00] R:[0.00] Height B:[1.4e+04] R:[1.4e+04]
 [2/100][bud] Ret:[0.38] Len:[3001]. Health B:[1.00] R:[1.00] Height B:[1.3e+03] R:[2.7e+03]
 [3/100][bud_fsm] Ret:[100.39] Len:[701]. Health B:[1.00] R:[0.00] Height B:[1.1e+04] R:[1.2e+04]
 [4/100][expert_system] Ret:[-100.21] Len:[534]. Health B:[0.00] R:[1.00] Height B:[1.9e+03] R:[2.2e+03]
 [5/100][zombie] Ret:[-0.05] Len:[3001]. Health B:[1.00] R:[1.00] Height B:[1.1e+04] R:[1.5

[5/5000] time:[day:[01] 03:17:44] max_ret:[100.41] avg_ret:[34.22] sigma_R:[25.06] 
[6/5000] time:[day:[01] 03:47:50] max_ret:[83.68] avg_ret:[39.16] sigma_R:[26.88] 
[7/5000] time:[day:[01] 04:17:14] max_ret:[99.55] avg_ret:[38.92] sigma_R:[25.68] 
[8/5000] time:[day:[01] 04:46:56] max_ret:[84.55] avg_ret:[37.20] sigma_R:[25.46] 
[9/5000] time:[day:[01] 05:15:15] max_ret:[98.45] avg_ret:[40.28] sigma_R:[25.14] 
[Eval. start] step:[10/5000][0.2%] #step:[4.4e+06] time:[day:[01] 05:15:15] ram:[14.6%].
 [0/100][zombie] Ret:[0.09] Len:[3001]. Health B:[1.00] R:[1.00] Height B:[8.4e+03] R:[9.4e+03]
 [1/100][rosie] Ret:[100.63] Len:[1426]. Health B:[1.00] R:[0.00] Height B:[9.9e+03] R:[9.9e+03]
 [2/100][bud] Ret:[100.35] Len:[2495]. Health B:[1.00] R:[0.00] Height B:[1.5e+03] R:[1.0e+03]
 [3/100][bud_fsm] Ret:[100.35] Len:[478]. Health B:[1.00] R:[0.00] Height B:[1.0e+04] R:[1.1e+04]
 [4/100][expert_system] Ret:[100.05] Len:[2641]. Health B:[1.00] R:[0.00] Height B:[1.9e+03] R:[2.1e+03]
 [5/

[10/5000] time:[day:[01] 05:46:49] max_ret:[100.53] avg_ret:[37.50] sigma_R:[23.44] 
[2m[36m(pid=102171)[0m   rel_pos = 1/distance *(pos_red - pos_blue)
[2m[36m(pid=102171)[0m Exception in thread Thread-559:
[2m[36m(pid=102171)[0m Traceback (most recent call last):
[2m[36m(pid=102171)[0m   File "/usr/lib/python3.6/threading.py", line 916, in _bootstrap_inner
[2m[36m(pid=102171)[0m     self.run()
[2m[36m(pid=102171)[0m   File "/home/schoi/.adt/venv-adt/lib/python3.6/site-packages/adt/render_thread.py", line 34, in run
[2m[36m(pid=102171)[0m     self.render()
[2m[36m(pid=102171)[0m   File "/home/schoi/.adt/venv-adt/lib/python3.6/site-packages/adt/render_thread.py", line 50, in render
[2m[36m(pid=102171)[0m     self._env.render(mode='dis')
[2m[36m(pid=102171)[0m   File "/home/schoi/.adt/venv-adt/lib/python3.6/site-packages/gym_jsbsim/environment.py", line 286, in render
[2m[36m(pid=102171)[0m     self.dis_visualiser.send_dataPdu(self.sim)
[2m[36m(pid=102

[2m[36m(pid=102159)[0m   rel_pos = 1/distance *(pos_red - pos_blue)
[2m[36m(pid=102159)[0m Exception in thread Thread-767:
[2m[36m(pid=102159)[0m Traceback (most recent call last):
[2m[36m(pid=102159)[0m   File "/usr/lib/python3.6/threading.py", line 916, in _bootstrap_inner
[2m[36m(pid=102159)[0m     self.run()
[2m[36m(pid=102159)[0m   File "/home/schoi/.adt/venv-adt/lib/python3.6/site-packages/adt/render_thread.py", line 34, in run
[2m[36m(pid=102159)[0m     self.render()
[2m[36m(pid=102159)[0m   File "/home/schoi/.adt/venv-adt/lib/python3.6/site-packages/adt/render_thread.py", line 50, in render
[2m[36m(pid=102159)[0m     self._env.render(mode='dis')
[2m[36m(pid=102159)[0m   File "/home/schoi/.adt/venv-adt/lib/python3.6/site-packages/gym_jsbsim/environment.py", line 286, in render
[2m[36m(pid=102159)[0m     self.dis_visualiser.send_dataPdu(self.sim)
[2m[36m(pid=102159)[0m   File "/home/schoi/.adt/venv-adt/lib/python3.6/site-packages/gym_jsbsim/vis

[20/5000] time:[day:[01] 10:43:29] max_ret:[89.57] avg_ret:[41.40] sigma_R:[24.61] 
[21/5000] time:[day:[01] 11:12:53] max_ret:[100.31] avg_ret:[43.68] sigma_R:[25.97] 
[22/5000] time:[day:[01] 11:43:14] max_ret:[100.72] avg_ret:[44.97] sigma_R:[26.13] 
[23/5000] time:[day:[01] 12:12:18] max_ret:[99.03] avg_ret:[46.97] sigma_R:[26.92] 
[24/5000] time:[day:[01] 12:41:03] max_ret:[100.91] avg_ret:[49.86] sigma_R:[26.28] 
[Eval. start] step:[25/5000][0.5%] #step:[1.1e+07] time:[day:[01] 12:41:03] ram:[14.9%].
 [0/100][zombie] Ret:[100.70] Len:[663]. Health B:[1.00] R:[0.00] Height B:[1.2e+04] R:[1.2e+04]
 [1/100][rosie] Ret:[100.75] Len:[1419]. Health B:[1.00] R:[0.00] Height B:[1.2e+04] R:[1.2e+04]
 [2/100][bud] Ret:[0.35] Len:[3001]. Health B:[1.00] R:[1.00] Height B:[1.3e+04] R:[1.3e+04]
 [3/100][bud_fsm] Ret:[0.42] Len:[3001]. Health B:[1.00] R:[1.00] Height B:[1.3e+04] R:[1.3e+04]
 [4/100][expert_system] Ret:[101.24] Len:[2177]. Health B:[1.00] R:[0.00] Height B:[4.5e+03] R:[5.5e+03]