### ARS Evaluation

In [1]:
# Standard-library and third-party dependencies
import datetime,gym,time,os,psutil,ray
import numpy as np
import tensorflow as tf
# Project-local helpers from the `util` and `ars` modules
from util import gpu_sess,suppress_tf_warning,tic,toc,open_txt,write_txt,OnlineMeanVariance
from ars import create_ars_model,get_noises_from_weights,save_ars_model,restore_ars_model
np.set_printoptions(precision=2) # print numpy floats with two decimals
suppress_tf_warning() # suppress TF warnings
gym.logger.set_level(40) # gym logger level (40 is presumably ERROR -- TODO confirm)
# Environment wrapper and constants used by the evaluation cells below
from episci.environment_wrappers.tactical_action_adt_env_continuous import CustomADTEnvContinuous
from episci.agents.utils.constants import Agents,RewardType,StateInfo
print ("Packaged loaded. TF version is [%s]."%(tf.__version__))

Packaged loaded. TF version is [1.15.0].


### Configuration

In [12]:
# Forwarded to the environment constructor inside get_env()
action_length = 5 # 50/5 = 10HZ
# Policy network settings handed to RolloutWorkerClass
hdims = [128,64]
actv = tf.nn.relu
out_actv = tf.nn.tanh
# Whether the worker normalizes observations before the forward pass
USE_NZD_OBS = False
# Seed handed to RolloutWorkerClass
seed = 0

### Rollout Class

In [3]:
class RolloutWorkerClass(object):
    """
    Worker without RAY (for update purposes)

    Bundles an environment created by get_env(), an ARS policy network
    created by create_ars_model(), and the observation statistics used for
    optional observation normalization.
    """
    def __init__(self,
                 hdims=[64]*2,actv=tf.nn.relu,out_actv=tf.nn.tanh,
                 seed=1,USE_NZD_OBS=True):
        """
        Build the environment, the policy network, and initialize variables.

        Args:
            hdims: hidden layer sizes forwarded to create_ars_model().
            actv: hidden activation forwarded to create_ars_model().
            out_actv: output activation forwarded to create_ars_model().
            seed: seed passed to tf.set_random_seed() and np.random.seed().
            USE_NZD_OBS: if True, get_action() normalizes observations with
                the stored mean / std before the forward pass.
        """
        self.seed = seed
        from util import suppress_tf_warning
        suppress_tf_warning() # suppress TF warnings
        self.env = get_env()
        odim,adim = self.env.observation_space.shape[0],self.env.action_space.shape[0]
        self.odim,self.adim = odim,adim
        # Observation normalization (identity statistics until set_observation_stats())
        self.obs_mu = np.zeros(self.odim)
        self.obs_std = np.ones(self.odim)
        self.USE_NZD_OBS = USE_NZD_OBS
        # ARS model
        self.model,self.sess = create_ars_model(
            odim=self.odim,adim=self.adim,hdims=hdims,
            actv=actv,out_actv=out_actv)
        # Initialize model
        # NOTE(review): the seeds are set after the model graph is built, so they
        # may not influence variable initializers created inside
        # create_ars_model() -- confirm whether seeding should happen earlier.
        tf.set_random_seed(self.seed)
        np.random.seed(self.seed)
        self.sess.run(tf.global_variables_initializer())
        # Flag to initialize assign operations for 'set_weights()'
        self.FIRST_SET_FLAG = True
    def set_observation_stats(self,obs_mu,obs_std):
        """
        Store the observation mean and standard deviation used by get_action().
        """
        self.obs_mu = obs_mu
        self.obs_std = obs_std
    def get_action(self,o):
        """
        Run the policy on a single observation and return the first row of
        the 'mu' output.

        When USE_NZD_OBS is True the observation is normalized as
        (o - obs_mu) / obs_std, where entries of obs_std below 1e-6 are
        treated as infinity so that those dimensions normalize to zero.
        """
        if self.USE_NZD_OBS:
            # Build a masked copy: writing np.inf into self.obs_std in place
            # would permanently corrupt the stored statistics (and the caller's
            # array handed to set_observation_stats()).
            obs_std = np.where(np.asarray(self.obs_std)<1e-6,np.inf,self.obs_std)
            nzd_o = (o-self.obs_mu)/obs_std
        else:
            nzd_o = o
        return self.sess.run(
            self.model['mu'],
            feed_dict={self.model['o_ph']:np.asarray(nzd_o).reshape(1,-1)})[0]
    def get_weights(self):
        """
        Return the current values of the variables in model['main_vars'].
        """
        weight_vals = self.sess.run(self.model['main_vars'])
        return weight_vals
    def set_weights(self,weight_vals):
        """
        Assign weight_vals[i] to the i-th variable of model['main_vars'].

        The placeholders and assign ops are created once, on the first call,
        and reused afterwards so repeated calls do not keep adding ops to the
        graph.
        """
        if self.FIRST_SET_FLAG:
            assign_placeholders,assign_ops = [],[]
            for weight_tf_var in self.model['main_vars']:
                assign_placeholder = tf.placeholder(
                    weight_tf_var.dtype,shape=weight_tf_var.get_shape())
                assign_placeholders.append(assign_placeholder)
                assign_ops.append(weight_tf_var.assign(assign_placeholder))
            self.assign_placeholders = assign_placeholders
            self.assign_ops = assign_ops
            # Clear the flag only after the ops exist, so a failed build can be retried
            self.FIRST_SET_FLAG = False
        # Feed every placeholder and run all assign ops in one session call
        feed_dict = {assign_placeholder:weight_vals[w_idx]
                     for w_idx,assign_placeholder in enumerate(self.assign_placeholders)}
        self.sess.run(self.assign_ops,feed_dict=feed_dict)

In [4]:
def get_env(red_distribution=None):
    """
    Create a CustomADTEnvContinuous environment.

    The config passes `red_distribution` through unchanged and uses the
    RewardType.SHAPED reward type; the module-level `action_length` is
    forwarded to the environment constructor.
    """
    # Imports are kept local to the function, as in the original definition
    from episci.environment_wrappers.tactical_action_adt_env_continuous import CustomADTEnvContinuous
    from episci.agents.utils.constants import Agents, RewardType
    env_config = dict(
        red_distribution=red_distribution,
        reward_type=RewardType.SHAPED,
    )
    adt_env = CustomADTEnvContinuous(env_config,action_length=action_length)
    return adt_env

In [5]:
# Build the evaluation environment and read its space dimensions
env = get_env()
adim = env.action_space.shape[0]
odim = env.observation_space.shape[0]
print ("Environment Ready. odim:[%d] adim:[%d]."%(odim,adim))

Environment Ready. odim:[55] adim:[4].


In [6]:
# Online mean/variance tracker from `util`.
# NOTE(review): `mv` is not referenced by any other cell visible here -- confirm it is still needed.
mv = OnlineMeanVariance()

In [13]:
# Start from a clean default graph, then build the rollout worker from the configuration
tf.reset_default_graph()
R = RolloutWorkerClass(
    hdims=hdims,
    actv=actv,
    out_actv=out_actv,
    seed=seed,
    USE_NZD_OBS=USE_NZD_OBS,
)

### Restore

In [14]:
# Checkpoint file to restore into the worker `R`
npz_path = '../data/net/ars_adt_cont/model_30.npz'
restore_ars_model(
    npz_path,
    R,
    VERBOSE=True,
)

[../data/net/ars_adt_cont/model_30.npz] loaded.


### Evaluate

In [15]:
# Step cap for the evaluation rollout: the loop below stops once ep_len reaches this value
ep_len_rollout = 15000

In [16]:
# Opponent passed to env.reset() for this evaluation episode
red = Agents.SPOT_RANDOM
o = env.reset(red=red)
d,ep_ret,ep_len = False,0,0
# Roll out until the environment reports done or the step cap is reached
while (not d) and (ep_len != ep_len_rollout):
    a = R.get_action(o)
    o,r,d,_ = env.step(a)
    ep_ret += r # accumulate return
    ep_len += 1
blue_health = env.blue_health
red_health = env.red_health

# Other infos: first state entry of each agent (reported as height below)
blue_height = env.manager._blue.state[0]
red_height = env.manager._red.state[0]

In [18]:
# Summarize the finished episode
print (
    "Health blue:[%.2f] red:[%.2f] Height blue:[%.2f] red:[%.2f] ep_len:[%d]"
    %(blue_health,red_health,blue_height,red_height,ep_len)
)

Health blue:[1.00] red:[0.00] Height blue:[2348.05] red:[1619.01] ep_len:[2662]
