### Evaluate `SAC` results on `Snapbot`

In [1]:
import mujoco,torch,os
import numpy as np
import matplotlib.pyplot as plt
from mujoco_parser import MuJoCoParserClass
from snapbot_env import SnapbotMarkovDecisionProcessClass
from sac import ReplayBufferClass,ActorClass,CriticClass,get_target
np.set_printoptions(precision=2,suppress=True,linewidth=100)
plt.rc('xtick',labelsize=6); plt.rc('ytick',labelsize=6)
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
print ("MuJoCo version:[%s]"%(mujoco.__version__))

MuJoCo version:[2.3.6]


### Initialize `Snapbot` environment

In [2]:
def np2torch(x_np,device): return torch.tensor(x_np,dtype=torch.float32,device=device)
def torch2np(x_torch): return x_torch.detach().cpu().numpy()
print ("Ready.")

Ready.


In [3]:
xml_path = '../asset/snapbot/scene_snapbot.xml'
env = MuJoCoParserClass(name='Snapbot',rel_xml_path=xml_path,VERBOSE=False)
mdp = SnapbotMarkovDecisionProcessClass(env,HZ=50,history_total_sec=0.2,history_intv_sec=0.1,VERBOSE=True)

[Snapbot] Instantiated
   [info] dt:[0.0200] HZ:[50], env-HZ:[500], mujoco_nstep:[10], state_dim:[35], o_dim:[70], a_dim:[8]
   [history] total_sec:[0.20]sec, n:[10], intv_sec:[0.10]sec, intv_tick:[5]
   [history] ticks:[0 5]


### Check pth files

In [4]:
dir_path = '../result/weights/sac_%s/'%(mdp.name.lower())
filenames = sorted(os.listdir(path=dir_path))
print ("We have [%d] files:"%(len(filenames)))
print (filenames)

We have [21] files:
['episode_0.pth', 'episode_100.pth', 'episode_150.pth', 'episode_200.pth', 'episode_250.pth', 'episode_300.pth', 'episode_350.pth', 'episode_400.pth', 'episode_450.pth', 'episode_50.pth', 'episode_500.pth', 'episode_550.pth', 'episode_600.pth', 'episode_650.pth', 'episode_700.pth', 'episode_750.pth', 'episode_800.pth', 'episode_850.pth', 'episode_900.pth', 'episode_950.pth', 'episode_999.pth']


### Load

In [9]:
epi_idx  = 999
pth_path = '../result/weights/sac_%s/episode_%d.pth'%(mdp.name.lower(),epi_idx)
if os.path.exists(pth_path): print ("We will use [%s]."%(pth_path))
else: print ("ERROR:[%s] DOES NOT EXIST!"%(pth_path))

We will use [../result/weights/sac_snapbot/episode_999.pth].


### Loop

In [10]:
# Simulator
max_epi_sec  = 60.0 # maximum episode length in second
max_epi_tick = int(max_epi_sec*mdp.HZ) # maximum episode length in tick
# Actor
device       = 'cpu' # cpu / mps / cuda
max_torque   = 2.0
actor = ActorClass(
    obs_dim=mdp.o_dim,h_dims=[256,256],out_dim=mdp.a_dim,max_out=max_torque,device=device).to(device)
# Load pth
actor.load_state_dict(torch.load(pth_path,map_location=device))
# Run
mdp.init_viewer() # init viewer
s = mdp.reset() # reset state
mdp.viewer_pause() # pause simulator
print ("Press [space] to start.")
reward_total = 0.0
for tick in range(max_epi_tick):
    a,_ = actor(np2torch(s,device=device),SAMPLE_ACTION=False) # get action
    s_prime,reward,done,info = mdp.step(torch2np(a),max_time=max_epi_sec) # step
    if (tick % 2) == 0:
        mdp.render(TRACK_TORSO=True,PLOT_WORLD_COORD=True,PLOT_TORSO_COORD=True,
                   PLOT_SENSOR=True,PLOT_CONTACT=True,PLOT_TIME=True)
    reward_total += reward
    s = s_prime
    if not mdp.is_viewer_alive(): break
mdp.close_viewer() # close viewer
x_diff = mdp.env.get_p_body('torso')[0]
print ("[Eval] time:[%.1f]sec reward:[%.3f] x_diff:[%.3f]m epi_len:[%d/%d]"%
       (mdp.get_sim_time(),reward_total,x_diff,tick,max_epi_tick))

Press [space] to start.
Pressed ESC
Quitting.
[Eval] time:[30.1]sec reward:[1152.045] x_diff:[23.532]m epi_len:[1502/3000]
