In [2]:
!pip install mujoco mujoco-python-viewer pyvirtualdisplay opencv-python mediapy
!sudo apt-get install xvfb
!pip install xvfbwrapper

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
xvfb is already the newest version (2:21.1.4-2ubuntu1.7~22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 18 not upgraded.


In [3]:
from google.colab import drive
drive.mount('/content/drive')

import os
os.chdir('/content/drive/MyDrive/RL/code') # "RL" 폴더 이름 수정할 것

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Evaluate `SAC` results on `Snapbot`

In [4]:
import mujoco,torch,os
import numpy as np
import matplotlib.pyplot as plt
from mujoco_parser import MuJoCoParserClass
from snapbot_env import SnapbotMarkovDecisionProcessClass
from sac import ReplayBufferClass,ActorClass,CriticClass,get_target
np.set_printoptions(precision=2,suppress=True,linewidth=100)
plt.rc('xtick',labelsize=6); plt.rc('ytick',labelsize=6)
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
print ("MuJoCo version:[%s]"%(mujoco.__version__))

MuJoCo version:[2.3.7]


### Initialize `Snapbot` environment

In [5]:
def np2torch(x_np,device): return torch.tensor(x_np,dtype=torch.float32,device=device)
def torch2np(x_torch): return x_torch.detach().cpu().numpy()
print ("Ready.")

Ready.


In [6]:
xml_path = '../asset/snapbot/scene_snapbot.xml'
env = MuJoCoParserClass(name='Snapbot',rel_xml_path=xml_path,VERBOSE=False)
mdp = SnapbotMarkovDecisionProcessClass(env,HZ=50,history_total_sec=0.2,history_intv_sec=0.1,VERBOSE=True)

[Snapbot] Instantiated
   [info] dt:[0.0200] HZ:[50], env-HZ:[500], mujoco_nstep:[10], state_dim:[35], o_dim:[70], a_dim:[8]
   [history] total_sec:[0.20]sec, n:[10], intv_sec:[0.10]sec, intv_tick:[5]
   [history] ticks:[0 5]


### Check pth files

In [7]:
dir_path = '../result/weights/sac_%s/'%(mdp.name.lower())
filenames = sorted(os.listdir(path=dir_path))
print ("We have [%d] files:"%(len(filenames)))
print (filenames)

We have [21] files:
['episode_0.pth', 'episode_100.pth', 'episode_150.pth', 'episode_200.pth', 'episode_250.pth', 'episode_300.pth', 'episode_350.pth', 'episode_400.pth', 'episode_450.pth', 'episode_50.pth', 'episode_500.pth', 'episode_550.pth', 'episode_600.pth', 'episode_650.pth', 'episode_700.pth', 'episode_750.pth', 'episode_800.pth', 'episode_850.pth', 'episode_900.pth', 'episode_950.pth', 'episode_999.pth']


### Load

In [8]:
epi_idx  = 999
pth_path = '../result/weights/sac_%s/episode_%d.pth'%(mdp.name.lower(),epi_idx)
if os.path.exists(pth_path): print ("We will use [%s]."%(pth_path))
else: print ("ERROR:[%s] DOES NOT EXIST!"%(pth_path))

We will use [../result/weights/sac_snapbot/episode_999.pth].


### Loop

In [18]:
from pyvirtualdisplay import Display
display = Display(visible=0, size=(400, 300))
display.start()

video = []

# Simulator
max_epi_sec  = 10.0 # maximum episode length in second
max_epi_tick = int(max_epi_sec*mdp.HZ) # maximum episode length in tick
print("max_epi_tick: ", max_epi_tick)
# Actor
device       = 'cpu' # cpu / mps / cuda
max_torque   = 2.0
actor = ActorClass(
    obs_dim=mdp.o_dim,h_dims=[256,256],out_dim=mdp.a_dim,max_out=max_torque,device=device).to(device)
# Load pth
actor.load_state_dict(torch.load(pth_path,map_location=device))
# Run
mdp.init_viewer() # init viewer
s = mdp.reset() # reset state
reward_total = 0.0
for tick in range(max_epi_tick):
    a,_ = actor(np2torch(s,device=device),SAMPLE_ACTION=False) # get action
    s_prime,reward,done,info = mdp.step(torch2np(a),max_time=max_epi_sec) # step
    if (tick % 2) == 0:
        mdp.render(TRACK_TORSO=True,PLOT_WORLD_COORD=True,PLOT_TORSO_COORD=True,
                   PLOT_SENSOR=True,PLOT_CONTACT=True,PLOT_TIME=True)
    reward_total += reward
    s = s_prime
    if not mdp.is_viewer_alive(): break

    if (mdp.tick % 5) == 0:
      print("current tick: ", mdp.tick)
      scene_img = mdp.grab_image(resize_rate=0.25)
      video.append(scene_img)


mdp.close_viewer() # close viewer
x_diff = mdp.env.get_p_body('torso')[0]
print ("[Eval] time:[%.1f]sec reward:[%.3f] x_diff:[%.3f]m epi_len:[%d/%d]"%
       (mdp.get_sim_time(),reward_total,x_diff,tick,max_epi_tick))

500
5
10
15
20
25
30
35
40
45
50
55
60
65
70
75
80
85
90
95
100
105
110
115
120
125
130
135
140
145
150
155
160
165
170
175
180
185
190
195
200
205
210
215
220
225
230
235
240
245
250
255
260
265
270
275
280
285
290
295
300
305
310
315
320
325
330
335
340
345
350
355
360
365
370
375
380
385
390
395
400
405
410
415
420
425
430
435
440
445
450
455
460
465
470
475
480
485
490
495
500
[Eval] time:[10.0]sec reward:[363.408] x_diff:[7.716]m epi_len:[499/500]


In [None]:
import cv2, os

shape = video[0].shape
video_folder = ("../video")
if not os.path.isdir(video_folder): os.mkdir(video_folder)
video_out = cv2.VideoWriter(os.path.join(video_folder, "snapbot_sac.mp4"), cv2.VideoWriter_fourcc('F','M','P','4'), 20, (shape[1], shape[0]), True)
for i in range(len(video)):
  video_frame = video[i]
  video_out.write(video_frame)
video_out.release()

In [17]:
import mediapy
video_path = os.path.join(video_folder, "snapbot_sac.mp4")
mediapy.show_video(mediapy.read_video(video_path))


0
This browser does not support the video tag.
