In [10]:
!pip install mujoco mujoco-python-viewer pyvirtualdisplay opencv-python mediapy
!sudo apt-get install xvfb
!pip install xvfbwrapper

Collecting pyvirtualdisplay
  Downloading PyVirtualDisplay-3.0-py3-none-any.whl (15 kB)
Installing collected packages: pyvirtualdisplay
Successfully installed pyvirtualdisplay-3.0
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
xvfb is already the newest version (2:21.1.4-2ubuntu1.7~22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 18 not upgraded.


In [2]:
# Mount Google Drive into the Colab runtime at /content/drive
from google.colab import drive
drive.mount('/content/drive')

# Switch the working directory to the project's code folder
# (presumably so local modules and relative asset paths resolve — confirm against your layout)
import os
os.chdir('/content/drive/MyDrive/RL/code') # Change the "RL" folder name to match your own Drive

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Construct `MDP` using `Snapbot`

In [3]:
import mujoco
import numpy as np
import matplotlib.pyplot as plt
from mujoco_parser import MuJoCoParserClass
from snapbot_env import SnapbotMarkovDecisionProcessClass
# Compact NumPy printing: 2 decimals, no scientific notation, lines up to 100 characters
np.set_printoptions(precision=2,suppress=True,linewidth=100)
# Use small tick labels on both matplotlib axes
plt.rc('xtick',labelsize=6); plt.rc('ytick',labelsize=6)
# Draw figures inline in the notebook using the 'retina' figure format
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
# Report which MuJoCo version is installed
print ("MuJoCo version:[%s]"%(mujoco.__version__))

MuJoCo version:[2.3.7]


### Parse `scene_snapbot.xml`

In [4]:
# Parse the Snapbot scene XML into the MuJoCo wrapper object `env`;
# with VERBOSE=True the parser prints a summary of the loaded model (dt, dof, geoms, bodies, joints)
xml_path = '../asset/snapbot/scene_snapbot.xml'
env = MuJoCoParserClass(name='Snapbot',rel_xml_path=xml_path,VERBOSE=True)

dt:[0.0020] HZ:[500]
n_dof (=nv):[24]
n_geom:[24]
geom_names:['floor', 'body', 'camera_module_1', 'camera_module_2', 'leg_module_1_1', 'leg_module_1_2', 'leg_module_1_3', 'leg_module_1_4', 'leg_module_1_4bar', 'leg_module_2_1', 'leg_module_2_2', 'leg_module_2_3', 'leg_module_2_4', 'leg_module_2_4bar', 'leg_module_4_1', 'leg_module_4_2', 'leg_module_4_3', 'leg_module_4_4', 'leg_module_4_4bar', 'leg_module_5_1', 'leg_module_5_2', 'leg_module_5_3', 'leg_module_5_4', 'leg_module_5_4bar']
n_body:[24]
body_names:['world', 'torso', 'Camera_module_1', 'Camera_module_2', 'Leg_module_1_1', 'Leg_module_1_2', 'Leg_module_1_3', 'Leg_module_1_4', 'Leg_module_1_4bar', 'Leg_module_2_1', 'Leg_module_2_2', 'Leg_module_2_3', 'Leg_module_2_4', 'Leg_module_2_4bar', 'Leg_module_4_1', 'Leg_module_4_2', 'Leg_module_4_3', 'Leg_module_4_4', 'Leg_module_4_4bar', 'Leg_module_5_1', 'Leg_module_5_2', 'Leg_module_5_3', 'Leg_module_5_4', 'Leg_module_5_4bar']
n_joint:[19]
joint_names:['free', 'camera_1', 'camera_2', '

### Construct `MDP`

In [5]:
# Wrap the parsed environment as an MDP running at HZ=50 with a 1.0 sec history sampled every 0.1 sec;
# VERBOSE=True prints the resulting [info] and [history] summary
mdp = SnapbotMarkovDecisionProcessClass(env,HZ=50,history_total_sec=1.0,history_intv_sec=0.1,VERBOSE=True)

[Snapbot] Instantiated
   [info] dt:[0.0200] HZ:[50], env-HZ:[500], mujoco_nstep:[10], state_dim:[35], o_dim:[350], a_dim:[8]
   [history] total_sec:[1.00]sec, n:[50], intv_sec:[0.10]sec, intv_tick:[5]
   [history] ticks:[ 0  5 10 15 20 25 30 35 40 45]


In [6]:
# Update maximum torque
# Every row of ctrl_ranges gets column 0 = -max_torque and column 1 = +max_torque
# (presumably the lower/upper control limit of each actuator — confirm in MuJoCoParserClass)
max_torque = 2
mdp.env.ctrl_ranges[:,0] = -max_torque
mdp.env.ctrl_ranges[:,1] = +max_torque
# Show the ranges after the update
print ("mdp.env.ctrl_ranges:\n",mdp.env.ctrl_ranges)

mdp.env.ctrl_ranges:
 [[-2.  2.]
 [-2.  2.]
 [-2.  2.]
 [-2.  2.]
 [-2.  2.]
 [-2.  2.]
 [-2.  2.]
 [-2.  2.]]


### Loop

In [None]:
from pyvirtualdisplay import Display
import mediapy

# Start a virtual (off-screen) display before the viewer is created.
display = Display(visible=0, size=(400, 300))
display.start()
try:
    # Initialize viewer
    mdp.init_viewer()
    try:
        # Loop
        o = mdp.reset()
        # NOTE(review): the loop runs until sim time reaches 1000 while step() is given
        # max_time=5.0, and the done flag `d` is only printed, never acted on.
        # These look inconsistent — confirm the intended episode length.
        max_time = 1000
        while (mdp.get_sim_time() < max_time) and mdp.is_viewer_alive():
            # Step
            a = mdp.sample_action()
            o_prime,r,d,info = mdp.step(a=a,max_time=5.0)
            # Render
            mdp.render(TRACK_TORSO=True,PLOT_WORLD_COORD=True,PLOT_TORSO_COORD=True,
                       PLOT_SENSOR=True,PLOT_CONTACT=True,PLOT_TIME=True)
            # Print (every 10 ticks)
            if (mdp.tick % 10) == 0:
                print ("tick:[%d] time:[%.2f/%.2f]sec yaw:[%.1f]deg reward:[%.2f] done:[%d]"%
                       (mdp.tick,mdp.get_sim_time(),max_time,info['yaw_torso_deg_curr'],r,d))
                print (" Reward fwd:[%.2f] collision:[%.1f] survive:[%.1f] heading:[%.3f] lane:[%.3f]"%
                       (info['r_forward'],info['r_collision'],info['r_survive'],info['r_heading'],info['r_lane']))
            # Plot (every 20 ticks): grab the rendered scene at quarter size and show it inline
            if (mdp.tick % 20) == 0:
                scene_img = mdp.grab_image(resize_rate=0.25)
                mediapy.show_image(scene_img, title="Tick:[%d] Time:[%.2f]sec"%(mdp.tick,mdp.get_sim_time()))
    finally:
        # Close viewer, also when the loop raises or the cell is interrupted
        mdp.close_viewer()
finally:
    # Stop the virtual display so re-running this cell does not leave displays running
    display.stop()
print ("Done.")

