# Robotics with Python: MuJoCo & Gym

### Setup

In [2]:
#pip install mujoco
#pip install gymnasium

import gymnasium as gym
import time

env = gym.make("Humanoid-v4", render_mode="human")
obs, info = env.reset()
env.render()

In [4]:
info

{'x_position': np.float64(0.0016152704880662335),
 'y_position': np.float64(-0.006376264384969079),
 'tendon_length': array([ 0.0039346 , -0.00659782]),
 'tendon_velocity': array([-0.00065977,  0.00110333]),
 'distance_from_origin': np.float64(0.006577677877233183)}

In [2]:
obs

array([ 1.40551438e+00,  1.00249109e+00,  1.02295992e-03,  6.11636741e-03,
        8.89343991e-03,  3.68799654e-03,  2.68748631e-03, -7.37148320e-03,
       -1.27035634e-03, -3.83448487e-03,  4.97995726e-03, -9.27465782e-04,
       -5.30276103e-03, -6.80604484e-04, -9.18529355e-03, -1.14950454e-03,
       -7.01506517e-03,  3.62228355e-03,  8.04498421e-03, -5.30207130e-03,
       -1.90562795e-03, -6.06947519e-05,  2.28593828e-05,  6.61675641e-03,
        9.77447190e-03, -6.79145658e-03,  9.35076611e-03,  9.40259276e-03,
        6.24602486e-03, -7.69409031e-03,  7.79339588e-03, -3.57860426e-03,
        8.84452451e-03,  2.14833940e-03,  3.15900130e-03, -3.69123743e-03,
        5.47496853e-03,  3.01387670e-03,  9.26404279e-03, -1.41988753e-03,
        1.43547743e-03, -1.56219701e-03,  6.16717799e-03,  5.88406125e-03,
        6.06353189e-03,  2.30359698e+00,  2.28564819e+00,  4.23921394e-02,
        3.81892603e-04,  4.27251199e-02, -1.34221053e-03, -9.78046855e-02,
        3.05445261e-03,  

In [3]:
env.action_space

Box(-0.4, 0.4, (17,), float32)

In [6]:
env.action_space.sample()

array([-0.05293579, -0.02909377,  0.12628955,  0.15863812, -0.14441639,
       -0.0146535 , -0.04183137,  0.30172417, -0.05358713, -0.20124765,
       -0.3164999 , -0.08170177, -0.13806766, -0.0086144 , -0.1124486 ,
       -0.22309826,  0.36465538], dtype=float32)

In [3]:
import gymnasium as gym
import time

env = gym.make("Humanoid-v4", render_mode="human")
obs, info = env.reset()

reset = False #reset if the humanoid falls or the episode ends

for step in range(240):
    action = env.action_space.sample() #random action
    obs, reward, terminated, truncated, info = env.step(action) #add a physics step (CPU speed = 0.1 seconds)
    env.render() 
    time.sleep(1/240) #slow down to real-time (240 steps × 1/240 second sleep = 1 second)
    if reset:
        if terminated or truncated:
            obs, info = env.reset()

env.close()

### Reinforcement Learning

###### Random

In [4]:
import gymnasium as gym
import time

env = gym.make("Humanoid-v4", render_mode="human")
obs, info = env.reset()

reset = False #reset if the humanoid falls or the episode ends
episode = 1
total_reward, step = 0, 0

for _ in range(240):
    ## action
    step += 1
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    ## reward
    total_reward += reward
    ## render
    env.render() 
    time.sleep(1/240)
    if (step == 1) or (step % 100 == 0): #print first step and every 100 steps
        print(f"EPISODE {episode} - Step:{step}, Reward:{reward:.1f}, Total:{total_reward:.1f}")
    ## reset
    if reset:
        if terminated or truncated: #print the last step
            print(f"EPISODE {episode} - Step:{step}, Reward:{reward:.1f}, Total:{total_reward:.1f}")
            obs, info = env.reset()
            episode += 1
            total_reward, step = 0, 0
            print("------------------------------------------")

env.close()

Exception ignored in: <function WindowViewer.__del__ at 0x7fa2c00ecf80>
Traceback (most recent call last):
  File "/Users/mdp/opt/anaconda3/envs/TORCH/lib/python3.7/site-packages/gymnasium/envs/mujoco/mujoco_rendering.py", line 335, in __del__
    self.free()
  File "/Users/mdp/opt/anaconda3/envs/TORCH/lib/python3.7/site-packages/gymnasium/envs/mujoco/mujoco_rendering.py", line 330, in free
    glfw.destroy_window(self.window)
  File "/Users/mdp/opt/anaconda3/envs/TORCH/lib/python3.7/site-packages/glfw/__init__.py", line 1282, in destroy_window
    window_addr = ctypes.cast(ctypes.pointer(window),
TypeError: _type_ must have storage info


EPISODE 1 - Step:1, Reward:4.9, Total:4.9
EPISODE 1 - Step:100, Reward:4.9, Total:476.1
EPISODE 1 - Step:200, Reward:4.8, Total:977.2


###### Simple

In [2]:
import gymnasium as gym
import time
import numpy as np

env = gym.make("Humanoid-v4", render_mode="human")
obs, info = env.reset()

reset = True #reset if the humanoid falls or the episode ends
episode = 1
total_reward, step = 0, 0
exploration_rate = 0.5 #start wild
preferred_action = np.zeros(env.action_space.shape) #knowledge to update with experience

for _ in range(1000):
    ## action
    step += 1
    exploration = np.random.normal(loc=0, scale=exploration_rate, size=env.action_space.shape) #add random noise
    action = np.clip(a=preferred_action+exploration, a_min=-1, a_max=1)
    obs, reward, terminated, truncated, info = env.step(action) 
    ## reward
    total_reward += reward
    if reward > 0:
        preferred_action += (action-preferred_action)*0.05 #learning_rate
    exploration_rate = max(0.05, exploration_rate*0.99) #min_exploration=0.05, decay_exploration=0.99
    ## render
    env.render() 
    time.sleep(1/240)
    if (step == 1) or (step % 100 == 0): #print first step and every 100 steps
        print(f"EPISODE {episode} - Step:{step}, Reward:{reward:.1f}, Total:{total_reward:.1f}")
    ## reset
    if reset:
        if terminated or truncated: #print the last step
            print(f"EPISODE {episode} - Step:{step}, Reward:{reward:.1f}, Total:{total_reward:.1f}")
            obs, info = env.reset()
            episode += 1
            total_reward, step = 0, 0
            print("------------------------------------------")

env.close()

EPISODE 1 - Step:1, Reward:4.7, Total:4.7
EPISODE 1 - Step:17, Reward:4.7, Total:79.1
------------------------------------------
EPISODE 2 - Step:1, Reward:4.8, Total:4.8
EPISODE 2 - Step:24, Reward:5.6, Total:120.9
------------------------------------------
EPISODE 3 - Step:1, Reward:4.8, Total:4.8
EPISODE 3 - Step:17, Reward:4.8, Total:82.0
------------------------------------------
EPISODE 4 - Step:1, Reward:4.9, Total:4.9
EPISODE 4 - Step:18, Reward:5.0, Total:87.9
------------------------------------------
EPISODE 5 - Step:1, Reward:4.9, Total:4.9
EPISODE 5 - Step:18, Reward:5.1, Total:88.0
------------------------------------------
EPISODE 6 - Step:1, Reward:4.9, Total:4.9
EPISODE 6 - Step:20, Reward:4.9, Total:98.2
------------------------------------------
EPISODE 7 - Step:1, Reward:4.9, Total:4.9
EPISODE 7 - Step:17, Reward:4.9, Total:83.6
------------------------------------------
EPISODE 8 - Step:1, Reward:4.9, Total:4.9
EPISODE 8 - Step:17, Reward:5.0, Total:84.0
----------

/Users/mdp/opt/anaconda3/envs/TORCH/lib/python3.7/site-packages/glfw/__init__.py:917: GLFWError: (65537) b'The GLFW library is not initialized'


EPISODE 21 - Step:20, Reward:5.2, Total:101.1
------------------------------------------
EPISODE 22 - Step:1, Reward:5.0, Total:5.0
EPISODE 22 - Step:20, Reward:5.2, Total:101.5
------------------------------------------
EPISODE 23 - Step:1, Reward:5.0, Total:5.0
EPISODE 23 - Step:18, Reward:5.2, Total:90.5
------------------------------------------
EPISODE 24 - Step:1, Reward:5.0, Total:5.0
EPISODE 24 - Step:23, Reward:5.3, Total:117.7
------------------------------------------
EPISODE 25 - Step:1, Reward:5.0, Total:5.0
EPISODE 25 - Step:23, Reward:5.3, Total:117.8
------------------------------------------
EPISODE 26 - Step:1, Reward:4.9, Total:4.9
EPISODE 26 - Step:19, Reward:5.3, Total:96.4
------------------------------------------
EPISODE 27 - Step:1, Reward:5.0, Total:5.0
EPISODE 27 - Step:19, Reward:5.3, Total:96.4
------------------------------------------
EPISODE 28 - Step:1, Reward:5.0, Total:5.0
EPISODE 28 - Step:20, Reward:5.3, Total:101.7
---------------------------------

### Artificial Intelligence

In [10]:
?PPO

###### Train

In [12]:
#pip install torch
#pip install stable-baselines3

import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

## environment with no rendering
env = gym.make("Humanoid-v4", render_mode="human")
env = DummyVecEnv([lambda: env])

## train
model = PPO(policy="MlpPolicy", env=env, verbose=1, learning_rate=0.05)
print("Training started...")
model.learn(total_timesteps=100)
print(" complete.")

## save
model.save("humanoid_model")

Using cpu device
Training started...
-----------------------------
| time/              |      |
|    fps             | 29   |
|    iterations      | 1    |
|    time_elapsed    | 69   |
|    total_timesteps | 2048 |
-----------------------------
 complete.


###### Test

In [1]:
import gymnasium as gym
from stable_baselines3 import PPO
import time

env = gym.make("Humanoid-v4", render_mode="human")
model = PPO.load(path="humanoid_model", env=env)
obs, info = env.reset()

reset = False #reset if the humanoid falls or the episode ends
episode = 1
total_reward, step = 0, 0

for _ in range(240):
    ## action
    step += 1
    action, _ = model.predict(obs)    
    obs, reward, terminated, truncated, info = env.step(action) 
    ## reward
    total_reward += reward
    ## render
    env.render() 
    time.sleep(1/240)
    if (step == 1) or (step % 100 == 0): #print first step and every 100 steps
        print(f"EPISODE {episode} - Step:{step}, Reward:{reward:.1f}, Total:{total_reward:.1f}")
    ## reset
    if reset:
        if terminated or truncated: #print the last step
            print(f"EPISODE {episode} - Step:{step}, Reward:{reward:.1f}, Total:{total_reward:.1f}")
            obs, info = env.reset()
            episode += 1
            total_reward, step = 0, 0
            print("------------------------------------------")

env.close()

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
EPISODE 1 - Step:1, Reward:4.7, Total:4.7
EPISODE 1 - Step:100, Reward:4.5, Total:401.3
EPISODE 1 - Step:200, Reward:3.9, Total:858.0


/Users/mdp/opt/anaconda3/envs/TORCH/lib/python3.7/site-packages/glfw/__init__.py:917: GLFWError: (65537) b'The GLFW library is not initialized'
