# **Assignment 1: Walker2D**
### Stefan Obradovic and Atin Srivastava







---





---



In [None]:
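# Legacy install commands, kept for reference only: this cell is intentionally
# disabled, and the next cell installs the packages that are actually used.
# !apt-get install -y mujoco-py
# !pip install gymnasium[mujoco] stable-baselines3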

In [None]:
!pip install mujoco
!pip install stable-baselines3

# Set up GPU rendering.
from google.colab import files
import distutils.util
import os
import subprocess

# Probe for a usable GPU by running `nvidia-smi`. A non-zero exit status means
# the tool could not talk to a GPU. If the binary is not installed at all,
# subprocess.run raises FileNotFoundError; that case is treated the same way so
# the user gets the actionable message below instead of a bare OSError.
try:
  gpu_available = subprocess.run('nvidia-smi').returncode == 0
except FileNotFoundError:
  gpu_available = False

if not gpu_available:
  raise RuntimeError(
      'Cannot communicate with GPU. '
      'Make sure you are using a GPU Colab runtime. '
      'Go to the Runtime menu and select Choose runtime type.')

# Add an ICD config so that glvnd can pick up the Nvidia EGL driver.
# This is usually installed as part of an Nvidia driver package, but the Colab
# kernel doesn't install its driver via APT, and as a result the ICD is missing.
# (https://github.com/NVIDIA/libglvnd/blob/master/src/EGL/icd_enumeration.md)

# Location where glvnd looks for EGL vendor (ICD) descriptions. The file is
# only written when it is missing, so re-running the cell is harmless.
NVIDIA_ICD_CONFIG_PATH = '/usr/share/glvnd/egl_vendor.d/10_nvidia.json'
if not os.path.exists(NVIDIA_ICD_CONFIG_PATH):
  # open(..., 'w') does not create parent directories, so make sure the
  # vendor directory exists before writing the config into it.
  os.makedirs(os.path.dirname(NVIDIA_ICD_CONFIG_PATH), exist_ok=True)
  with open(NVIDIA_ICD_CONFIG_PATH, 'w') as f:
    f.write("""{
    "file_format_version" : "1.0.0",
    "ICD" : {
        "library_path" : "libEGL_nvidia.so.0"
    }
}
""")

# Configure MuJoCo to use the EGL rendering backend (requires GPU)
print('Setting environment variable to use GPU rendering:')
%env MUJOCO_GL=egl

# Check if installation was succesful.
try:
  print('Checking that the installation succeeded:')
  import mujoco
  mujoco.MjModel.from_xml_string('<mujoco/>')
except Exception as e:
  raise e from RuntimeError(
      'Something went wrong during installation. Check the shell output above '
      'for more information.\n'
      'If using a hosted Colab runtime, make sure you enable GPU acceleration '
      'by going to the Runtime menu and selecting "Choose runtime type".')

print('Installation successful.')

# Other imports and helper functions
import time
import itertools
import numpy as np
import pandas as pd

# Graphics and plotting.
print('Installing mediapy:')
!command -v ffmpeg >/dev/null || (apt update && apt install -y ffmpeg)
!pip install -q mediapy
import mediapy as media
import matplotlib.pyplot as plt

# More legible printing from numpy.
np.set_printoptions(precision=3, suppress=True, linewidth=100)

import gymnasium as gym
from stable_baselines3 import SAC
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.evaluation import evaluate_policy

from IPython.display import clear_output, HTML
clear_output()

from mujoco import Renderer
from base64 import b64encode
import imageio

In [None]:
# Load the Walker2d-v4 environment
env_name = "Walker2d-v4"

# Monitor only persists episode statistics when it is given a location to
# write to. The plotting code further down searches this directory for a
# "monitor" file, so the directory must exist and be handed to Monitor here.
log_dir = "./sac_walker2d/"
os.makedirs(log_dir, exist_ok=True)

env = gym.make(env_name)
env = Monitor(env, log_dir)  # Monitor for logging (episode stats saved under log_dir)

In [None]:
# Inspect what the agent observes and what it can control.
obs_space, act_space = env.observation_space, env.action_space

# One print call, two lines of output (observation space first, then actions).
print(
    f"Observation Space: {obs_space}",
    f"Action Space: {act_space}",
    sep="\n",
)

In [None]:
# Reward structure of Walker2d (see the Gymnasium environment documentation):
# - Positive reward for forward movement
# - Negative reward for energy usage (control-cost penalty on the actions)
# - A constant "healthy" bonus for every step the walker stays upright.
#   Falling carries no explicit penalty: it terminates the episode, which
#   forfeits all reward the agent could still have collected.

log_dir = "./sac_walker2d/"
os.makedirs(log_dir, exist_ok=True)

# Fixed seed so that repeated runs of this stochastic training are comparable
# (bit-exact reproducibility on GPU is not guaranteed).
SEED = 0

# Initialize SAC model
model = SAC("MlpPolicy", env, verbose=1, tensorboard_log=log_dir, seed=SEED)

# Train the agent. For off-policy algorithms such as SAC, log_interval counts
# episodes between log outputs, not timesteps.
model.learn(total_timesteps=500000, log_interval=1000)

# Save the trained model
model.save("sac_walker2d_model")

# Evaluate the trained agent
def evaluate(env, model, num_episodes=10):
    """Roll out ``model`` deterministically and summarise the episode returns.

    Args:
        env: Environment whose ``reset()`` returns ``(obs, info)`` and whose
            ``step(action)`` returns
            ``(obs, reward, terminated, truncated, info)``.
        model: Policy exposing ``predict(obs, deterministic=True)`` that
            returns ``(action, state)``.
        num_episodes: Number of complete episodes to run.

    Returns:
        Tuple ``(mean, std)`` of the undiscounted per-episode returns, as
        computed by ``np.mean`` and ``np.std``.
    """
    episode_returns = []
    for _ in range(num_episodes):
        observation, _info = env.reset()
        episode_return = 0
        # At least one step is always taken; the episode ends as soon as the
        # environment reports either termination or truncation.
        while True:
            action, _state = model.predict(observation, deterministic=True)
            observation, step_reward, terminated, truncated, _info = env.step(action)
            episode_return += step_reward
            if terminated or truncated:
                break
        episode_returns.append(episode_return)
    return np.mean(episode_returns), np.std(episode_returns)

# Snapshot the per-episode training statistics held in memory by the Monitor
# wrapper *before* evaluating. evaluate() steps the same Monitor-wrapped env,
# so afterwards its records would also contain the evaluation episodes and the
# "training" curve would be contaminated. Reading from the wrapper also avoids
# depending on a monitor log file being present on disk.
monitor_data = pd.DataFrame({
    "r": env.get_episode_rewards(),  # episode return
    "l": env.get_episode_lengths(),  # episode length in steps
    "t": env.get_episode_times(),    # elapsed time recorded by Monitor at episode end
})

mean_reward, std_reward = evaluate(env, model, num_episodes=10)
print(f"Evaluation over 10 episodes: Mean Reward = {mean_reward:.2f}, Std Dev = {std_reward:.2f}")

# Plot training result
if monitor_data.empty:
    raise RuntimeError(
        "No completed training episodes were recorded. "
        "Ensure the environment is wrapped in Monitor and that training has run.")

# Plot training rewards
plt.figure(figsize=(10, 5))
plt.plot(monitor_data["r"], label="Episode Reward")
plt.xlabel("Episodes")
plt.ylabel("Reward")
plt.title("Training Performance of SAC on Walker2D")
plt.legend()
plt.show()

In [None]:
# Render the trained agent using MuJoCo Renderer
video_dir = "videos"
os.makedirs(video_dir, exist_ok=True)
video_path = os.path.join(video_dir, "walker2d_sac.mp4")

# Use a dedicated name for the rollout environment so the Monitor-wrapped
# training `env` is not overwritten (and later closed) by this cell; otherwise
# re-running earlier cells would silently operate on the wrong environment.
render_env = gym.make(env_name, render_mode="rgb_array")
frames = []

# Create a new MuJoCo renderer (480x480 output) bound to the env's model.
mujoco_renderer = mujoco.Renderer(render_env.unwrapped.model, 480, 480)

try:
    obs, _ = render_env.reset()
    # Upper bound on rollout length; the loop normally exits earlier when the
    # episode terminates or is truncated.
    for _ in range(10000):
        action, _ = model.predict(obs, deterministic=True)
        obs, _, terminated, truncated, _ = render_env.step(action)
        # Sync the renderer's scene with the simulation state, then grab a frame.
        mujoco_renderer.update_scene(render_env.unwrapped.data)
        frame = mujoco_renderer.render()
        frames.append(frame)
        if terminated or truncated:
            break
finally:
    # Release the rendering context and the environment even if the rollout
    # fails part-way through.
    mujoco_renderer.close()
    render_env.close()

# Save frames as a video
# NOTE(review): fps=10 is presumably far below the simulation step rate, which
# would make playback slow motion - confirm this is intended.
imageio.mimsave(video_path, frames, fps=10)

# Function to display the video in Colab
def show_video(video_path):
    """Return an HTML video player with the MP4 at ``video_path`` embedded.

    The file is read in full and inlined as a base64 ``data:`` URL, so the
    returned object does not depend on the file after this call.

    Args:
        video_path: Path to the MP4 file to embed.

    Returns:
        An ``IPython.display.HTML`` object containing a ``<video>`` element.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Context manager guarantees the file handle is closed promptly instead of
    # being left to the garbage collector.
    with open(video_path, 'rb') as video_file:
        mp4 = video_file.read()
    data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
    return HTML(f'<video width="640" height="480" controls><source src="{data_url}" type="video/mp4"></video>')

# Display the video
show_video(video_path)