# Gymnasium + MuJoCo Exploration

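This notebook is an interactive walkthrough of Gymnasium's MuJoCo environments: it inspects one environment, renders a random-policy rollout, compares observation and action space shapes across several tasks, and plots the episode-reward distribution of a random policy.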

In [None]:
import sys
sys.path.insert(0, '..')

import gymnasium as gym
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import HTML
from matplotlib import animation
from typing import List, Tuple, cast

from src.utils import print_env_info, create_env

## 1. Environment Overview

In [None]:
# Build the HalfCheetah environment through the project helper, requesting
# render_mode="rgb_array" because a later cell collects frames via env.render().
# The explicit gym.Env annotation is kept so IDE datatips can resolve the type.
env: gym.Env = create_env(
    "HalfCheetah-v5",
    render_mode="rgb_array",
)

# Print the project helper's summary of the environment.
print_env_info(env)

## 2. Visualize Random Actions

In [None]:
# Roll out one episode with randomly sampled actions, recording one rendered
# frame per step. Seeding both the reset and the action-space sampler makes
# the rollout reproducible under Restart Kernel -> Run All.
ROLLOUT_SEED = 0
MAX_ROLLOUT_STEPS = 200

frames = []
try:
    obs, info = env.reset(seed=ROLLOUT_SEED)
    env.action_space.seed(ROLLOUT_SEED)

    for _ in range(MAX_ROLLOUT_STEPS):
        # Render before stepping, so frame i shows the state that action i acts on.
        frames.append(env.render())
        action = env.action_space.sample()
        obs, reward, terminated, truncated, info = env.step(action)
        if terminated or truncated:
            break
finally:
    # Always release the environment, even if reset/step/render raised.
    # Later cells construct their own environments rather than reusing this one.
    env.close()

print(f"Collected {len(frames)} frames")

In [None]:
# Turn the recorded frames into an inline JavaScript/HTML animation.
FRAME_INTERVAL_MS = 33  # delay between frames in milliseconds (about 30 fps)

fig, ax = plt.subplots(figsize=(8, 6))
ax.axis('off')
im = ax.imshow(frames[0])


def animate(i):
    """Display recorded frame ``i``.

    Returns the list of modified artists, which FuncAnimation requires
    when ``blit=True``.
    """
    im.set_array(frames[i])
    return [im]


anim = animation.FuncAnimation(
    fig, animate, frames=len(frames), interval=FRAME_INTERVAL_MS, blit=True
)
# Close this specific figure (not whichever one pyplot considers "current")
# so that only the HTML animation below is shown as the cell output.
plt.close(fig)

HTML(anim.to_jshtml())

## 3. Explore Different Environments

In [None]:
# MuJoCo tasks whose observation/action spaces are compared below.
mujoco_envs = [
    "InvertedPendulum-v5",
    "HalfCheetah-v5",
    "Hopper-v5",
    "Walker2d-v5",
    "Ant-v5",
    "Humanoid-v5",
]

# Print a fixed-width table of observation and action space shapes.
print(f"{'Environment':<30} {'Obs Shape':<15} {'Action Shape':<15}")
print("=" * 60)

for env_id in mujoco_envs:
    # Use a dedicated name for the throwaway environment so the notebook-level
    # `env` variable is not overwritten by this loop.
    probe_env: gym.Env = gym.make(env_id)
    try:
        obs_shape = str(probe_env.observation_space.shape)
        action_shape = str(probe_env.action_space.shape)
        print(f"{env_id:<30} {obs_shape:<15} {action_shape:<15}")
    finally:
        # Close each environment even if reading its spaces or printing fails.
        probe_env.close()

## 4. Analyze Reward Distribution

In [None]:
# Run several random-policy episodes and plot the per-episode total reward.
# The first reset and the action-space sampler are seeded so the numbers are
# reproducible under Restart Kernel -> Run All.
EVAL_SEED = 0

n_episodes = 10
episode_rewards = []

env: gym.Env = create_env("HalfCheetah-v5")
try:
    env.action_space.seed(EVAL_SEED)

    for ep in range(n_episodes):
        # Seed only the first reset; later resets continue from the
        # environment's already-seeded RNG, so episodes differ but the
        # whole run is deterministic.
        obs, info = env.reset(seed=EVAL_SEED if ep == 0 else None)
        total_reward = 0.0
        done = False

        # NOTE(review): this loop relies on the environment eventually
        # reporting terminated or truncated -- confirm create_env applies
        # an episode step limit.
        while not done:
            action = env.action_space.sample()
            obs, reward, terminated, truncated, info = env.step(action)
            total_reward += float(reward)
            done = terminated or truncated

        episode_rewards.append(total_reward)
finally:
    # Release the environment even if an episode raised part-way through.
    env.close()

# Left panel: reward per episode. Right panel: histogram of those rewards.
reward_fig, (ax_bar, ax_hist) = plt.subplots(1, 2, figsize=(10, 4))

ax_bar.bar(range(n_episodes), episode_rewards)
ax_bar.set_xlabel('Episode')
ax_bar.set_ylabel('Total Reward')
ax_bar.set_title('Episode Rewards (Random Policy)')

ax_hist.hist(episode_rewards, bins=10, edgecolor='black')
ax_hist.set_xlabel('Total Reward')
ax_hist.set_ylabel('Count')
ax_hist.set_title('Reward Distribution')

reward_fig.tight_layout()
plt.show()

print(f"Mean: {np.mean(episode_rewards):.2f}, Std: {np.std(episode_rewards):.2f}")