<a href="https://colab.research.google.com/github/kuds/rl-mujoco-tennis/blob/main/%5BMuJoCo%20Paddle%5D%20Proximal%20Policy%20Optimization%20(PPO).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [23]:
!pip install mujoco



In [24]:
!pip install stable-baselines3



In [25]:
import gymnasium
import mujoco
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import EvalCallback
import os
import torch
import platform
from importlib.metadata import version

In [11]:
print(f"Python Version: {platform.python_version()}")
print(f"Torch Version: {version('torch')}")
print(f"Is Cuda Available: {torch.cuda.is_available()}")
print(f"Cuda Version: {torch.version.cuda}")
print(f"Gymnasium Version: {version('gymnasium')}")
print(f"Numpy Version: {version('numpy')}")
print(f"Tensorflow Version: {version('tensorflow')}")
print(f"tqdm Version: {version('tqdm')}")

Python Version: 3.10.12
Torch Version: 2.5.0+cu121
Is Cuda Available: False
Cuda Version: 12.1
Gymnasium Version: 0.29.1
Numpy Version: 1.26.4
Tensorflow Version: 2.17.0
tqdm Version: 4.66.6


In [18]:
class BallBounceEnv(gymnasium.Env):
    def __init__(self):
        super(BallBounceEnv, self).__init__()
        # Define action and observation space
        # Actions: control the paddle position (-1 to 1)
        self.action_space = gymnasium.spaces.Box(low=-1.0, high=1.0, shape=(1,), dtype=np.float32)
        # Observations: ball and paddle positions and velocities
        self.observation_space = gymnasium.spaces.Box(
            low=-np.inf, high=np.inf, shape=(6,), dtype=np.float32
        )
        # Load MuJoCo model
        self.model_path = "ball_bounce.xml"  # Replace with actual path
        self.model = mujoco.MjModel.from_xml_path(self.model_path)
        self.sim = mujoco.MjSim(self.model)
        self.viewer = None
        self.reset()

    def reset(self):
        self.sim.reset()
        # Randomize initial conditions
        self.sim.data.qpos[:] = np.random.uniform(-0.1, 0.1, size=self.sim.model.nq)
        self.sim.data.qvel[:] = np.random.uniform(-0.1, 0.1, size=self.sim.model.nv)
        self.ball_bounces = 0
        return self._get_obs()

    def step(self, action):
        # Apply action (paddle control)
        self.sim.data.ctrl[0] = action[0]
        self.sim.step()

        # Check if ball hits paddle and update bounces
        if self._ball_hits_paddle():
            self.ball_bounces += 1

        reward = 1 if self._ball_hits_paddle() else -1
        done = self.ball_bounces >= 100

        return self._get_obs(), reward, done, {}

    def render(self, mode='human'):
        if self.viewer is None:
            self.viewer = mujoco_py.MjViewer(self.sim)
        self.viewer.render()

    def close(self):
        if self.viewer is not None:
            self.viewer = None

    def _get_obs(self):
        # Return ball and paddle positions and velocities
        return np.concatenate([
            self.sim.data.qpos[:],
            self.sim.data.qvel[:]
        ])

    def _ball_hits_paddle(self):
        # Logic to determine if the ball hits the paddle
        ball_pos = self.sim.data.qpos[0]
        paddle_pos = self.sim.data.qpos[1]
        return abs(ball_pos - paddle_pos) < 0.1

  and should_run_async(code)


In [21]:
# Ensure environment XML (MuJoCo model) is available
xml_content = """
<mujoco>
	<asset>
		<texture type="skybox" builtin="gradient" width="128" height="128" rgb1=".4 .6 .8" rgb2="0 0 0"/>
		<texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" markrgb="1 1 1" random="0.01"/>
		<texture name="texplane" type="2d" builtin="checker" rgb1=".2 .3 .4" rgb2=".1 0.15 0.2" width="512" height="512"/>
		<material name='MatPlane' reflectance='0.5' texture="texplane" texrepeat="1 1" texuniform="true"/>
		<material name='geom' texture="texgeom" texuniform="true"/>
	</asset>
	<option gravity="0 0 -9.8" solver="CG" tolerance="1e-6"/>
	<worldbody>
		<light diffuse=".5 .5 .5" pos="0 0 3" dir="0 0 -1"/>
		<body pos="0 0 0">
			<geom type="cylinder" size=".20 .015" condim="3" friction="2" solref="0.01 0.15" material="MatPlane" rgba=".9 0 0 1"/>
			<joint axis="0 0 1" limited="true" name="slider" range="0 1" type="slide"/>
			<joint axis="0 1 0" limited="true" name="slider1" range="0 1" type="slide"/>
			<joint axis="1 0 0" limited="true" name="slider2" range="0 1" type="slide"/>
		</body>
		<body pos="0 0 .5">
			<joint type="free" damping="0.25"/>
			<geom type="sphere" size=".05" condim="3" solref="0.01 0.16" rgba="0 .9 0 1"/>
		</body>
	</worldbody>
	<actuator>
		<motor ctrllimited="true" ctrlrange="-1 1" gear="100" joint="slider" name="slide"/>
		<motor ctrllimited="true" ctrlrange="-1 1" gear="100" joint="slider1" name="slide1"/>
		<motor ctrllimited="true" ctrlrange="-1 1" gear="100" joint="slider2" name="slide2"/>
	</actuator>
</mujoco>
"""
with open("ball_bounce.xml", "w") as f:
    f.write(xml_content)

In [22]:
# Create and check environment
env = BallBounceEnv()
check_env(env)

# Train the agent using PPO
log_path = "./logs/"
os.makedirs(log_path, exist_ok=True)
env = DummyVecEnv([lambda: env])

model = PPO("MlpPolicy", env, verbose=1, tensorboard_log=log_path)
model.learn(total_timesteps=100000)

# Save and evaluate
model.save("ppo_ball_bounce")
eval_callback = EvalCallback(env, best_model_save_path="./logs/best_model",
                             log_path="./logs/", eval_freq=5000)


AttributeError: module 'mujoco' has no attribute 'MjSim'