## Test Gym Environment

| Package        | Version |
|----------------|---------|
| numpy          | < 2.0   |
| scipy          |         |
| matplotlib     |         |
| grpcio         |         |
| grpcio-tools   |         |
| torch          |         |

* Install the packages above. Because of a compatibility issue between numpy and torch, make sure to install a numpy version below 2.0.

### Ant Multi Agent

| Package   | Version |
|-----------|---------|
| gymnasium | 0.29.1  |
| mujoco    | Not required |
| Level Name | Ant_Multiagent |


* Run the program, and the ant robot will perform random actions.

In [None]:
import gymnasium as gym
from gymnasium.envs.registration import register

import os
import sys

# abspath('') resolves to the current working directory, so despite its name
# this is the directory the notebook runs from, not the path of a file.
current_file_path = os.path.abspath('')
project_root = os.path.split(current_file_path)[0]

# Put the parent directory on the import path (only once) so that
# project-local packages can be imported below.
if not sys.path.count(project_root):
    sys.path.append(project_root)


import importlib
import orca_gym.orca_gym as simulation_client

# Reload the module
importlib.reload(simulation_client)

# Test connection
import asyncio
import nest_asyncio

ROBOT_NAMES = ["Ant"]

import gymnasium as gym
from gymnasium.envs.registration import register
from envs.mujoco.ant_v5 import AntEnv
from datetime import datetime
import asyncio
import nest_asyncio
from concurrent.futures import ThreadPoolExecutor
from envs.orca_gym_env import ActionSpaceType

nest_asyncio.apply()

def register_env(grpc_address):
    """Register an Ant-v5 OrcaGym environment bound to one gRPC address.

    The environment id ends with the last two characters of
    ``grpc_address``, so addresses that differ in those characters are
    registered under distinct ids.

    Args:
        grpc_address: Address string; passed through to the environment as
            its ``grpc_address`` keyword argument.
    """
    env_id = f"Ant-v5-OrcaGym-{grpc_address[-2:]}"

    # Keyword arguments handed to the environment constructor.
    env_kwargs = dict(
        frame_skip=5,
        action_space_type=ActionSpaceType.CONTINUOUS,
        action_step_count=0,
        grpc_address=grpc_address,
        agent_names=['Ant'],
        time_step=0.016,
    )

    register(
        id=env_id,
        entry_point="envs.mujoco.ant_v5:AntEnv",
        kwargs=env_kwargs,
        max_episode_steps=200,
        reward_threshold=0.0,
    )

async def run_test(grpc_address):
    """Drive the Ant environment with random actions for 500 steps.

    Registers the environment for ``grpc_address``, creates it, and steps it
    with actions sampled from the action space. Each step is padded to
    roughly 16 ms so the simulation is displayed at about normal speed.
    The environment is always closed, even if a step raises.

    Args:
        grpc_address: Address string of the simulation server to test against.
    """
    import time  # function-scope import: monotonic clock for frame pacing

    # Frame rate is 60fps; to display at normal speed, step every 16ms.
    frame_period = 0.016

    env_id = f"Ant-v5-OrcaGym-{grpc_address[-2:]}"
    register_env(grpc_address)

    env = gym.make(env_id)
    try:
        observation, info = env.reset(seed=42)

        for _ in range(500):
            step_start = time.perf_counter()

            action = env.action_space.sample()  # this is where you would insert your policy
            observation, reward, terminated, truncated, info = env.step(action)

            # Sleep for whatever is left of this frame's time budget.
            remaining = frame_period - (time.perf_counter() - step_start)
            if remaining > 0:
                await asyncio.sleep(remaining)

            if terminated or truncated:
                observation, info = env.reset()
    finally:
        # Release the environment even when reset/step fails part-way through.
        env.close()

def run_in_thread(grpc_address):
    """Thread entry point: run ``run_test`` for one address via ``asyncio.run``."""
    test_coroutine = run_test(grpc_address)
    asyncio.run(test_coroutine)

if __name__ == "__main__":
    # One test per simulation server: 'localhost:50051' ~ 'localhost:50054'
    grpc_addresses = [f'localhost:500{i}' for i in range(51, 55)]
    with ThreadPoolExecutor(max_workers=len(grpc_addresses)) as executor:
        futures = {
            address: executor.submit(run_in_thread, address)
            for address in grpc_addresses
        }
        # An exception raised inside a worker is stored on its future and is
        # lost unless the future is inspected, so report each failure here
        # instead of letting it disappear silently.
        for address, future in futures.items():
            error = future.exception()  # blocks until this worker finishes
            if error is not None:
                print(f"Test on {address} failed: {error!r}")


### Verify PyTorch Installation
* Reinforcement learning training typically requires GPU acceleration, so using the PyTorch framework can significantly improve training performance.
* The following code verifies the PyTorch installation. Check its output to determine whether a GPU-accelerated build of PyTorch (compatible with your CUDA version) is installed.



In [None]:
import torch

# Report the installed PyTorch build and whether it can see a CUDA device.
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

# The CUDA details below are only queried when a device is usable.
if torch.cuda.is_available():
    print(f"CUDA version: {torch.version.cuda}")
    print(f"Current CUDA device: {torch.cuda.current_device()}")
    print(f"CUDA device name: {torch.cuda.get_device_name(torch.cuda.current_device())}")

In [None]:
import torch
import time

# Check if CUDA is available
if not torch.cuda.is_available():
    raise SystemError("CUDA is not available. Please check your PyTorch installation.")

# Print CUDA device information
device = torch.device("cuda")
print("Using device:", torch.cuda.get_device_name(torch.cuda.current_device()))

# Create two large square operands directly on the GPU.
# NOTE: each tensor holds 20000 * 20000 = 4e8 elements (about 1.6 GB at the
# default float32 dtype); lower tensor_size if the GPU runs out of memory.
tensor_size = 20000
x = torch.rand((tensor_size, tensor_size), device=device)
y = torch.rand((tensor_size, tensor_size), device=device)

# Perform multiple matrix multiplication operations and measure the time
num_iterations = 10

# CUDA operations are queued asynchronously: torch.mm can return before the
# GPU has finished. Synchronize before starting and before stopping the clock
# so the measurement covers the actual computation, not just kernel launches.
torch.cuda.synchronize()
start_time = time.perf_counter()

for _ in range(num_iterations):
    result = torch.mm(x, y)

torch.cuda.synchronize()
elapsed_time = time.perf_counter() - start_time

print(f"{num_iterations} iterations of matrix multiplication completed in {elapsed_time:.4f} seconds.")
print(f"Average time per iteration: {elapsed_time / num_iterations:.4f} seconds.")

# Check a part of the result to ensure the operation was successful
print("Result[0, 0]:", result[0, 0].item())


### Pusher V5

| Package          | Version     |
|------------------|-------------|
| gymnasium        | 0.29.1      |
| mujoco           | Not Installed |
| stable-baselines3| 2.3.2       |
| Level Name       | pusher_v5   |
| Training Iterations | 10000      |

* Attempt to train the robot using the PPO algorithm to complete the push task.
* You can adjust the number of training timesteps (`total_timesteps` in the code below) to observe different effects.

In [None]:
import gymnasium as gym
from gymnasium.envs.registration import register

import os
import sys

# abspath('') resolves to the current working directory, so despite its name
# this is the directory the notebook runs from, not the path of a file.
current_file_path = os.path.abspath('')
project_root = os.path.split(current_file_path)[0]

# Put the parent directory on the import path (only once) so that
# project-local packages can be imported below.
if not sys.path.count(project_root):
    sys.path.append(project_root)


import importlib
import orca_gym.orca_gym as simulation_client

# Reload the module
importlib.reload(simulation_client)

# Test connection
import asyncio
import nest_asyncio

import gymnasium as gym
from gymnasium.envs.registration import register
from envs.orca_gym_env import ActionSpaceType
from datetime import datetime
import asyncio
import nest_asyncio
from concurrent.futures import ThreadPoolExecutor
from stable_baselines3 import PPO

nest_asyncio.apply()

def register_env(grpc_address):
    """Register a Pusher-v5 OrcaGym environment bound to one gRPC address.

    The environment id ends with the last two characters of
    ``grpc_address``.

    Args:
        grpc_address: Address string; passed through to the environment as
            its ``grpc_address`` keyword argument.
    """
    print("register_env: ", grpc_address)

    env_id = f"Pusher-v5-OrcaGym-{grpc_address[-2:]}"

    # Keyword arguments handed to the environment constructor.
    env_kwargs = dict(
        frame_skip=5,
        action_space_type=ActionSpaceType.CONTINUOUS,
        action_step_count=0,
        grpc_address=grpc_address,
        agent_names=['Pusher'],
        time_step=0.01,
    )

    register(
        id=env_id,
        entry_point="envs.mujoco.pusher_v5:PusherEnv",
        kwargs=env_kwargs,
        max_episode_steps=100,
        reward_threshold=0.0,
    )

async def run_test(grpc_address):
    """Train a PPO policy on the Pusher environment and save it to disk.

    Registers and creates the environment for ``grpc_address``, trains a
    fresh ``MlpPolicy`` PPO model for 10000 timesteps, and saves it as
    ``pusher_ppo_model``. The environment is closed afterwards, including
    when training fails.

    Args:
        grpc_address: Address string of the simulation server to train against.
    """
    print("simulation running... , grpc_address: ", grpc_address)
    env_id = f"Pusher-v5-OrcaGym-{grpc_address[-2:]}"
    register_env(grpc_address)

    env = gym.make(env_id)
    try:
        # To continue from a previously saved model instead of starting fresh:
        # model = PPO.load("pusher_ppo_model.zip", env=env)

        # Train using PPO algorithm
        model = PPO(
            'MlpPolicy',
            env,
            verbose=1,
            n_steps=2048,
            learning_rate=0.0003,
            gamma=0.99,
            gae_lambda=0.95,
            clip_range=0.2,
            ent_coef=0.01,
        )
        model.learn(total_timesteps=10000)

        # Save the model
        model.save("pusher_ppo_model")
    finally:
        # Release the environment whether or not training completed.
        env.close()

if __name__ == "__main__":
    # Train against the simulation server listening on port 50051.
    asyncio.run(run_test(grpc_address='localhost:50051'))


### Verify Trained Model

In [None]:
import gymnasium as gym
from gymnasium.envs.registration import register

import os
import sys

# abspath('') resolves to the current working directory, so despite its name
# this is the directory the notebook runs from, not the path of a file.
current_file_path = os.path.abspath('')
project_root = os.path.split(current_file_path)[0]

# Put the parent directory on the import path (only once) so that
# project-local packages can be imported below.
if not sys.path.count(project_root):
    sys.path.append(project_root)


import importlib
import orca_gym.orca_gym as simulation_client

# Reload the module
importlib.reload(simulation_client)

# Test connection
import asyncio
import nest_asyncio

import gymnasium as gym
from gymnasium.envs.registration import register
from envs.orca_gym_env import ActionSpaceType
from envs.mujoco.ant_v5 import AntEnv
from datetime import datetime
import asyncio
import nest_asyncio
from concurrent.futures import ThreadPoolExecutor
from stable_baselines3 import PPO

nest_asyncio.apply()

def register_env(grpc_address):
    """Register the Pusher-v5 OrcaGym environment used to replay a trained model.

    The environment id ends with the last two characters of
    ``grpc_address``.

    Args:
        grpc_address: Address string; passed through to the environment as
            its ``grpc_address`` keyword argument.
    """
    print("register_env: ", grpc_address)

    env_id = f"Pusher-v5-OrcaGym-{grpc_address[-2:]}"

    # Keyword arguments handed to the environment constructor.
    # NOTE(review): frame_skip is 1 here, while the training cell registers
    # the same environment with frame_skip=5 -- confirm this is intended.
    env_kwargs = dict(
        frame_skip=1,
        action_space_type=ActionSpaceType.CONTINUOUS,
        action_step_count=0,
        grpc_address=grpc_address,
        agent_names=['Pusher'],
        time_step=0.01,
    )

    register(
        id=env_id,
        entry_point="envs.mujoco.pusher_v5:PusherEnv",
        kwargs=env_kwargs,
        max_episode_steps=100,
        reward_threshold=0.0,
    )

async def run_test(grpc_address):
    """Replay the saved PPO model on the Pusher environment for 1000 steps.

    Registers and creates the environment for ``grpc_address``, loads
    ``pusher_ppo_model``, and steps the environment with the model's
    deterministic predictions, printing the reward of every step. Each step
    is padded to roughly 10 ms. The environment is always closed, even if
    loading the model or a step raises.

    Args:
        grpc_address: Address string of the simulation server to test against.
    """
    import time  # function-scope import: monotonic clock for step pacing

    step_period = 0.01  # seconds budgeted per step

    print("simulation running... , grpc_address: ", grpc_address)
    env_id = f"Pusher-v5-OrcaGym-{grpc_address[-2:]}"
    register_env(grpc_address)

    env = gym.make(env_id)
    try:
        # Load existing model
        model = PPO.load("pusher_ppo_model")

        # Test the trained model
        observation, info = env.reset(seed=42)
        for step in range(1000):
            step_start = time.perf_counter()

            action, _states = model.predict(observation, deterministic=True)
            observation, reward, terminated, truncated, info = env.step(action)

            # Print the reward
            print(f"Step: {step}, Reward: {reward}")

            # Sleep for whatever is left of this step's time budget.
            remaining = step_period - (time.perf_counter() - step_start)
            if remaining > 0:
                await asyncio.sleep(remaining)

            if terminated or truncated:
                observation, info = env.reset()
    finally:
        # Release the environment even when loading or stepping fails.
        env.close()

if __name__ == "__main__":
    # Replay the trained model against the simulation server on port 50051.
    asyncio.run(run_test(grpc_address='localhost:50051'))