In [1]:
from typing import Optional

import gymnasium as gym
import numpy as np
from relayrl_framework import RelayRLAgent, TrainingServer

In [2]:
def train_lunar_lander(num_episodes=100, config_path: str = ".", server_type: str = "GRPC"):
    """Drive a RelayRL agent through episodes of Gymnasium's ``LunarLander-v3``.

    Each episode resets the environment and then repeatedly asks the agent for
    an action until the environment reports termination or truncation. The
    reward returned by ``env.step`` is passed to the *next*
    ``request_for_action`` call; the reward of the final step is reported once
    per episode through ``flag_last_action``.

    Args:
        num_episodes: Number of episodes to play.
        config_path: Forwarded unchanged to ``RelayRLAgent(config_path=...)``.
        server_type: Forwarded unchanged to ``RelayRLAgent(server_type=...)``.

    Returns:
        None. One summary line with the accumulated reward is printed per
        episode.
    """
    env = gym.make('LunarLander-v3')
    try:
        # Built inside the try block so the environment is still closed if
        # agent construction fails.
        agent = RelayRLAgent(
            config_path=config_path,
            server_type=server_type
        )

        for episode in range(num_episodes):
            obs, _ = env.reset()
            done = False
            total_reward = 0
            # All-ones vector with one entry per discrete action; presumably
            # this marks every action as allowed -- confirm against RelayRL docs.
            mask = np.ones(env.action_space.n, dtype=np.float32)
            # No action has been taken yet, so the first request carries 0.0.
            reward = 0.0
            while not done:
                action_obj = agent.request_for_action(obs, mask, reward)
                action_value = int(action_obj.get_act())
                next_obs, reward, terminated, truncated, _ = env.step(action_value)
                done = terminated or truncated
                obs = next_obs
                total_reward += reward
            # Invoked once, after the episode has ended: flagging on every step
            # would mark each intermediate action as the trajectory's last one
            # (going by the method's name and the RelayRL usage examples --
            # confirm against the framework documentation).
            agent.flag_last_action(reward)
            print(f'#### Episode {episode+1}: Total Reward = {total_reward} ####')
    finally:
        # Release the environment even when the agent or a step raises.
        env.close()

In [None]:
def main(
    buf_size: int = 1000000,
    tensorboard: bool = True,
    env_dir: str = ".",
    algorithm_dir: Optional[str] = None,
    config_path: Optional[str] = None,
    hyperparams: Optional[dict] = None,
    server_type: str = "GRPC",
    training_prefix: Optional[str] = None,
    training_host: Optional[str] = None,
    training_port: Optional[str] = None,
    num_episodes: int = 10,
    ):
    """Create a PPO ``TrainingServer`` and run a LunarLander training session.

    Args:
        buf_size: Forwarded to ``TrainingServer(buf_size=...)``.
        tensorboard: Forwarded to ``TrainingServer(tensorboard=...)``.
        env_dir: Forwarded to ``TrainingServer(env_dir=...)``.
        algorithm_dir: Forwarded to ``TrainingServer(algorithm_dir=...)``.
        config_path: Forwarded to both the ``TrainingServer`` and
            ``train_lunar_lander``.
        hyperparams: Forwarded to ``TrainingServer(hyperparams=...)``.
        server_type: Forwarded to both the ``TrainingServer`` and
            ``train_lunar_lander``.
        training_prefix: Forwarded to ``TrainingServer(training_prefix=...)``.
        training_host: Forwarded to ``TrainingServer(training_host=...)``.
        training_port: Forwarded to ``TrainingServer(training_port=...)``.
        num_episodes: Number of episodes passed to ``train_lunar_lander``.
            Defaults to 10, the value that used to be hard-coded here.

    Returns:
        None.
    """
    # obs_dim / act_dim are fixed to 8 and 4; these are expected to match the
    # observation and action spaces of LunarLander-v3 (see Gymnasium docs).
    # The server stays bound to a local name for the duration of training;
    # presumably it has to remain alive while the agent runs -- confirm
    # against the RelayRL documentation.
    _server = TrainingServer(
        algorithm_name="PPO",
        obs_dim=8,
        act_dim=4,
        buf_size=buf_size,
        tensorboard=tensorboard,
        env_dir=env_dir,
        algorithm_dir=algorithm_dir,
        config_path=config_path,
        hyperparams=hyperparams,
        server_type=server_type,
        training_prefix=training_prefix,
        training_host=training_host,
        training_port=training_port
    )

    train_lunar_lander(
        num_episodes=num_episodes,
        config_path=config_path,
        server_type=server_type,
    )


In [None]:
# Create the PPO training server and run the LunarLander training session
# using main's default arguments.
main()