In [1]:
from relayrl_framework import RelayRLAgent, TrainingServer
import gymnasium as gym
import numpy as np

In [2]:
def train_cartpole(num_episodes=100, config_path: str = ".", server_type: str = "GRPC"):
    env = gym.make('CartPole-v1')
    agent = RelayRLAgent(
        config_path=config_path,
        server_type=server_type
    )

    for episode in range(num_episodes):
        obs, _ = env.reset()
        done = False
        total_reward = 0
        mask = np.ones(env.action_space.n, dtype=np.float32)
        reward = 0.0
        while not done:
            action_obj = agent.request_for_action(obs, mask, reward)
            action_value = int(action_obj.get_act())
            next_obs, reward, terminated, truncated, _ = env.step(action_value)
            done = terminated or truncated
            agent.flag_last_action(reward)
            obs = next_obs
            total_reward += reward
        print(f'#### Episode {episode+1}: Total Reward = {total_reward} ####')
    env.close()

In [3]:
def main(
    buf_size: int = 1000000,
    tensorboard: bool = True,
    env_dir: str = ".",
    algorithm_dir: str = None,
    config_path: str = None,
    hyperparams: dict = None,
    server_type: str = "GRPC",
    training_prefix: str = None,
    training_host: str = None,
    training_port: str = None
    ):

    _server: TrainingServer = TrainingServer(
        algorithm_name="PPO",
        obs_dim=4,
        act_dim=2,
        buf_size=buf_size,
        tensorboard=tensorboard,
        env_dir=env_dir,
        algorithm_dir=algorithm_dir,
        config_path=config_path,
        hyperparams=hyperparams,
        server_type=server_type,
        training_prefix=training_prefix,
        training_host=training_host,
        training_port=training_port
    )

    train_cartpole(num_episodes=10, config_path=config_path, server_type=server_type)

In [4]:
main()

[ConfigLoader - load_config] Found config.json in current directory: "relayrl_config.json"
[TrainingServer - new] Resolved configuration path: Some("relayrl_config.json")
[TrainingServer - new] Resolved algorithm directory: /Users/tybg/Documents/GitHub/RelayRL-prototype/relayrl_framework/src/native/python/algorithms
[Instantiating RelayRL-Framework ZMQ TrainingServer...]
[ConfigLoader - load_config] Found config.json in current directory: "relayrl_config.json"
[TrainingServer - new] Resolved configuration path: relayrl_config.json
[TrainingServer - new] Training server address: tcp://127.0.0.1:50051
[TrainingServer - new] Trajectory server address: tcp://127.0.0.1:7776
[TrainingServer - new] Agent listener address: tcp://127.0.0.1:7777
[PythonAlgorithmRequest - new] Initializing Python Algorithm Request...
[32;1mLogging data to ././logs/relayrl-reinforce-info/relayrl-reinforce-info_s388090001/progress.txt[0m
[36;1mSaving config:
[0m
{
"__class__":	"REINFORCE",
"act_dim":	2,
"buf_si

  action_value = int(action_obj.get_act())


[RelayRLTrajectory - action_done] Sending to TrainingServer
[RelayRLTrajectory - action_done] Sending to TrainingServer
[RelayRLTrajectory - action_done] Sending to TrainingServer
[RelayRLTrajectory - action_done] Sending to TrainingServer
[RelayRLTrajectory - action_done] Sending to TrainingServer
[RelayRLTrajectory - action_done] Sending to TrainingServer
[RelayRLTrajectory - action_done] Sending to TrainingServer
[RelayRLTrajectory - action_done] Sending to TrainingServer
[RelayRLTrajectory - action_done] Sending to TrainingServer
[RelayRLTrajectory - action_done] Sending to TrainingServer
[RelayRLTrajectory - action_done] Sending to TrainingServer
[RelayRLTrajectory - action_done] Sending to TrainingServer
[RelayRLTrajectory - action_done] Sending to TrainingServer
[RelayRLTrajectory - action_done] Sending to TrainingServer
[RelayRLTrajectory - action_done] Sending to TrainingServer
[RelayRLTrajectory - action_done] Sending to TrainingServer
[RelayRLTrajectory - action_done] Sendin

[TrainingServer - training_loop] Received trajectory #4
[RelayRLTrajectory] New 1000 length trajectory created
[RelayRLTrajectory] New 1000 length trajectory created
[TrainingServer - training_loop] Received trajectory #5
[RelayRLTrajectory] New 1000 length trajectory created
[RelayRLTrajectory] New 1000 length trajectory created
[TrainingServer - training_loop] Received trajectory #6
[RelayRLTrajectory] New 1000 length trajectory created
[RelayRLTrajectory] New 1000 length trajectory created
[TrainingServer - training_loop] Received trajectory #7
[RelayRLTrajectory] New 1000 length trajectory created
[RelayRLTrajectory] New 1000 length trajectory created
[TrainingServer - training_loop] Received trajectory #8
[RelayRLTrajectory] New 1000 length trajectory created
[RelayRLTrajectory] New 1000 length trajectory created
-------------------------------------
|           Epoch |               1 |
|    AverageEpRet |            5.67 |
|        StdEpRet |            3.94 |
|        MaxEpRet 