<a href="https://colab.research.google.com/github/kuds/rl-connect-four/blob/main/%5BConnect%20Four%5D%20Self%20Play.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Playing Connect Four using Self Play

In [1]:
# Install Ray from the nightly wheel bucket (pip reports it as ray 3.0.0.dev0).
# The wheel filename targets CPython 3.11 (cp311) on x86_64 manylinux, so the
# kernel must match that interpreter/platform for the install to succeed.
# NOTE(review): the URL points at ".../latest/...", which presumably changes
# over time — confirm whether a pinned build is wanted for reproducibility.
!pip install https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp311-cp311-manylinux2014_x86_64.whl

Collecting ray==3.0.0.dev0
  Downloading https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-3.0.0.dev0-cp311-cp311-manylinux2014_x86_64.whl (67.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 MB[0m [31m25.6 MB/s[0m eta [36m0:00:00[0m


In [2]:
!pip install gputil open_spiel gymnasium



In [3]:
import functools
import numpy as np
import multiprocessing as mp
import ray
from ray import tune
from ray.air.constants import TRAINING_ITERATION
from ray.rllib.core.rl_module.default_model_config import DefaultModelConfig
from ray.rllib.core.rl_module.multi_rl_module import MultiRLModuleSpec
from ray.rllib.core.rl_module.rl_module import RLModuleSpec
from ray.rllib.env.utils import try_import_pyspiel, try_import_open_spiel
from ray.rllib.env.wrappers.open_spiel import OpenSpielEnv
from ray.rllib.examples.rl_modules.classes.random_rlm import RandomRLModule
from ray.rllib.examples.multi_agent.utils import (
    ask_user_for_action,
    SelfPlayCallback,
    SelfPlayCallbackOldAPIStack,
)
from ray.rllib.examples._old_api_stack.policy.random_policy import RandomPolicy
from ray.rllib.policy.policy import PolicySpec
from ray.rllib.utils.metrics import NUM_ENV_STEPS_SAMPLED_LIFETIME
from ray.rllib.utils.test_utils import (
    add_rllib_example_script_args,
    run_rllib_example_script_experiment,
)
from ray.tune.registry import get_trainable_cls, register_env
import platform

import torch
from importlib.metadata import version



In [4]:
# Print the interpreter and library versions this run is using, one per line.
# Each probe is a callable so values are looked up lazily, in display order,
# right before the corresponding line is printed.
version_probes = (
    ("Python Version", platform.python_version),
    ("Torch Version", lambda: version('torch')),
    ("Is Cuda Available", torch.cuda.is_available),
    ("Cuda Version", lambda: torch.version.cuda),
    ("Numpy Version", lambda: version('numpy')),
    ("Ray Version", lambda: version('ray')),
    ("Gymnasium Version", lambda: version('Gymnasium')),
    ("Open Spiel Version", lambda: version('open_spiel')),
)
for label, probe in version_probes:
    print(f"{label}: {probe()}")

Python Version: 3.11.11
Torch Version: 2.5.1+cu121
Is Cuda Available: True
Cuda Version: 12.1
Numpy Version: 1.26.4
Ray Version: 3.0.0.dev0
Gymnasium Version: 1.0.0
Open Spiel Version: 1.5


In [5]:
# Show the CPU count reported by multiprocessing for this machine.
print(f"Number of CPUs Available: {mp.cpu_count()}")

Number or CPUs Available: 12


In [6]:
# Load both OpenSpiel entry points through RLlib's import helpers. They are
# called with `error=True`, so a missing installation is reported here rather
# than as a bare ImportError on the direct import below.
pyspiel = try_import_pyspiel(error=True)
open_spiel = try_import_open_spiel(error=True)

# Deliberately placed after the helper calls above so that a missing OpenSpiel
# install surfaces through them (with their hints) first.
from open_spiel.python.rl_environment import Environment  # noqa: E402

In [7]:
class Args:
    """Plain attribute bag standing in for parsed command-line arguments.

    The instance is read directly by the configuration cell and is passed to
    `run_rllib_example_script_experiment`. The attribute names presumably
    mirror RLlib's example-script argument parser — verify against the
    installed Ray version when adding or renaming options.

    Args:
        **overrides: Optional ``name=value`` pairs replacing the defaults
            below, e.g. ``Args(stop_iters=10)``. Calling ``Args()`` with no
            arguments yields exactly the default configuration.

    Raises:
        TypeError: If an override names an option that does not exist, so
            that typos fail loudly instead of being silently ignored.
    """

    def __init__(self, **overrides):
        # --- Self-play experiment settings ---
        self.env = "connect_four"  # game name given to `pyspiel.load_game`
        self.checkpoint_freq = 1
        self.checkpoint_at_end = True
        self.win_rate_threshold = 0.95  # forwarded to the self-play callback
        self.min_league_size = 3  # used as the "league_size" stop criterion
        self.num_episodes_human_play = 1
        self.from_checkpoint = None  # when set, the training run is skipped

        # --- Generic example-script settings ---
        self.algo = 'PPO'  # algorithm name resolved via `get_trainable_cls`
        self.num_env_runners = 2
        self.enable_new_api_stack = True
        self.stop_timesteps = 2000000
        self.stop_iters = 100
        self.as_release_test = False
        self.num_cpus = 10
        self.local_mode = False
        self.framework = 'torch'
        self.num_gpus = 0
        # NOTE(review): requests one GPU per learner; presumably this needs a
        # GPU runtime — confirm before running on a CPU-only machine.
        self.num_gpus_per_learner = 1
        self.num_learners = 1
        self.evaluation_interval = 0
        self.log_level = None
        self.output = None
        self.no_tune = False
        self.num_agents = 0
        self.verbose = 2
        self.num_samples = 1
        self.max_concurrent_trials = None
        self.as_test = False

        # Apply caller overrides, rejecting names that are not known options.
        known_options = set(vars(self)) | {"num_envs_per_env_runner"}
        unknown = sorted(set(overrides) - known_options)
        if unknown:
            raise TypeError(f"Unknown Args option(s): {', '.join(unknown)}")
        vars(self).update(overrides)

        # Derived default: computed after overrides so it always reflects the
        # final `enable_new_api_stack` value, unless it was set explicitly.
        if "num_envs_per_env_runner" not in overrides:
            self.num_envs_per_env_runner = 1 if self.enable_new_api_stack else 5

args = Args()

In [8]:
def _create_open_spiel_env(_env_context):
    """Build an RLlib `OpenSpielEnv` around the OpenSpiel game named by `args.env`.

    The single positional argument supplied by the env registry is ignored.
    """
    game = pyspiel.load_game(args.env)
    return OpenSpielEnv(game)


# Make the environment available to RLlib configs as "open_spiel_env".
register_env("open_spiel_env", _create_open_spiel_env)

def agent_to_module_mapping_fn(agent_id, episode, **kwargs):
    """Pick the RLModule ("main" or "random") that controls `agent_id`.

    The parity of ``hash(episode.id_)`` decides which agent ID is played by
    "main" in this episode; every other agent ID is played by "random". This
    way both modules sometimes play agent 0 (the start player) and sometimes
    agent 1 (the player to move second).

    Args:
        agent_id: ID of the agent to map (0 or 1 in this two-player setup).
        episode: Episode object; only its `id_` attribute is read.
        **kwargs: Accepted and ignored.

    Returns:
        "main" if `agent_id` equals the episode's hash parity, else "random".
    """
    main_agent_id = hash(episode.id_) % 2
    if main_agent_id == agent_id:
        return "main"
    return "random"

def policy_mapping_fn(agent_id, episode, worker, **kwargs):
    """Pick the policy ("main" or "random") that controls `agent_id`.

    Counterpart of `agent_to_module_mapping_fn` that is selected when the new
    API stack is disabled. The parity of ``episode.episode_id`` decides which
    agent ID is played by "main"; every other agent ID is played by "random".

    Args:
        agent_id: ID of the agent to map (0 or 1 in this two-player setup).
        episode: Episode object; only its `episode_id` attribute is read.
        worker: Accepted for signature compatibility; not used.
        **kwargs: Accepted and ignored.

    Returns:
        "main" if `agent_id` equals the episode ID's parity, else "random".
    """
    main_agent_id = episode.episode_id % 2
    if main_agent_id == agent_id:
        return "main"
    return "random"

# Resolve everything that depends on the API stack once, up front, so the
# config chain below reads linearly.
on_new_api_stack = args.enable_new_api_stack

if on_new_api_stack:
    self_play_callback_cls = SelfPlayCallback
    # On the new stack the initial league is just the set of module IDs.
    initial_policies = {"main", "random"}
    mapping_fn = agent_to_module_mapping_fn
else:
    self_play_callback_cls = SelfPlayCallbackOldAPIStack
    initial_policies = {
        # Our main policy, the one we'd like to optimize.
        "main": PolicySpec(),
        # An initial random opponent to play against.
        "random": PolicySpec(policy_class=RandomPolicy),
    }
    mapping_fn = policy_mapping_fn

# The main piece of this experiment: the league-based self-play callback,
# which controls adding new policies/Modules to the league and matching the
# different policies in the league with each other.
self_play_callback = functools.partial(
    self_play_callback_cls,
    win_rate_threshold=args.win_rate_threshold,
)

config = (
    get_trainable_cls(args.algo)
    .get_default_config()
    .environment("open_spiel_env")
    .callbacks(self_play_callback)
    .env_runners(
        num_env_runners=args.num_env_runners or 2,
        num_envs_per_env_runner=1 if on_new_api_stack else 5,
    )
    .multi_agent(
        # Initial policy map: "random" plus the default algo policy "main".
        # The self-play callback later expands this with snapshots of "main",
        # against which "main" then plays (instead of "random").
        policies=initial_policies,
        # Assign agents 0 and 1 to "main" or to the opponent ("random" at
        # first) based on the episode ID, so that "main" always plays against
        # the opponent and never against another "main".
        policy_mapping_fn=mapping_fn,
        # Only the "main" policy is ever trained.
        policies_to_train=["main"],
    )
    .rl_module(
        model_config=DefaultModelConfig(fcnet_hiddens=[512, 512]),
        rl_module_spec=MultiRLModuleSpec(
            rl_module_specs={
                "main": RLModuleSpec(),
                "random": RLModuleSpec(module_class=RandomRLModule),
            }
        ),
    )
)

# PPO only: override the `num_epochs` training setting.
if args.algo == "PPO":
    config.training(num_epochs=20)

# Stopping criteria handed to the experiment runner.
stop = {
    NUM_ENV_STEPS_SAMPLED_LIFETIME: args.stop_timesteps,
    TRAINING_ITERATION: args.stop_iters,
    "league_size": args.min_league_size,
}

# Train the "main" policy via self-play, unless a checkpoint was supplied;
# in that case training is skipped and `results` stays None.
results = (
    None
    if args.from_checkpoint
    else run_rllib_example_script_experiment(config, args, stop=stop)
)

Usage stats collection is enabled by default for nightly wheels. To disable this, run the following command: `ray disable-usage-stats` before starting Ray. See https://docs.ray.io/en/master/cluster/usage-stats.html for more details.


  gym.logger.warn(
  gym.logger.warn(
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")
2025-01-16 02:18:06,043	INFO worker.py:1841 -- Started a local Ray instance.
2025-01-16 02:18:06,983	INFO tensorboardx.py:193 -- pip install "ray[tune]" to see TensorBoard files.


== Status ==
Current time: 2025-01-16 02:18:07 (running for 00:00:00.56)
Using FIFO scheduling algorithm.
Logical resource usage: 0/10 CPUs, 0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 PENDING)


== Status ==
Current time: 2025-01-16 02:18:12 (running for 00:00:05.59)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 PENDING)




[36m(pid=43809)[0m lz4 not available, disabling sample compression. This will significantly impact RLlib performance. To install lz4, run `pip install lz4`.


== Status ==
Current time: 2025-01-16 02:18:17 (running for 00:00:10.60)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 PENDING)


== Status ==
Current time: 2025-01-16 02:18:22 (running for 00:00:15.62)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 PENDING)




[36m(pid=43938)[0m lz4 not available, disabling sample compression. This will significantly impact RLlib performance. To install lz4, run `pip install lz4`.
[36m(pid=43939)[0m lz4 not available, disabling sample compression. This will significantly impact RLlib performance. To install lz4, run `pip install lz4`.


== Status ==
Current time: 2025-01-16 02:18:27 (running for 00:00:20.64)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 PENDING)




[36m(pid=44099)[0m lz4 not available, disabling sample compression. This will significantly impact RLlib performance. To install lz4, run `pip install lz4`.


== Status ==
Current time: 2025-01-16 02:18:32 (running for 00:00:25.66)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 PENDING)




[36m(_WrappedExecutable pid=44099)[0m Setting up process group for: env:// [rank=0, world_size=1]
[36m(PPO pid=43809)[0m Trainable.setup took 19.091 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[36m(MultiAgentEnvRunner pid=43939)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty
[36m(MultiAgentEnvRunner pid=43939)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty
[36m(MultiAgentEnvRunner pid=43939)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty
[36m(MultiAgentEnvRunner pid=43939)[0

== Status ==
Current time: 2025-01-16 02:18:37 (running for 00:00:30.70)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=21.29,num_env_steps_sampled_lifetime=4000,env_runners/episode_return_mean=-0.073 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_placement': True},

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000000)
[36m(MultiAgentEnvRunner pid=43938)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 133x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 49x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 70x across cluster][0m


== Status ==
Current time: 2025-01-16 02:18:42 (running for 00:00:35.75)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=19.71,num_env_steps_sampled_lifetime=8000,env_runners/episode_return_mean=-0.06100000000000001 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_plac

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000001)
[36m(MultiAgentEnvRunner pid=43939)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 102x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 65x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 37x across cluster][0m


== Status ==
Current time: 2025-01-16 02:18:47 (running for 00:00:40.81)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-01-16 02:18:52 (running for 00:00:45.83)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=21.15,num_env_steps_sampled_lifetime=12000,env_runners/episode_return_mean=-0.063 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placeme

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000002)
[36m(MultiAgentEnvRunner pid=43938)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 153x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 72x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 81x across cluster][0m


== Status ==
Current time: 2025-01-16 02:18:57 (running for 00:00:50.88)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=19.81,num_env_steps_sampled_lifetime=16000,env_runners/episode_return_mean=-0.06000000000000002 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_pla

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000003)
[36m(MultiAgentEnvRunner pid=43938)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 137x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 69x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 68x across cluster][0m


== Status ==
Current time: 2025-01-16 02:19:02 (running for 00:00:55.90)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=19.15,num_env_steps_sampled_lifetime=20000,env_runners/episode_return_mean=-0.051000000000000004 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_pl

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000004)
[36m(MultiAgentEnvRunner pid=43939)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 101x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 60x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 41x across cluster][0m


== Status ==
Current time: 2025-01-16 02:19:07 (running for 00:01:00.96)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=19.76,num_env_steps_sampled_lifetime=24000,env_runners/episode_return_mean=-0.06800000000000002 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_pla

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000005)
[36m(MultiAgentEnvRunner pid=43938)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 143x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 77x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 66x across cluster][0m


== Status ==
Current time: 2025-01-16 02:19:13 (running for 00:01:06.02)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=18.16,num_env_steps_sampled_lifetime=28000,env_runners/episode_return_mean=-0.044000000000000004 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_pl

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000006)
[36m(MultiAgentEnvRunner pid=43938)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 111x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 54x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 57x across cluster][0m


== Status ==
Current time: 2025-01-16 02:19:18 (running for 00:01:11.10)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=17.75,num_env_steps_sampled_lifetime=32000,env_runners/episode_return_mean=-0.04600000000000001 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_pla

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000007)
[36m(MultiAgentEnvRunner pid=43938)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 106x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 63x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 43x across cluster][0m


== Status ==
Current time: 2025-01-16 02:19:23 (running for 00:01:16.18)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=17.2,num_env_steps_sampled_lifetime=36000,env_runners/episode_return_mean=-0.030000000000000006 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_pla

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000008)
[36m(MultiAgentEnvRunner pid=43938)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 82x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 44x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 38x across cluster][0m


== Status ==
Current time: 2025-01-16 02:19:28 (running for 00:01:21.22)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-01-16 02:19:33 (running for 00:01:26.24)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=17.4,num_env_steps_sampled_lifetime=40000,env_runners/episode_return_mean=-0.04500000000000001 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker':

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000009)
[36m(MultiAgentEnvRunner pid=43938)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 74x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 38x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 36x across cluster][0m


== Status ==
Current time: 2025-01-16 02:19:38 (running for 00:01:31.25)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=16.58,num_env_steps_sampled_lifetime=44000,env_runners/episode_return_mean=-0.023000000000000007 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_pl

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000010)
[36m(MultiAgentEnvRunner pid=43938)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 63x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 37x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 26x across cluster][0m


== Status ==
Current time: 2025-01-16 02:19:43 (running for 00:01:36.33)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=16.32,num_env_steps_sampled_lifetime=48000,env_runners/episode_return_mean=-0.02500000000000001 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_pla

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000011)
[36m(MultiAgentEnvRunner pid=43938)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 50x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 26x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 24x across cluster][0m


== Status ==
Current time: 2025-01-16 02:19:48 (running for 00:01:41.40)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=17.19,num_env_steps_sampled_lifetime=52000,env_runners/episode_return_mean=-0.04900000000000002 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_pla

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000012)
[36m(MultiAgentEnvRunner pid=43938)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 75x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 35x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 40x across cluster][0m


== Status ==
Current time: 2025-01-16 02:19:53 (running for 00:01:46.49)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=15.1,num_env_steps_sampled_lifetime=56000,env_runners/episode_return_mean=-0.027000000000000003 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_pla

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000013)
[36m(MultiAgentEnvRunner pid=43939)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 74x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 27x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 47x across cluster][0m


== Status ==
Current time: 2025-01-16 02:19:58 (running for 00:01:51.54)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=14.96,num_env_steps_sampled_lifetime=60000,env_runners/episode_return_mean=-0.019000000000000006 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_pl

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000014)
[36m(MultiAgentEnvRunner pid=43938)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 55x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 29x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 26x across cluster][0m


== Status ==
Current time: 2025-01-16 02:20:03 (running for 00:01:56.60)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-01-16 02:20:08 (running for 00:02:01.64)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=14.93,num_env_steps_sampled_lifetime=64000,env_runners/episode_return_mean=-0.013000000000000003 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000015)
[36m(MultiAgentEnvRunner pid=43939)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 34x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 15x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 19x across cluster][0m


== Status ==
Current time: 2025-01-16 02:20:13 (running for 00:02:06.72)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=13.51,num_env_steps_sampled_lifetime=68000,env_runners/episode_return_mean=-0.008000000000000002 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_pl

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000016)
[36m(MultiAgentEnvRunner pid=43938)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 83x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 49x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 34x across cluster][0m


== Status ==
Current time: 2025-01-16 02:20:18 (running for 00:02:11.78)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=13.27,num_env_steps_sampled_lifetime=72000,env_runners/episode_return_mean=-0.015000000000000005 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_pl

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000017)
[36m(MultiAgentEnvRunner pid=43939)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 61x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 34x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 27x across cluster][0m


== Status ==
Current time: 2025-01-16 02:20:23 (running for 00:02:16.85)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=13.7,num_env_steps_sampled_lifetime=76000,env_runners/episode_return_mean=-0.023000000000000007 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_pla

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000018)
[36m(MultiAgentEnvRunner pid=43939)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 67x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 14x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 53x across cluster][0m


== Status ==
Current time: 2025-01-16 02:20:28 (running for 00:02:21.92)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=14.44,num_env_steps_sampled_lifetime=80000,env_runners/episode_return_mean=-0.025000000000000005 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_pl

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000019)
[36m(MultiAgentEnvRunner pid=43939)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 58x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 31x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 27x across cluster][0m


== Status ==
Current time: 2025-01-16 02:20:33 (running for 00:02:26.98)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=13.29,num_env_steps_sampled_lifetime=84000,env_runners/episode_return_mean=-0.023000000000000003 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_pl

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000020)
[36m(MultiAgentEnvRunner pid=43938)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 70x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 38x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 32x across cluster][0m


== Status ==
Current time: 2025-01-16 02:20:44 (running for 00:02:37.07)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=12.56,num_env_steps_sampled_lifetime=88000,env_runners/episode_return_mean=-0.015000000000000005 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_pl

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000021)
[36m(MultiAgentEnvRunner pid=43939)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 56x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 26x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 30x across cluster][0m


== Status ==
Current time: 2025-01-16 02:20:49 (running for 00:02:42.11)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=13.31,num_env_steps_sampled_lifetime=92000,env_runners/episode_return_mean=-0.018000000000000006 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_pl

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000022)
[36m(MultiAgentEnvRunner pid=43938)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 63x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 22x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 41x across cluster][0m


== Status ==
Current time: 2025-01-16 02:20:54 (running for 00:02:47.14)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=13.07,num_env_steps_sampled_lifetime=96000,env_runners/episode_return_mean=-0.012000000000000002 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_pl

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000023)
[36m(MultiAgentEnvRunner pid=43939)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 43x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 23x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 20x across cluster][0m


== Status ==
Current time: 2025-01-16 02:20:59 (running for 00:02:52.20)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=12.16,num_env_steps_sampled_lifetime=100000,env_runners/episode_return_mean=-0.013000000000000006 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_p

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000024)
[36m(MultiAgentEnvRunner pid=43939)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 44x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 21x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 23x across cluster][0m


== Status ==
Current time: 2025-01-16 02:21:04 (running for 00:02:57.25)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-01-16 02:21:09 (running for 00:03:02.30)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=12.15,num_env_steps_sampled_lifetime=104000,env_runners/episode_return_mean=-0.007999999999999998 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worke

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000025)
[36m(MultiAgentEnvRunner pid=43938)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 17x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 5x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 12x across cluster][0m


== Status ==
Current time: 2025-01-16 02:21:14 (running for 00:03:07.37)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=12.15,num_env_steps_sampled_lifetime=108000,env_runners/episode_return_mean=-0.006000000000000001 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_p

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000026)
[36m(MultiAgentEnvRunner pid=43939)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 47x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 30x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 17x across cluster][0m


== Status ==
Current time: 2025-01-16 02:21:19 (running for 00:03:12.42)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=11.58,num_env_steps_sampled_lifetime=112000,env_runners/episode_return_mean=-0.014000000000000004 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_p

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000027)
[36m(MultiAgentEnvRunner pid=43939)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 49x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 38x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 11x across cluster][0m


== Status ==
Current time: 2025-01-16 02:21:24 (running for 00:03:17.45)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=11.92,num_env_steps_sampled_lifetime=116000,env_runners/episode_return_mean=-0.007000000000000005 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_p

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000028)
[36m(MultiAgentEnvRunner pid=43939)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 29x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 12x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 17x across cluster][0m


== Status ==
Current time: 2025-01-16 02:21:29 (running for 00:03:22.51)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-01-16 02:21:34 (running for 00:03:27.53)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=12.29,num_env_steps_sampled_lifetime=120000,env_runners/episode_return_mean=-0.010000000000000004 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worke

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000029)
[36m(MultiAgentEnvRunner pid=43939)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 32x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 18x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 14x across cluster][0m


== Status ==
Current time: 2025-01-16 02:21:39 (running for 00:03:32.53)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=10.88,num_env_steps_sampled_lifetime=124000,env_runners/episode_return_mean=-0.0019999999999999996 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000030)
[36m(MultiAgentEnvRunner pid=43938)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 44x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 16x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 28x across cluster][0m


== Status ==
Current time: 2025-01-16 02:21:44 (running for 00:03:37.56)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=11.72,num_env_steps_sampled_lifetime=128000,env_runners/episode_return_mean=-0.007000000000000004 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_p

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000031)
[36m(MultiAgentEnvRunner pid=43938)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 21x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 5x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 16x across cluster][0m


== Status ==
Current time: 2025-01-16 02:21:49 (running for 00:03:42.57)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=11.71,num_env_steps_sampled_lifetime=132000,env_runners/episode_return_mean=-0.004000000000000001 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_p

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000032)
[36m(MultiAgentEnvRunner pid=43938)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 31x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 12x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 19x across cluster][0m


== Status ==
Current time: 2025-01-16 02:21:54 (running for 00:03:47.57)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-01-16 02:21:59 (running for 00:03:52.64)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=11.5,num_env_steps_sampled_lifetime=136000,env_runners/episode_return_mean=-0.013000000000000006 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000033)
[36m(MultiAgentEnvRunner pid=43938)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 34x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 19x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 15x across cluster][0m


== Status ==
Current time: 2025-01-16 02:22:04 (running for 00:03:57.71)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=11.11,num_env_steps_sampled_lifetime=140000,env_runners/episode_return_mean=-0.016 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_placement': True

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000034)
[36m(MultiAgentEnvRunner pid=43938)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 36x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 18x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 18x across cluster][0m


== Status ==
Current time: 2025-01-16 02:22:09 (running for 00:04:02.72)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=10.5,num_env_steps_sampled_lifetime=144000,env_runners/episode_return_mean=-0.012000000000000005 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_pl

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000035)
[36m(MultiAgentEnvRunner pid=43939)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 31x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 11x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 20x across cluster][0m


== Status ==
Current time: 2025-01-16 02:22:14 (running for 00:04:07.73)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=11.59,num_env_steps_sampled_lifetime=148000,env_runners/episode_return_mean=-0.035 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_placement': True

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000036)
[36m(MultiAgentEnvRunner pid=43938)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 59x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 23x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 36x across cluster][0m


== Status ==
Current time: 2025-01-16 02:22:19 (running for 00:04:12.79)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-01-16 02:22:24 (running for 00:04:17.86)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=10.52,num_env_steps_sampled_lifetime=152000,env_runners/episode_return_mean=-0.0030000000000000014 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_work

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000037)
[36m(MultiAgentEnvRunner pid=43938)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 31x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 17x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 14x across cluster][0m


== Status ==
Current time: 2025-01-16 02:22:29 (running for 00:04:22.91)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=11.41,num_env_steps_sampled_lifetime=156000,env_runners/episode_return_mean=-0.005 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_placement': True

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000038)
[36m(MultiAgentEnvRunner pid=43938)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 20x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 9x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 11x across cluster][0m


== Status ==
Current time: 2025-01-16 02:22:34 (running for 00:04:27.94)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=10.1,num_env_steps_sampled_lifetime=160000,env_runners/episode_return_mean=-0.004000000000000001 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_pl

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000039)
[36m(MultiAgentEnvRunner pid=43939)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 20x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 6x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 14x across cluster][0m


== Status ==
Current time: 2025-01-16 02:22:40 (running for 00:04:33.04)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=11.25,num_env_steps_sampled_lifetime=164000,env_runners/episode_return_mean=-0.018000000000000002 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_p

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000040)
[36m(MultiAgentEnvRunner pid=43938)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 34x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 17x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 17x across cluster][0m


== Status ==
Current time: 2025-01-16 02:22:45 (running for 00:04:38.12)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


== Status ==
Current time: 2025-01-16 02:22:50 (running for 00:04:43.17)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)


Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=10.48,num_env_steps_sampled_lifetime=168000,env_runners/episode_return_mean=-0.004000000000000001 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worke

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000041)
[36m(MultiAgentEnvRunner pid=43939)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 27x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 12x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 15x across cluster][0m


== Status ==
Current time: 2025-01-16 02:22:55 (running for 00:04:48.23)
Using FIFO scheduling algorithm.
Logical resource usage: 3.0/10 CPUs, 1.0/1 GPUs (0.0/1.0 accelerator_type:L4)
Result logdir: /tmp/ray/session_2025-01-16_02-18-04_666737_42841/artifacts/2025-01-16_02-18-06/PPO_2025-01-16_02-18-06/driver_artifacts
Number of trials: 1/1 (1 RUNNING)




2025-01-16 02:22:56,996	INFO tune.py:1009 -- Wrote the latest version of all result files and experiment state to '/root/ray_results/PPO_2025-01-16_02-18-06' in 0.0354s.


[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Iter=43 win-rate=0.97 -> adding new opponent to the mix (main_v1).
Trial PPO_open_spiel_env_1f9c6_00000 reported env_runners/episode_len_mean=9.88,num_env_steps_sampled_lifetime=172000,env_runners/episode_return_mean=0.0 with parameters={'exploration_config': {}, 'extra_python_environs_for_driver': {}, 'extra_python_environs_for_worker': {}, 'placement_strategy': 'PACK', 'num_gpus': 0, '_fake_gpus': False, 'num_cpus_for_main_process': 1, 'eager_tracing': True, 'eager_max_retraces': 20, 'tf_session_args': {'intra_op_parallelism_threads': 2, 'inter_op_parallelism_threads': 2, 'gpu_options': {'allow_growth': True}, 'log_device_placement': False, 'device_count': {'CPU': 1}, 'allow_soft_placement': True}, 'local_tf_session_args': {'intra_op_parallelism_threads': 8, 'inter_op_parallelism_threads': 8}, 'torch_compile_learner': False, 'torch_compile_learner_what_to_compile': <TorchCompileWhatToCompile.FOR

[36m(PPO(env=open_spiel_env; env-runners=2; learners=1; multi-agent=True) pid=43809)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000042)
[36m(MultiAgentEnvRunner pid=43938)[0m OpenSpiel exception: /project/open_spiel/games/connect_four/connect_four.cc:96 CellAt(kRows - 1, move) == CellState::kEmpty[32m [repeated 14x across cluster][0m
[36m(MultiAgentEnvRunner pid=43939)[0m CellAt(kRows - 1, move) = X, CellState::kEmpty = Empty[32m [repeated 9x across cluster][0m
[36m(MultiAgentEnvRunner pid=43938)[0m CellAt(kRows - 1, move) = O, CellState::kEmpty = Empty[32m [repeated 5x across cluster][0m
2025-01-16 02:22:57,366	INFO tune.py:1041 -- Total run time: 290.39 seconds (289.97 seconds for the tuning loop).


In [9]:
# Restore the trained Algorithm (with exploration switched off) and let a
# human play against it on the command line.
#
# Relies on names created by earlier cells: `args`, `config`, `results`,
# `pyspiel`, `Environment`, `register_env`, `OpenSpielEnv` and
# `ask_user_for_action`.
if args.num_episodes_human_play > 0:
    # Register the env creator under the name "open_spiel_env" before the
    # Algorithm is built from `config`.
    register_env("open_spiel_env",
                 lambda _: OpenSpielEnv(pyspiel.load_game(args.env)))
    num_episodes = 0
    config.explore = False
    algo = config.build()

    # From here on the Algorithm exists, so guarantee `algo.stop()` runs even
    # if restoring fails or the user interrupts the input prompt.
    try:
        if args.from_checkpoint:
            algo.restore(args.from_checkpoint)
        else:
            checkpoint = results.get_best_result().checkpoint
            if not checkpoint:
                raise ValueError("No last checkpoint found in results!")
            algo.restore(checkpoint)

        if args.enable_new_api_stack:
            rl_module = algo.get_module("main")

        # Play from the command line against the trained agent
        # in an actual (non-RLlib-wrapped) open-spiel env.
        human_player = 1
        env = Environment(args.env)

        while num_episodes < args.num_episodes_human_play:
            print("You play as {}".format("o" if human_player else "x"))
            time_step = env.reset()
            while not time_step.last():
                player_id = time_step.observations["current_player"]
                if player_id == human_player:
                    action = ask_user_for_action(time_step)
                else:
                    obs = np.array(
                        time_step.observations["info_state"][player_id]
                    )
                    legal = time_step.observations["legal_actions"][player_id]
                    if args.enable_new_api_stack:
                        # `detach().cpu()` keeps the numpy conversion valid
                        # even if the output carries grad or sits on a GPU.
                        logits = (
                            rl_module.forward_inference(
                                {
                                    "obs": torch.from_numpy(obs)
                                    .unsqueeze(0)
                                    .float()
                                }
                            )["action_dist_inputs"][0]
                            .detach()
                            .cpu()
                            .numpy()
                        )
                        # Greedy choice restricted to legal moves: identical
                        # to the plain argmax whenever that one is legal, and
                        # otherwise the agent's best legal move instead of a
                        # random one.
                        action = legal[int(np.argmax(logits[legal]))]
                    else:
                        action = algo.compute_single_action(
                            obs, policy_id="main"
                        )
                        # In case computer chooses an invalid action, pick a
                        # random one.
                        if action not in legal:
                            action = np.random.choice(legal)
                    # Hand the env a plain Python int rather than a numpy
                    # scalar.
                    action = int(action)
                time_step = env.step([action])
                print(f"\n{env.get_state}")

            print(f"\n{env.get_state}")

            print("End of game!")
            if time_step.rewards[human_player] > 0:
                print("You win")
            elif time_step.rewards[human_player] < 0:
                print("You lose")
            else:
                print("Draw")
            # Switch order of players.
            human_player = 1 - human_player

            num_episodes += 1
    finally:
        algo.stop()



Usage stats collection is enabled by default for nightly wheels. To disable this, run the following command: `ray disable-usage-stats` before starting Ray. See https://docs.ray.io/en/master/cluster/usage-stats.html for more details.


2025-01-16 02:23:00,033	INFO worker.py:1841 -- Started a local Ray instance.
[36m(pid=46255)[0m lz4 not available, disabling sample compression. This will significantly impact RLlib performance. To install lz4, run `pip install lz4`.
[36m(pid=46260)[0m lz4 not available, disabling sample compression. This will significantly impact RLlib performance. To install lz4, run `pip install lz4`.[32m [repeated 2x across cluster][0m
[36m(_WrappedExecutable pid=46260)[0m Setting up process group for: env:// [rank=0, world_size=1]
2025-01-16 02:23:18,559	INFO trainable.py:161 -- Trainable.setup took 19.788 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2025-01-16 02:23:18,591	INFO trainable.py:583 -- Restored on 172.28.0.12 from checkpoint: Checkpoint(filesystem=local, path=/root/ray_results/PPO_2025-01-16_02-18-06/PPO_open_spiel_env_1f9c6_00000_0_2025-01-16_02-18-07/checkpoint_000042)


You play as o

.......
.......
.......
.......
.......
..x....

Choose an action from [0, 1, 2, 3, 4, 5, 6]:
0

.......
.......
.......
.......
.......
o.x....


.......
.......
.......
.......
..x....
o.x....

Choose an action from [0, 1, 2, 3, 4, 5, 6]:
0

.......
.......
.......
.......
o.x....
o.x....


.......
.......
.......
..x....
o.x....
o.x....

Choose an action from [0, 1, 2, 3, 4, 5, 6]:
2

.......
.......
..o....
..x....
o.x....
o.x....


.......
.......
..o....
..x....
o.x....
o.xx...

Choose an action from [0, 1, 2, 3, 4, 5, 6]:
3

.......
.......
..o....
..x....
o.xo...
o.xx...


.......
.......
..o....
..x....
o.xo...
o.xxx..

Choose an action from [0, 1, 2, 3, 4, 5, 6]:
4

.......
.......
..o....
..x....
o.xoo..
o.xxx..


.......
.......
..o....
..x.x..
o.xoo..
o.xxx..

Choose an action from [0, 1, 2, 3, 4, 5, 6]:
3

.......
.......
..o....
..xox..
o.xoo..
o.xxx..


.......
.......
..o.x..
..xox..
o.xoo..
o.xxx..

Choose an action from [0, 1, 2, 3, 4, 5, 6]:
3

......