## Video Poker using rllib and PPO

In [1]:
#!pip install ray[rllib]

In [2]:
#!pip install ipywidgets gputil

In [3]:
import random
import gymnasium as gym
from gymnasium import spaces, vector
import numpy as np
from ray import tune
from collections import Counter
from scipy.stats import binom, nbinom, beta, poisson, gamma, norm, geom
from ray.rllib.env import MultiAgentEnv

## 1) Write up the environment and test that it is working.



In [4]:
# Define global variables
deck = ['2', '3', '4', '5', '6', '7', '8', '9', '10', 'Jack', 'Queen', 'King', 'Ace'] * 4

def initialize_deck():
    suits = ['Clubs', 'Diamonds', 'Hearts', 'Spades']
    ranks = ['Ace','2', '3', '4', '5', '6', '7', '8', '9', '10', 'Jack', 'Queen', 'King']
    deck = [(rank, suit) for suit in suits for rank in ranks]
    random.shuffle(deck)
    return deck

def partition_selected_unselected(data_list, indicator_list, max_select=None):
    if not max_select:
      max_select = len(data_list)
    unselected_list = []
    selected_list = []

    # Iterate over both lists simultaneously
    for data, indicator in zip(data_list, indicator_list):
        if indicator == 1 and len(selected_list) < max_select:
            selected_list.append(data)
        else:
            unselected_list.append(data)

    return unselected_list, selected_list

def create_card_matrix(cards):
    # Ranks and suits mapping to index
    ranks = {'Ace': 0, '2': 1, '3': 2, '4': 3, '5': 4, '6': 5, '7': 6,
             '8': 7, '9': 8, '10': 9, 'Jack': 10, 'Queen': 11, 'King': 12}
    suits = {'Clubs': 0, 'Diamonds': 1, 'Hearts': 2, 'Spades': 3}

    # Initialize the matrix with zeros
    matrix = [[0]*4 for _ in range(13)]

    # Fill the matrix based on cards input
    for rank, suit in cards:
        if rank in ranks and suit in suits:
            matrix[ranks[rank]][suits[suit]] = 1

    return matrix

def deal_hand(deck, hand_size):
    return [deck.pop() for _ in range(hand_size)]

def state_function(player_hand, deck):
    return {
        'player_hand': create_card_matrix(player_hand),
        'cards_left_in_deck': create_card_matrix(deck)
    }


# Define the rewards for each hand
rewards = {
    "Royal Flush": 800,
    "Straight Flush": 800,
    "Four of a Kind": 420,
    "Full House": 160,
    "Flush": 140,
    "Straight": 120,
    "Three of a Kind": 90,
    "Two Pair": 40,
    "Pair": 20,
    "High Card": 5
}

In [5]:
import gymnasium as gym
from collections import Counter

class CardGameEnv(gym.Env):
    def __init__(self, seed=None):
        self.hand_size = 8
        self.MAX_PLAYS = 4  # Limit the number of plays to 4
        self.MAX_DISCARDS = 3  # Limit the number of discards to 3

        self.action_space = gym.spaces.MultiDiscrete([2]*(1+self.hand_size))
        self.observation_space = gym.spaces.Dict({
            'player_hand': gym.spaces.MultiBinary([13, 4]),
            'cards_left_in_deck': gym.spaces.MultiBinary([13, 4])
        })

        self.num_discards = 0
        self.num_plays = 0
        self.deck = initialize_deck()
        self.player_hand = deal_hand(self.deck, self.hand_size)
        self.player_hand.sort()
        self.state = state_function(self.player_hand, self.deck)

    #added by Ron
    def save_checkpoint(self, tmp_checkpoint_dir):
        checkpoint_path = os.path.join(tmp_checkpoint_dir, "multiturn_model.pth")
        torch.save(self.model.state_dict(), checkpoint_path)
        return tmp_checkpoint_dir

    def load_checkpoint(self, tmp_checkpoint_dir):
        checkpoint_path = os.path.join(tmp_checkpoint_dir, "multiturn_model.pth")
        self.model.load_state_dict(torch.load(checkpoint_path))


    def reset(self, seed=None, options=None):
        self.num_discards = 0
        self.num_plays = 0
        self.deck = initialize_deck()
        self.player_hand = deal_hand(self.deck, self.hand_size)
        self.player_hand.sort()
        self.state = state_function(self.player_hand, self.deck)
        return self.state, {}

    def step(self, action):
        dec = action[0]
        if self.num_discards >= self.MAX_DISCARDS:
          dec = 1
        if self.num_plays >= self.MAX_PLAYS:
          dec = 0

        selection = action[1:]
        remaining_hand, selected_hand = partition_selected_unselected(self.player_hand, selection, max_select=5)

        if dec == 1:  # Play
            self.num_plays += 1
            hand_rank_counts = Counter([rank for rank, _ in selected_hand])
            hand_suit_counts = Counter([suit for _, suit in selected_hand])


            # Define the mapping of card ranks to their order
            card_order = {'Ace': 0, '2': 1, '3': 2, '4': 3, '5': 4, '6': 5, '7': 6, '8': 7, '9': 8, '10': 9, 'Jack': 10, 'Queen': 11, 'King': 12}

            # Check for different hand categories
            if len(hand_rank_counts) == 5 and len(hand_suit_counts) == 1 and all(rank in ['10', 'Jack', 'Queen', 'King', 'Ace'] for rank, _ in selected_hand):
                hand_category = "Royal Flush"
            elif len(hand_rank_counts) == 5 and len(hand_suit_counts) == 1 and all(rank in card_order.keys() for rank, _ in selected_hand) and sorted(selected_hand, key=lambda x: card_order[x[0]]) in (['Ace','2', '3', '4', '5'], ['2', '3', '4', '5', '6'], ['3', '4', '5', '6', '7'], ['4', '5', '6', '7', '8'], ['5', '6', '7', '8', '9'], ['6', '7', '8', '9', '10'], ['7', '8', '9', '10', 'Jack'], ['8', '9', '10', 'Jack', 'Queen'], ['9', '10', 'Jack', 'Queen', 'King']):
                hand_category = "Straight Flush"
            elif any(count == 4 for count in hand_rank_counts.values()):
                hand_category = "Four of a Kind"
            elif any(count == 3 for count in hand_rank_counts.values()) and any(count == 2 for count in hand_rank_counts.values()):
                hand_category = "Full House"
            elif len(hand_suit_counts) == 1 and len(selected_hand) == 5:
                hand_category = "Flush"
            elif len(hand_rank_counts) == 5 and all(rank in card_order.keys() for rank, _ in selected_hand) and sorted(selected_hand, key=lambda x: card_order[x[0]]) in (['Ace','2', '3', '4', '5'], ['2', '3', '4', '5', '6'], ['3', '4', '5', '6', '7'], ['4', '5', '6', '7', '8'], ['5', '6', '7', '8', '9'], ['6', '7', '8', '9', '10'], ['7', '8', '9', '10', 'Jack'], ['8', '9', '10', 'Jack', 'Queen'], ['9', '10', 'Jack', 'Queen', 'King']):
                hand_category = "Straight"
            elif any(count == 3 for count in hand_rank_counts.values()):
                hand_category = "Three of a Kind"
            elif sum(count == 2 for count in hand_rank_counts.values()) == 2:
                hand_category = "Two Pair"
            elif any(count == 2 for count in hand_rank_counts.values()):
                hand_category = "Pair"
            else:
                hand_category = "High Card"

            reward = rewards.get(hand_category, 0)  # Reward based on hand category

        elif dec == 0:  # Discard
            self.num_discards += 1
            reward = 0  # Discarding has no reward

        self.player_hand = remaining_hand + deal_hand(self.deck, len(selected_hand))
        self.player_hand.sort()
        self.state = state_function(self.player_hand, self.deck)

        done = self.num_plays == self.MAX_PLAYS or len(self.deck) == 0

        #print(f"Player's Hand: {', '.join([f'{rank} of {suit}' for rank, suit in self.player_hand])}")
        #print(f"Action: {action}, Reward: {reward}, Done: {done}")

        return self.state, reward, done, False, {}

In [6]:
# Create an instance of the CardGameEnv class
env = CardGameEnv()
env.reset()

#Nice way to test Action Space before running RLLib
action = env.action_space.sample()
print(action)
print(env.player_hand)
state, _, _, _, _ = env.step(action)
len(state['cards_left_in_deck'])
print(env.player_hand)

[1 0 1 1 0 0 1 0 0]
[('2', 'Diamonds'), ('2', 'Spades'), ('3', 'Spades'), ('6', 'Clubs'), ('7', 'Spades'), ('9', 'Hearts'), ('Ace', 'Diamonds'), ('Ace', 'Spades')]
[('2', 'Clubs'), ('2', 'Diamonds'), ('6', 'Clubs'), ('7', 'Hearts'), ('7', 'Spades'), ('8', 'Diamonds'), ('Ace', 'Diamonds'), ('Ace', 'Spades')]


In [7]:
# Create an instance of the CardGameEnv class
env = CardGameEnv()
env.reset()

env.observation_space.sample()

# Testing if our observation space matches is being returned by our step function
env.observation_space.contains(state)

True

In [8]:
state, _ = env.reset()
env.observation_space.contains(state)

True

In [9]:
# Create an instance of the CardGameEnv class
env = CardGameEnv()
env.reset()

total_reward = 0  # Initialize total reward

# Game loop
while True:
    print("Player's Hand:")
    for rank, suit in env.player_hand:
        print(f"{rank} of {suit}")

    action = env.action_space.sample()

    state, reward, done, _, _ = env.step(action) # Ignoring False, {}

    total_reward += reward  # Accumulate the reward obtained in each step

    print(f"Decision made by the player: {action}")
    print(f"Total Reward after this turn: {total_reward}")

    if done:
        break

print("Game over")

Player's Hand:
3 of Diamonds
5 of Clubs
7 of Clubs
7 of Diamonds
7 of Spades
9 of Spades
Ace of Spades
Queen of Spades
Decision made by the player: [0 0 1 1 1 1 1 0 0]
Total Reward after this turn: 0
Player's Hand:
10 of Diamonds
3 of Diamonds
5 of Diamonds
8 of Clubs
Ace of Spades
King of Diamonds
Queen of Clubs
Queen of Spades
Decision made by the player: [1 1 0 1 0 0 0 1 0]
Total Reward after this turn: 5
Player's Hand:
2 of Clubs
3 of Diamonds
3 of Hearts
8 of Clubs
9 of Hearts
Ace of Spades
King of Diamonds
Queen of Spades
Decision made by the player: [0 0 1 1 1 1 0 0 1]
Total Reward after this turn: 5
Player's Hand:
2 of Clubs
4 of Diamonds
5 of Spades
6 of Hearts
8 of Hearts
9 of Diamonds
Ace of Spades
King of Diamonds
Decision made by the player: [0 0 1 0 1 0 0 0 0]
Total Reward after this turn: 5
Player's Hand:
2 of Clubs
5 of Spades
6 of Diamonds
8 of Hearts
9 of Diamonds
Ace of Spades
King of Diamonds
Queen of Hearts
Decision made by the player: [0 1 0 0 0 1 1 1 1]
Total Rew

## 2) Use RLlib to get as high an expected score as possible with 4 Plays and 3 Discards.

In [10]:
import ray
from ray import tune, air, train
from ray.rllib.algorithms.ppo import PPOConfig
from functools import partial

In [11]:
if ray.is_initialized():
  ray.shutdown()
ray.init(num_cpus=20)

2024-04-23 12:11:09,672	INFO worker.py:1752 -- Started a local Ray instance.


0,1
Python version:,3.10.11
Ray version:,2.10.0


[36m(PPO pid=37324)[0m Trainable.setup took 15.648 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
[36m(PPO pid=37324)[0m Restored on 127.0.0.1 from checkpoint: Checkpoint(filesystem=local, path=C:/Users/Fitz/ray_results/PPO_2024-04-22_19-11-08/PPO_CardGameEnv_bf9f4_00000_0_2024-04-22_19-11-08/checkpoint_000009)
[36m(PPO pid=37324)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/Fitz/ray_results/PPO_2024-04-23_12-13-30/PPO_CardGameEnv_9203f_00000_0_2024-04-23_12-13-30/checkpoint_000000)
[36m(PPO pid=37324)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/Fitz/ray_results/PPO_2024-04-23_12-13-30/PPO_CardGameEnv_9203f_00000_0_2024-04-23_12-13-30/checkpoint_000001)
[36m(PPO pid=37324)[0m Checkpoint successfully created at: Checkpoint(filesystem=local, path=C:/Users/Fitz/ray_results/PPO_2024-04-23_12-13-30/PPO_CardGameEnv_9203f_00000_0_2024-0

In [12]:
config = (PPOConfig()
          .environment(CardGameEnv)
          .framework('torch')
          .training(gamma=1)
          .rollouts(num_rollout_workers=19)
)

# Equivalent code added via .run call to use new checkpoint features, check next cell
# 
#stop = {"timesteps_total": 2000000}
#checkpoint_path = "C:\\Users\\Fitz\\ray_results\\PPO_2024-04-22_19-11-08\\PPO_CardGameEnv_bf9f4_00000_0_2024-04-22_19-11-08\\checkpoint_000009"
#
#tuner = tune.Tuner(
#    "PPO",
#    param_space=config.to_dict(),
#    run_config=air.RunConfig(
#        stop=stop,
#        checkpoint_config=train.CheckpointConfig(
#            checkpoint_frequency=50,
#            checkpoint_at_end=True)),
#)
#tuner.fit()





In [14]:
# This checkpoint path will need to be updated every run to pick up in the right place.
# Use the "best checkpoint" spit out below for reference
checkpoint_path = 'C:\\Users\\Fitz\\ray_results\\PPO_2024-04-22_19-11-08\\PPO_CardGameEnv_bf9f4_00000_0_2024-04-22_19-11-08\\checkpoint_000009'
stop = {"training_iteration": 1000}
analysis = tune.run(
    "PPO",  # or use ppo.PPOTrainer if you prefer not to use the string identifier
    config=config,
    restore=checkpoint_path,
    stop=stop, 
    checkpoint_freq=50,  
    checkpoint_at_end=True
)

2024-04-23 12:13:30,217	INFO tune.py:613 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949
  gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


0,1
Current time:,2024-04-23 13:51:38
Running for:,01:38:08.06
Memory:,31.5/127.9 GiB

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_CardGameEnv_9203f_00000,TERMINATED,127.0.0.1:37324,1000,5852.71,4000000,173.922,900,20,6.54412




Trial name,agent_timesteps_total,connector_metrics,counters,custom_metrics,episode_len_mean,episode_media,episode_reward_max,episode_reward_mean,episode_reward_min,episodes_this_iter,info,num_agent_steps_sampled,num_agent_steps_trained,num_env_steps_sampled,num_env_steps_sampled_this_iter,num_env_steps_sampled_throughput_per_sec,num_env_steps_trained,num_env_steps_trained_this_iter,num_env_steps_trained_throughput_per_sec,num_faulty_episodes,num_healthy_workers,num_in_flight_async_reqs,num_remote_worker_restarts,num_steps_trained_this_iter,perf,policy_reward_max,policy_reward_mean,policy_reward_min,sampler_perf,sampler_results,timers
PPO_CardGameEnv_9203f_00000,4000000,"{'ObsPreprocessorConnector_ms': 0.056225723690456815, 'StateBufferConnector_ms': 0.0127969614041397, 'ViewRequirementAgentConnector_ms': 0.18993857639287812}","{'num_env_steps_sampled': 4000000, 'num_env_steps_trained': 4000000, 'num_agent_steps_sampled': 4000000, 'num_agent_steps_trained': 4000000}",{},6.54412,{},900,173.922,20,612,"{'learner': {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 1.0116822875315143, 'cur_kl_coeff': 0.4500000000000001, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 9.916556386024721, 'policy_loss': -0.07199774804695319, 'vf_loss': 9.982358870967742, 'vf_explained_var': 8.662541707356771e-07, 'kl': 0.013767316543621696, 'entropy': 1.8508510284526374, 'entropy_coeff': 0.0}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 128.0, 'num_grad_updates_lifetime': 464535.5, 'diff_num_grad_updates_vs_sampler_policy': 464.5}}, 'num_env_steps_sampled': 4000000, 'num_env_steps_trained': 4000000, 'num_agent_steps_sampled': 4000000, 'num_agent_steps_trained': 4000000}",4000000,4000000,4000000,4000,339.543,4000000,4000,339.543,0,19,0,0,4000,"{'cpu_util_percent': 20.259999999999998, 'ram_util_percent': 24.660000000000007, 'gpu_util_percent0': 1.0, 'vram_util_percent0': 0.96728515625}",{},{},{},"{'mean_raw_obs_processing_ms': 0.7594124651379951, 'mean_inference_ms': 4.294106366495418, 'mean_action_processing_ms': 0.2113343891649255, 'mean_env_wait_ms': 0.10216062209695567, 'mean_env_render_ms': 0.0}","{'episode_reward_max': 900.0, 'episode_reward_min': 20.0, 'episode_reward_mean': 173.92156862745097, 'episode_len_mean': 6.544117647058823, 'episode_media': {}, 'episodes_this_iter': 612, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [205.0, 120.0, 190.0, 190.0, 50.0, 50.0, 220.0, 470.0, 205.0, 150.0, 120.0, 120.0, 345.0, 190.0, 120.0, 55.0, 240.0, 120.0, 260.0, 85.0, 520.0, 70.0, 35.0, 50.0, 70.0, 450.0, 50.0, 205.0, 205.0, 120.0, 50.0, 190.0, 65.0, 35.0, 70.0, 50.0, 120.0, 85.0, 35.0, 35.0, 275.0, 185.0, 175.0, 120.0, 205.0, 210.0, 205.0, 135.0, 175.0, 205.0, 85.0, 85.0, 605.0, 205.0, 155.0, 35.0, 220.0, 35.0, 190.0, 50.0, 190.0, 210.0, 275.0, 105.0, 85.0, 55.0, 120.0, 225.0, 450.0, 50.0, 220.0, 310.0, 135.0, 170.0, 50.0, 465.0, 55.0, 190.0, 135.0, 260.0, 105.0, 190.0, 345.0, 70.0, 65.0, 20.0, 240.0, 220.0, 80.0, 175.0, 225.0, 100.0, 435.0, 225.0, 205.0, 135.0, 120.0, 190.0, 190.0, 220.0, 85.0, 105.0, 450.0, 20.0, 120.0, 310.0, 290.0, 465.0, 50.0, 555.0, 295.0, 135.0, 205.0, 105.0, 105.0, 70.0, 65.0, 35.0, 50.0, 465.0, 465.0, 85.0, 120.0, 150.0, 450.0, 435.0, 105.0, 150.0, 225.0, 190.0, 120.0, 435.0, 50.0, 120.0, 450.0, 170.0, 345.0, 225.0, 20.0, 240.0, 50.0, 35.0, 205.0, 155.0, 50.0, 65.0, 35.0, 205.0, 205.0, 135.0, 120.0, 220.0, 70.0, 85.0, 135.0, 120.0, 500.0, 70.0, 435.0, 35.0, 240.0, 205.0, 120.0, 135.0, 35.0, 140.0, 190.0, 65.0, 150.0, 190.0, 295.0, 135.0, 190.0, 135.0, 205.0, 155.0, 80.0, 135.0, 205.0, 50.0, 140.0, 135.0, 290.0, 465.0, 450.0, 50.0, 80.0, 225.0, 480.0, 105.0, 80.0, 135.0, 190.0, 65.0, 205.0, 135.0, 35.0, 150.0, 205.0, 50.0, 120.0, 220.0, 70.0, 225.0, 205.0, 135.0, 535.0, 205.0, 210.0, 80.0, 50.0, 120.0, 295.0, 205.0, 465.0, 50.0, 70.0, 55.0, 100.0, 65.0, 210.0, 450.0, 20.0, 205.0, 35.0, 190.0, 105.0, 170.0, 105.0, 260.0, 210.0, 35.0, 80.0, 220.0, 500.0, 120.0, 450.0, 135.0, 70.0, 35.0, 135.0, 485.0, 535.0, 65.0, 550.0, 135.0, 190.0, 205.0, 175.0, 170.0, 205.0, 120.0, 50.0, 80.0, 70.0, 220.0, 240.0, 450.0, 190.0, 105.0, 465.0, 55.0, 170.0, 100.0, 70.0, 50.0, 435.0, 175.0, 105.0, 150.0, 50.0, 470.0, 85.0, 175.0, 135.0, 155.0, 205.0, 50.0, 105.0, 50.0, 120.0, 55.0, 35.0, 135.0, 65.0, 65.0, 225.0, 70.0, 260.0, 190.0, 35.0, 50.0, 120.0, 555.0, 65.0, 465.0, 120.0, 190.0, 35.0, 450.0, 50.0, 50.0, 80.0, 175.0, 120.0, 205.0, 275.0, 190.0, 205.0, 205.0, 225.0, 50.0, 65.0, 190.0, 65.0, 55.0, 550.0, 135.0, 55.0, 70.0, 175.0, 465.0, 480.0, 225.0, 150.0, 50.0, 190.0, 290.0, 435.0, 85.0, 205.0, 140.0, 290.0, 190.0, 70.0, 175.0, 170.0, 345.0, 190.0, 90.0, 135.0, 220.0, 35.0, 35.0, 450.0, 50.0, 465.0, 85.0, 260.0, 50.0, 120.0, 55.0, 80.0, 310.0, 205.0, 900.0, 275.0, 155.0, 205.0, 35.0, 35.0, 35.0, 205.0, 85.0, 225.0, 120.0, 135.0, 170.0, 155.0, 50.0, 240.0, 100.0, 150.0, 50.0, 175.0, 120.0, 135.0, 190.0, 210.0, 50.0, 105.0, 450.0, 105.0, 35.0, 535.0, 70.0, 150.0, 535.0, 205.0, 65.0, 85.0, 190.0, 205.0, 205.0, 80.0, 65.0, 225.0, 225.0, 245.0, 105.0, 260.0, 35.0, 205.0, 240.0, 35.0, 205.0, 65.0, 35.0, 150.0, 135.0, 120.0, 485.0, 190.0, 135.0, 205.0, 170.0, 155.0, 50.0, 105.0, 175.0, 85.0, 120.0, 435.0, 225.0, 220.0, 190.0, 175.0, 70.0, 225.0, 190.0, 120.0, 120.0, 50.0, 135.0, 155.0, 70.0, 120.0, 50.0, 70.0, 275.0, 120.0, 220.0, 140.0, 50.0, 120.0, 20.0, 80.0, 205.0, 330.0, 485.0, 240.0, 210.0, 465.0, 155.0, 190.0, 50.0, 35.0, 120.0, 105.0, 190.0, 70.0, 190.0, 35.0, 450.0, 80.0, 260.0, 190.0, 85.0, 120.0, 65.0, 240.0, 120.0, 65.0, 275.0, 225.0, 450.0, 50.0, 295.0, 50.0, 20.0, 205.0, 35.0, 450.0, 65.0, 290.0, 150.0, 65.0, 155.0, 70.0, 480.0, 105.0, 465.0, 885.0, 105.0, 120.0, 85.0, 35.0, 190.0, 70.0, 120.0, 120.0, 170.0, 70.0, 295.0, 175.0, 50.0, 35.0, 135.0, 65.0, 65.0, 450.0, 70.0, 120.0, 120.0, 65.0, 480.0, 135.0, 140.0, 485.0, 190.0, 155.0, 220.0, 450.0, 120.0, 50.0, 190.0, 105.0, 135.0, 210.0, 100.0, 190.0, 35.0, 155.0, 120.0, 105.0, 65.0, 135.0, 35.0, 105.0, 220.0, 190.0, 275.0, 225.0, 100.0, 120.0, 135.0, 85.0, 65.0, 170.0, 50.0, 135.0, 155.0, 35.0, 65.0, 535.0, 190.0, 20.0, 50.0, 205.0, 35.0, 205.0, 120.0, 225.0, 105.0, 65.0, 85.0, 500.0, 190.0, 55.0, 275.0, 55.0, 345.0, 135.0, 120.0, 190.0, 135.0, 85.0, 275.0, 190.0, 50.0, 135.0, 170.0, 205.0, 65.0, 135.0, 205.0, 135.0, 105.0, 50.0, 120.0, 85.0, 225.0, 240.0, 210.0, 80.0, 275.0, 275.0, 55.0, 465.0, 175.0, 290.0, 205.0, 100.0, 70.0, 35.0, 120.0, 150.0, 210.0, 105.0, 465.0, 55.0, 225.0], 'episode_lengths': [4, 7, 7, 6, 7, 7, 6, 7, 7, 4, 7, 7, 7, 7, 6, 6, 7, 7, 7, 6, 4, 7, 7, 7, 7, 7, 6, 7, 7, 4, 7, 6, 7, 7, 7, 7, 5, 4, 7, 7, 5, 7, 5, 7, 7, 7, 7, 7, 4, 5, 7, 7, 7, 7, 5, 7, 7, 7, 7, 7, 7, 7, 5, 7, 7, 7, 4, 7, 7, 7, 7, 5, 7, 5, 7, 7, 7, 7, 4, 7, 7, 7, 7, 7, 7, 7, 7, 4, 7, 7, 7, 7, 7, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, 6, 7, 6, 6, 7, 7, 7, 7, 7, 6, 7, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 7, 7, 5, 7, 7, 6, 7, 6, 5, 7, 5, 7, 7, 6, 7, 5, 7, 7, 5, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 7, 7, 6, 7, 7, 6, 7, 6, 6, 7, 7, 7, 7, 6, 7, 5, 7, 7, 7, 5, 7, 7, 7, 6, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 5, 5, 7, 7, 7, 7, 6, 4, 7, 6, 5, 6, 6, 7, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, 7, 5, 7, 7, 6, 7, 7, 4, 7, 7, 7, 6, 6, 7, 7, 7, 6, 7, 6, 7, 6, 6, 6, 4, 7, 7, 4, 7, 7, 6, 7, 7, 5, 6, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 6, 6, 7, 7, 7, 7, 6, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 7, 7, 7, 7, 7, 7, 7, 4, 7, 7, 7, 4, 7, 7, 7, 7, 7, 7, 6, 7, 6, 5, 7, 7, 6, 4, 7, 7, 6, 5, 5, 7, 7, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 6, 7, 7, 6, 6, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 7, 7, 7, 7, 7, 6, 7, 6, 6, 6, 7, 7, 7, 4, 7, 7, 7, 6, 4, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 7, 7, 7, 5, 7, 7, 7, 7, 7, 7, 7, 7, 6, 7, 7, 5, 4, 6, 7, 7, 6, 4, 7, 7, 6, 7, 6, 7, 6, 7, 6, 7, 7, 6, 7, 7, 5, 7, 7, 7, 6, 7, 7, 7, 7, 7, 5, 4, 6, 5, 7, 7, 7, 7, 6, 7, 7, 7, 6, 7, 7, 5, 7, 7, 7, 7, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 5, 6, 7, 7, 4, 4, 7, 6, 6, 7, 6, 7, 7, 7, 7, 7, 5, 7, 7, 7, 7, 7, 7, 4, 7, 7, 6, 7, 6, 6, 7, 7, 7, 7, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 4, 7, 7, 7, 7, 7, 4, 6, 7, 7, 7, 5, 4, 7, 7, 7, 4, 7, 7, 4, 7, 7, 7, 7, 7, 4, 7, 4, 6, 7, 7, 7, 7, 7, 4, 7, 7, 7, 7, 5, 7, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 4, 7, 7, 7, 7, 6, 7, 5, 7, 7, 7, 7, 7, 7, 5, 7, 5, 7, 7, 7, 6, 7, 7, 7, 6, 7, 7, 7, 7, 7, 5, 7, 7, 7, 6, 7, 7, 6, 7, 7, 7, 7, 5, 7, 7, 7]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 0.7594124651379951, 'mean_inference_ms': 4.294106366495418, 'mean_action_processing_ms': 0.2113343891649255, 'mean_env_wait_ms': 0.10216062209695567, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0, 'connector_metrics': {'ObsPreprocessorConnector_ms': 0.056225723690456815, 'StateBufferConnector_ms': 0.0127969614041397, 'ViewRequirementAgentConnector_ms': 0.18993857639287812}}","{'training_iteration_time_ms': 11896.015, 'sample_time_ms': 1227.615, 'load_time_ms': 0.399, 'load_throughput': 10027622.975, 'learn_time_ms': 10651.153, 'learn_throughput': 375.546, 'synch_weights_time_ms': 15.952}"


2024-04-23 13:51:38,354	INFO tune.py:1016 -- Wrote the latest version of all result files and experiment state to 'C:/Users/Fitz/ray_results/PPO_2024-04-23_12-13-30' in 0.0309s.
2024-04-23 13:51:38,626	INFO tune.py:1048 -- Total run time: 5888.41 seconds (5888.03 seconds for the tuning loop).


In [16]:
#Get best results
best_trial = analysis.get_best_trial("episode_reward_mean", "max", "last")
best_checkpoint = analysis.get_best_checkpoint(best_trial, "episode_reward_mean", "max")
print("Best checkpoint:", best_checkpoint)

Best checkpoint: Checkpoint(filesystem=local, path=C:/Users/Fitz/ray_results/PPO_2024-04-23_12-13-30/PPO_CardGameEnv_9203f_00000_0_2024-04-23_12-13-30/checkpoint_000008)


## 3) Use RLlib to get as high an expected score as possible with 1 Play and 1 Discard.

In [17]:
import gymnasium as gym
from collections import Counter

class CardGameEnv_One_play(gym.Env):
    def __init__(self, seed=None):
        self.hand_size = 8
        self.MAX_PLAYS = 1  # Limit the number of plays to 1
        self.MAX_DISCARDS = 1  # Limit the number of discards to 1

        self.action_space = gym.spaces.MultiDiscrete([2]*(1+self.hand_size))
        self.observation_space = gym.spaces.Dict({
            'player_hand': gym.spaces.MultiBinary([13, 4]),
            'cards_left_in_deck': gym.spaces.MultiBinary([13, 4])
        })

        self.num_discards = 0
        self.num_plays = 0
        self.deck = initialize_deck()
        self.player_hand = deal_hand(self.deck, self.hand_size)
        self.player_hand.sort()
        self.state = state_function(self.player_hand, self.deck)


    def reset(self, seed=None, options=None):
        self.num_discards = 0
        self.num_plays = 0
        self.deck = initialize_deck()
        self.player_hand = deal_hand(self.deck, self.hand_size)
        self.player_hand.sort()
        self.state = state_function(self.player_hand, self.deck)
        return self.state, {}

    #added by Ron
    def save_checkpoint(self, tmp_checkpoint_dir):
        checkpoint_path = os.path.join(tmp_checkpoint_dir, "singleturn_model.pth")
        torch.save(self.model.state_dict(), checkpoint_path)
        return tmp_checkpoint_dir

    def load_checkpoint(self, tmp_checkpoint_dir):
        checkpoint_path = os.path.join(tmp_checkpoint_dir, "single_model.pth")
        self.model.load_state_dict(torch.load(checkpoint_path))

    def step(self, action):
        dec = action[0]
        if self.num_discards >= self.MAX_DISCARDS:
          dec = 1
        if self.num_plays >= self.MAX_PLAYS:
          dec = 0

        selection = action[1:]
        remaining_hand, selected_hand = partition_selected_unselected(self.player_hand, selection, max_select=5)

        if dec == 1:  # Play
            self.num_plays += 1
            hand_rank_counts = Counter([rank for rank, _ in selected_hand])
            hand_suit_counts = Counter([suit for _, suit in selected_hand])


            # Define the mapping of card ranks to their order
            card_order = {'Ace': 0, '2': 1, '3': 2, '4': 3, '5': 4, '6': 5, '7': 6, '8': 7, '9': 8, '10': 9, 'Jack': 10, 'Queen': 11, 'King': 12}

            # Check for different hand categories
            if len(hand_rank_counts) == 5 and len(hand_suit_counts) == 1 and all(rank in ['10', 'Jack', 'Queen', 'King', 'Ace'] for rank, _ in selected_hand):
                hand_category = "Royal Flush"
            elif len(hand_rank_counts) == 5 and len(hand_suit_counts) == 1 and all(rank in card_order.keys() for rank, _ in selected_hand) and sorted(selected_hand, key=lambda x: card_order[x[0]]) in (['Ace','2', '3', '4', '5'], ['2', '3', '4', '5', '6'], ['3', '4', '5', '6', '7'], ['4', '5', '6', '7', '8'], ['5', '6', '7', '8', '9'], ['6', '7', '8', '9', '10'], ['7', '8', '9', '10', 'Jack'], ['8', '9', '10', 'Jack', 'Queen'], ['9', '10', 'Jack', 'Queen', 'King']):
                hand_category = "Straight Flush"
            elif any(count == 4 for count in hand_rank_counts.values()):
                hand_category = "Four of a Kind"
            elif any(count == 3 for count in hand_rank_counts.values()) and any(count == 2 for count in hand_rank_counts.values()):
                hand_category = "Full House"
            elif len(hand_suit_counts) == 1 and len(selected_hand) == 5:
                hand_category = "Flush"
            elif len(hand_rank_counts) == 5 and all(rank in card_order.keys() for rank, _ in selected_hand) and sorted(selected_hand, key=lambda x: card_order[x[0]]) in (['Ace','2', '3', '4', '5'], ['2', '3', '4', '5', '6'], ['3', '4', '5', '6', '7'], ['4', '5', '6', '7', '8'], ['5', '6', '7', '8', '9'], ['6', '7', '8', '9', '10'], ['7', '8', '9', '10', 'Jack'], ['8', '9', '10', 'Jack', 'Queen'], ['9', '10', 'Jack', 'Queen', 'King']):
                hand_category = "Straight"
            elif any(count == 3 for count in hand_rank_counts.values()):
                hand_category = "Three of a Kind"
            elif sum(count == 2 for count in hand_rank_counts.values()) == 2:
                hand_category = "Two Pair"
            elif any(count == 2 for count in hand_rank_counts.values()):
                hand_category = "Pair"
            else:
                hand_category = "High Card"

            reward = rewards.get(hand_category, 0)  # Reward based on hand category

        elif dec == 0:  # Discard
            self.num_discards += 1
            reward = 0  # Discarding has no reward

        self.player_hand = remaining_hand + deal_hand(self.deck, len(selected_hand))
        self.player_hand.sort()
        self.state = state_function(self.player_hand, self.deck)

        done = self.num_plays == self.MAX_PLAYS or len(self.deck) == 0

        #print(f"Player's Hand: {', '.join([f'{rank} of {suit}' for rank, suit in self.player_hand])}")
        #print(f"Action: {action}, Reward: {reward}, Done: {done}")

        return self.state, reward, done, False, {}

In [18]:
# Create an instance of the CardGameEnv class
env = CardGameEnv_One_play()
env.reset()

#Nice way to test Action Space before running RLLib
action = env.action_space.sample()
print(action)
print(env.player_hand)
state, _, _, _, _ = env.step(action)
len(state['cards_left_in_deck'])
print(env.player_hand)

[0 1 1 0 1 1 1 0 0]
[('10', 'Spades'), ('2', 'Diamonds'), ('2', 'Hearts'), ('4', 'Hearts'), ('5', 'Hearts'), ('6', 'Hearts'), ('7', 'Clubs'), ('Jack', 'Diamonds')]
[('2', 'Hearts'), ('2', 'Spades'), ('4', 'Diamonds'), ('4', 'Spades'), ('7', 'Clubs'), ('9', 'Spades'), ('Jack', 'Diamonds'), ('Queen', 'Diamonds')]


In [19]:
# Create an instance of the CardGameEnv_One_play class
env = CardGameEnv_One_play()
env.reset()

env.observation_space.sample()

# Testing if our observation space matches is being returned by our step function
env.observation_space.contains(state)

True

In [20]:
state, _ = env.reset()
env.observation_space.contains(state)

True

In [21]:
# Create an instance of the CardGameEnv_One_play class
env = CardGameEnv_One_play()
env.reset()

total_reward = 0  # Initialize total reward

# Game loop
while True:
    print("Player's Hand:")
    for rank, suit in env.player_hand:
        print(f"{rank} of {suit}")

    action = env.action_space.sample()

    state, reward, done, _, _ = env.step(action) # Ignoring False, {}

    total_reward += reward  # Accumulate the reward obtained in each step

    print(f"Decision made by the player: {action}")
    print(f"Total Reward after this turn: {total_reward}")

    if done:
        break

print("Game over")

Player's Hand:
10 of Clubs
3 of Diamonds
4 of Clubs
9 of Clubs
Ace of Clubs
Jack of Hearts
King of Clubs
King of Spades
Decision made by the player: [1 0 0 0 0 0 1 1 1]
Total Reward after this turn: 20
Game over


In [None]:
one_play_config = (PPOConfig()
          .environment(CardGameEnv_One_play)
          .framework('torch')
          .training(gamma=1)
          .rollouts(num_rollout_workers=19)
)

#stop = {"timesteps_total": 1000000}
#
#tuner = tune.Tuner(
#    "PPO",
#    param_space=config.to_dict(),
#    run_config=air.RunConfig(stop=stop, checkpoint_config=train.CheckpointConfig(checkpoint_frequency=50, checkpoint_at_end=True)),
#)
#
#tuner.fit()

one_play_checkpoint_path = 'C:\\Users\\Fitz\\ray_results\\PPO_2024-04-22_20-50-14\\PPO_CardGameEnv_One_play_97cf1_00000_0_2024-04-22_20-50-14\\checkpoint_000004'
one_play_stop = {"training_iteration": 1000}
one_play_analysis = tune.run(
    "PPO",  # or use ppo.PPOTrainer if you prefer not to use the string identifier
    config=one_play_config,
    restore=one_play_checkpoint_path,
    stop=one_play_stop, 
    checkpoint_freq=50,  
    checkpoint_at_end=True
)

one_play_best_trial = one_play_analysis.get_best_trial("episode_reward_mean", "max", "last")
one_play_best_checkpoint = one_play_analysis.get_best_checkpoint(best_trial, "episode_reward_mean", "max")
print("Best checkpoint:", best_checkpoint)

2024-04-23 14:04:12,622	INFO tune.py:613 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2024-04-23 16:15:03
Running for:,02:10:50.45
Memory:,36.0/127.9 GiB

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_CardGameEnv_One_play_092a9_00000,RUNNING,127.0.0.1:32332,890,7787.51,3560000,41.604,420,5,1.66917




Trial name,agent_timesteps_total,connector_metrics,counters,custom_metrics,episode_len_mean,episode_media,episode_reward_max,episode_reward_mean,episode_reward_min,episodes_this_iter,info,num_agent_steps_sampled,num_agent_steps_trained,num_env_steps_sampled,num_env_steps_sampled_this_iter,num_env_steps_sampled_throughput_per_sec,num_env_steps_trained,num_env_steps_trained_this_iter,num_env_steps_trained_throughput_per_sec,num_faulty_episodes,num_healthy_workers,num_in_flight_async_reqs,num_remote_worker_restarts,num_steps_trained_this_iter,perf,policy_reward_max,policy_reward_mean,policy_reward_min,sampler_perf,sampler_results,timers
PPO_CardGameEnv_One_play_092a9_00000,3564000,"{'ObsPreprocessorConnector_ms': 0.05739418811511516, 'StateBufferConnector_ms': 0.02733190390024838, 'ViewRequirementAgentConnector_ms': 0.2132370794356765}","{'num_env_steps_sampled': 3564000, 'num_env_steps_trained': 3564000, 'num_agent_steps_sampled': 3564000, 'num_agent_steps_trained': 3564000}",{},1.67028,{},420,42.0242,5,2396,"{'learner': {'default_policy': {'learner_stats': {'allreduce_latency': 0.0, 'grad_gnorm': 1.6861687252598425, 'cur_kl_coeff': 0.4500000000000001, 'cur_lr': 5.0000000000000016e-05, 'total_loss': 9.91628670641171, 'policy_loss': -0.07192527166898212, 'vf_loss': 9.97983870967742, 'vf_explained_var': -1.9740032893355174e-08, 'kl': 0.01860734242036415, 'entropy': 1.9146388915277297, 'entropy_coeff': 0.0}, 'model': {}, 'custom_metrics': {}, 'num_agent_steps_trained': 128.0, 'num_grad_updates_lifetime': 595665.5, 'diff_num_grad_updates_vs_sampler_policy': 464.5}}, 'num_env_steps_sampled': 3564000, 'num_env_steps_trained': 3564000, 'num_agent_steps_sampled': 3564000, 'num_agent_steps_trained': 3564000}",3564000,3564000,3564000,4000,327.802,3564000,4000,327.802,0,19,0,0,4000,"{'cpu_util_percent': 26.586666666666666, 'ram_util_percent': 28.17333333333334, 'gpu_util_percent0': 0.9993333333333333, 'vram_util_percent0': 0.9396809895833333}",{},{},{},"{'mean_raw_obs_processing_ms': 1.41686600199995, 'mean_inference_ms': 4.470883941429691, 'mean_action_processing_ms': 0.22201819167087117, 'mean_env_wait_ms': 0.11156443073436481, 'mean_env_render_ms': 0.0}","{'episode_reward_max': 420.0, 'episode_reward_min': 5.0, 'episode_reward_mean': 42.024207011686144, 'episode_len_mean': 1.6702838063439065, 'episode_media': {}, 'episodes_this_iter': 2396, 'policy_reward_min': {}, 'policy_reward_max': {}, 'policy_reward_mean': {}, 'custom_metrics': {}, 'hist_stats': {'episode_reward': [20.0, 20.0, 20.0, 20.0, 20.0, 40.0, 20.0, 20.0, 5.0, 20.0, 90.0, 40.0, 20.0, 5.0, 40.0, 40.0, 40.0, 20.0, 20.0, 20.0, 40.0, 40.0, 20.0, 20.0, 40.0, 40.0, 5.0, 20.0, 90.0, 20.0, 90.0, 20.0, 40.0, 20.0, 90.0, 90.0, 40.0, 40.0, 90.0, 5.0, 40.0, 40.0, 20.0, 5.0, 20.0, 420.0, 20.0, 5.0, 40.0, 160.0, 5.0, 40.0, 40.0, 90.0, 90.0, 90.0, 20.0, 90.0, 40.0, 20.0, 160.0, 20.0, 20.0, 40.0, 90.0, 20.0, 40.0, 40.0, 160.0, 5.0, 90.0, 5.0, 90.0, 90.0, 20.0, 40.0, 40.0, 20.0, 5.0, 40.0, 90.0, 20.0, 40.0, 20.0, 5.0, 5.0, 20.0, 40.0, 160.0, 20.0, 5.0, 20.0, 40.0, 90.0, 40.0, 5.0, 20.0, 20.0, 40.0, 160.0, 40.0, 90.0, 40.0, 20.0, 5.0, 20.0, 20.0, 20.0, 20.0, 90.0, 90.0, 5.0, 40.0, 40.0, 40.0, 40.0, 5.0, 40.0, 20.0, 90.0, 20.0, 40.0, 90.0, 5.0, 90.0, 40.0, 5.0, 20.0, 20.0, 20.0, 90.0, 20.0, 5.0, 20.0, 20.0, 160.0, 5.0, 90.0, 40.0, 90.0, 90.0, 40.0, 5.0, 40.0, 90.0, 20.0, 20.0, 90.0, 90.0, 20.0, 160.0, 90.0, 40.0, 90.0, 40.0, 5.0, 20.0, 5.0, 40.0, 5.0, 40.0, 40.0, 20.0, 40.0, 20.0, 90.0, 40.0, 20.0, 40.0, 20.0, 40.0, 40.0, 90.0, 40.0, 20.0, 40.0, 40.0, 90.0, 40.0, 5.0, 40.0, 160.0, 5.0, 90.0, 40.0, 5.0, 90.0, 90.0, 40.0, 40.0, 160.0, 20.0, 20.0, 20.0, 5.0, 20.0, 40.0, 90.0, 5.0, 5.0, 5.0, 40.0, 20.0, 90.0, 5.0, 90.0, 20.0, 20.0, 420.0, 20.0, 90.0, 90.0, 20.0, 20.0, 5.0, 90.0, 90.0, 40.0, 20.0, 90.0, 20.0, 20.0, 40.0, 20.0, 5.0, 90.0, 20.0, 20.0, 160.0, 20.0, 20.0, 20.0, 5.0, 20.0, 40.0, 20.0, 90.0, 20.0, 40.0, 20.0, 20.0, 40.0, 5.0, 20.0, 90.0, 160.0, 90.0, 20.0, 20.0, 40.0, 20.0, 20.0, 90.0, 20.0, 160.0, 20.0, 40.0, 90.0, 20.0, 5.0, 20.0, 40.0, 20.0, 20.0, 40.0, 5.0, 5.0, 20.0, 20.0, 20.0, 20.0, 5.0, 5.0, 20.0, 20.0, 20.0, 20.0, 40.0, 90.0, 20.0, 20.0, 90.0, 40.0, 20.0, 20.0, 40.0, 40.0, 20.0, 90.0, 90.0, 90.0, 40.0, 20.0, 20.0, 90.0, 20.0, 20.0, 5.0, 20.0, 5.0, 20.0, 5.0, 40.0, 20.0, 40.0, 90.0, 20.0, 5.0, 5.0, 20.0, 40.0, 40.0, 90.0, 20.0, 90.0, 5.0, 20.0, 90.0, 40.0, 20.0, 90.0, 90.0, 5.0, 20.0, 20.0, 160.0, 20.0, 90.0, 160.0, 20.0, 20.0, 5.0, 5.0, 20.0, 5.0, 20.0, 40.0, 40.0, 5.0, 5.0, 20.0, 5.0, 90.0, 160.0, 5.0, 90.0, 20.0, 90.0, 40.0, 40.0, 160.0, 20.0, 20.0, 5.0, 20.0, 90.0, 20.0, 5.0, 5.0, 40.0, 5.0, 40.0, 5.0, 20.0, 40.0, 20.0, 40.0, 20.0, 40.0, 20.0, 40.0, 20.0, 90.0, 5.0, 5.0, 5.0, 5.0, 90.0, 5.0, 5.0, 20.0, 40.0, 40.0, 20.0, 40.0, 20.0, 5.0, 40.0, 40.0, 5.0, 5.0, 90.0, 40.0, 40.0, 40.0, 90.0, 20.0, 20.0, 90.0, 420.0, 90.0, 40.0, 90.0, 20.0, 20.0, 20.0, 20.0, 40.0, 5.0, 20.0, 40.0, 5.0, 5.0, 20.0, 20.0, 20.0, 40.0, 90.0, 20.0, 5.0, 20.0, 5.0, 90.0, 5.0, 5.0, 40.0, 20.0, 5.0, 20.0, 40.0, 40.0, 20.0, 40.0, 20.0, 90.0, 420.0, 90.0, 20.0, 160.0, 20.0, 40.0, 40.0, 40.0, 20.0, 40.0, 5.0, 40.0, 40.0, 90.0, 90.0, 40.0, 5.0, 90.0, 20.0, 20.0, 90.0, 20.0, 20.0, 40.0, 20.0, 40.0, 20.0, 40.0, 40.0, 5.0, 20.0, 20.0, 40.0, 20.0, 40.0, 90.0, 40.0, 40.0, 160.0, 5.0, 20.0, 90.0, 40.0, 20.0, 5.0, 40.0, 40.0, 5.0, 40.0, 20.0, 5.0, 20.0, 90.0, 5.0, 40.0, 40.0, 40.0, 20.0, 40.0, 90.0, 5.0, 40.0, 40.0, 90.0, 5.0, 20.0, 90.0, 40.0, 5.0, 90.0, 40.0, 20.0, 20.0, 40.0, 5.0, 90.0, 160.0, 40.0, 40.0, 40.0, 90.0, 20.0, 5.0, 90.0, 40.0, 40.0, 20.0, 90.0, 90.0, 90.0, 20.0, 20.0, 90.0, 40.0, 160.0, 90.0, 90.0, 40.0, 40.0, 90.0, 5.0, 90.0, 90.0, 5.0, 40.0, 20.0, 20.0, 20.0, 20.0, 20.0, 5.0, 40.0, 5.0, 90.0, 5.0, 20.0, 20.0, 20.0, 20.0, 20.0, 40.0, 90.0, 40.0, 40.0, 5.0, 90.0, 5.0, 20.0, 5.0, 20.0, 90.0, 20.0, 20.0, 40.0, 20.0, 40.0, 90.0, 20.0, 90.0, 90.0, 20.0, 5.0, 40.0, 5.0, 40.0, 90.0, 90.0, 90.0, 5.0, 90.0, 20.0, 20.0, 40.0, 20.0, 90.0, 40.0, 90.0, 40.0, 5.0, 20.0, 90.0, 20.0, 20.0, 20.0, 5.0, 90.0, 5.0, 90.0, 160.0, 5.0, 5.0, 20.0, 20.0, 20.0, 40.0, 5.0, 40.0, 420.0, 90.0, 90.0, 5.0, 90.0, 90.0, 40.0, 5.0, 5.0, 90.0, 20.0, 20.0, 20.0, 40.0, 5.0, 20.0, 20.0, 90.0, 20.0, 40.0, 40.0, 40.0, 90.0, 40.0, 20.0, 90.0, 90.0, 90.0, 5.0, 5.0, 20.0, 40.0, 40.0, 40.0, 20.0, 90.0, 90.0, 40.0, 90.0, 20.0, 20.0, 40.0, 90.0, 40.0, 20.0, 40.0, 40.0, 20.0, 40.0, 40.0, 20.0, 5.0, 40.0, 40.0, 5.0, 20.0, 20.0, 90.0, 20.0, 40.0, 20.0, 20.0, 90.0, 40.0, 20.0, 40.0, 20.0, 40.0, 40.0, 160.0, 20.0, 20.0, 20.0, 40.0, 20.0, 40.0, 90.0, 5.0, 20.0, 5.0, 5.0, 90.0, 20.0, 40.0, 20.0, 5.0, 90.0, 20.0, 40.0, 20.0, 20.0, 90.0, 20.0, 20.0, 90.0, 40.0, 20.0, 20.0, 20.0, 160.0, 40.0, 90.0, 20.0, 5.0, 20.0, 20.0, 40.0, 40.0, 20.0, 20.0, 20.0, 20.0, 40.0, 5.0, 20.0, 90.0, 160.0, 20.0, 90.0, 20.0, 40.0, 20.0, 20.0, 40.0, 90.0, 20.0, 5.0, 40.0, 160.0, 20.0, 20.0, 5.0, 5.0, 20.0, 5.0, 5.0, 20.0, 20.0, 40.0, 160.0, 20.0, 40.0, 40.0, 90.0, 20.0, 90.0, 40.0, 40.0, 20.0, 40.0, 5.0, 90.0, 20.0, 40.0, 5.0, 20.0, 5.0, 90.0, 40.0, 20.0, 5.0, 90.0, 5.0, 40.0, 5.0, 20.0, 20.0, 5.0, 40.0, 90.0, 5.0, 40.0, 20.0, 20.0, 5.0, 5.0, 90.0, 20.0, 5.0, 40.0, 90.0, 90.0, 90.0, 40.0, 20.0, 90.0, 90.0, 5.0, 20.0, 90.0, 20.0, 5.0, 40.0, 5.0, 40.0, 90.0, 40.0, 90.0, 20.0, 160.0, 20.0, 20.0, 40.0, 20.0, 40.0, 20.0, 40.0, 20.0, 40.0, 90.0, 40.0, 20.0, 20.0, 5.0, 20.0, 90.0, 20.0, 20.0, 20.0, 5.0, 40.0, 40.0, 40.0, 90.0, 160.0, 40.0, 40.0, 90.0, 90.0, 90.0, 90.0, 5.0, 90.0, 5.0, 20.0, 40.0, 90.0, 40.0, 5.0, 20.0, 20.0, 20.0, 160.0, 40.0, 40.0, 5.0, 5.0, 40.0, 20.0, 20.0, 20.0, 20.0, 160.0, 90.0, 20.0, 40.0, 20.0, 90.0, 20.0, 20.0, 5.0, 40.0, 20.0, 20.0, 40.0, 40.0, 90.0, 5.0, 40.0, 5.0, 20.0, 90.0, 5.0, 5.0, 5.0, 20.0, 20.0, 40.0, 5.0, 40.0, 20.0, 40.0, 20.0, 40.0, 90.0, 20.0, 40.0, 40.0, 40.0, 20.0, 20.0, 40.0, 20.0, 5.0, 40.0, 20.0, 90.0, 40.0, 20.0, 5.0, 90.0, 160.0, 90.0, 20.0, 20.0, 160.0, 90.0, 20.0, 40.0, 90.0, 90.0, 20.0, 40.0, 90.0, 20.0, 20.0, 5.0, 20.0, 90.0, 90.0, 20.0, 40.0, 40.0, 90.0, 20.0, 90.0, 20.0, 420.0, 40.0, 90.0, 90.0, 40.0, 40.0, 40.0, 90.0, 5.0, 20.0, 40.0, 20.0, 20.0, 40.0, 90.0, 20.0, 20.0, 5.0, 40.0, 40.0, 40.0, 40.0, 5.0, 40.0, 20.0, 90.0, 40.0, 20.0, 20.0, 40.0, 5.0, 5.0, 20.0, 40.0, 20.0, 40.0, 160.0, 5.0, 20.0, 20.0, 40.0, 90.0, 40.0, 40.0, 90.0, 40.0, 40.0, 40.0, 5.0, 40.0, 5.0, 40.0, 20.0, 5.0, 40.0, 20.0, 40.0, 160.0, 20.0, 20.0, 40.0, 5.0, 40.0, 20.0, 5.0, 90.0, 40.0, 20.0, 20.0, 90.0, 40.0, 5.0, 5.0, 20.0, 40.0, 20.0, 40.0, 40.0, 20.0, 90.0, 20.0, 90.0, 40.0, 160.0, 5.0, 20.0, 40.0, 20.0, 20.0, 5.0, 40.0, 5.0, 20.0, 20.0, 20.0, 20.0, 90.0, 40.0, 20.0, 5.0, 90.0, 20.0, 20.0, 90.0, 40.0, 20.0, 90.0, 40.0, 20.0, 40.0, 160.0, 5.0, 40.0, 40.0, 5.0, 40.0, 40.0, 5.0, 90.0, 40.0, 5.0, 90.0, 5.0, 20.0, 5.0, 90.0, 40.0, 20.0, 90.0, 20.0, 40.0, 5.0, 5.0, 40.0, 40.0, 5.0, 40.0, 20.0, 40.0, 20.0, 5.0, 5.0, 90.0, 20.0, 20.0, 20.0, 40.0, 20.0, 20.0, 40.0, 160.0, 40.0, 40.0, 20.0, 20.0, 20.0, 90.0, 160.0, 20.0, 40.0, 40.0, 40.0, 20.0, 40.0, 90.0, 20.0, 90.0, 20.0, 40.0, 20.0, 90.0, 20.0, 40.0, 40.0, 5.0, 20.0, 5.0, 20.0, 20.0, 40.0, 40.0, 20.0, 20.0, 90.0, 90.0, 40.0, 40.0, 90.0, 20.0, 90.0, 20.0, 20.0, 40.0, 90.0, 20.0, 40.0, 20.0, 20.0, 5.0, 90.0, 90.0, 20.0, 20.0, 40.0, 20.0, 5.0, 5.0, 20.0, 5.0, 20.0, 5.0, 20.0, 5.0, 5.0, 5.0, 5.0, 90.0, 90.0, 40.0, 20.0, 160.0, 20.0, 160.0, 90.0, 20.0, 40.0, 90.0, 20.0, 40.0, 90.0, 90.0, 5.0, 40.0, 40.0, 40.0, 20.0, 40.0, 5.0, 5.0, 40.0, 5.0, 20.0, 90.0, 20.0, 90.0, 40.0, 5.0, 5.0, 20.0, 40.0, 90.0, 90.0, 40.0, 20.0, 20.0, 90.0, 20.0, 40.0, 40.0, 40.0, 20.0, 20.0, 5.0, 40.0, 20.0, 5.0, 40.0, 40.0, 40.0, 40.0, 20.0, 40.0, 20.0, 20.0, 20.0, 90.0, 20.0, 5.0, 40.0, 5.0, 40.0, 90.0, 20.0, 5.0, 20.0, 20.0, 160.0, 5.0, 20.0, 5.0, 20.0, 40.0, 20.0, 5.0, 20.0, 40.0, 90.0, 90.0, 20.0, 5.0, 20.0, 5.0, 20.0, 5.0, 20.0, 40.0, 90.0, 20.0, 5.0, 5.0, 20.0, 20.0, 5.0, 20.0, 20.0, 20.0, 40.0, 90.0, 40.0, 20.0, 5.0, 90.0, 40.0, 20.0, 20.0, 90.0, 20.0, 20.0, 20.0, 90.0, 20.0, 90.0, 5.0, 90.0, 5.0, 20.0, 40.0, 40.0, 90.0, 40.0, 5.0, 20.0, 90.0, 40.0, 40.0, 90.0, 40.0, 40.0, 5.0, 40.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 90.0, 20.0, 160.0, 40.0, 5.0, 40.0, 20.0, 20.0, 5.0, 90.0, 20.0, 160.0, 20.0, 90.0, 5.0, 40.0, 40.0, 90.0, 20.0, 40.0, 40.0, 90.0, 90.0, 40.0, 20.0, 5.0, 90.0, 5.0, 20.0, 40.0, 160.0, 20.0, 40.0, 5.0, 40.0, 90.0, 40.0, 20.0, 20.0, 5.0, 40.0, 5.0, 20.0, 20.0, 20.0, 20.0, 90.0, 40.0, 90.0, 40.0, 20.0, 5.0, 5.0, 5.0, 40.0, 90.0, 90.0, 160.0, 40.0, 20.0, 20.0, 5.0, 40.0, 40.0, 20.0, 20.0, 5.0, 90.0, 20.0, 20.0, 5.0, 20.0, 20.0, 20.0, 90.0, 5.0, 20.0, 90.0, 20.0, 160.0, 40.0, 20.0, 40.0, 20.0, 40.0, 90.0, 20.0, 20.0, 90.0, 90.0, 40.0, 20.0, 90.0, 5.0, 40.0, 20.0, 40.0, 40.0, 5.0, 90.0, 40.0, 5.0, 5.0, 20.0, 5.0, 20.0, 20.0, 90.0, 90.0, 20.0, 20.0, 90.0, 40.0, 5.0, 40.0, 20.0, 20.0, 40.0, 20.0, 5.0, 90.0, 20.0, 20.0, 5.0, 40.0, 5.0, 90.0, 90.0, 20.0, 90.0, 40.0, 90.0, 5.0, 5.0, 20.0, 5.0, 40.0, 20.0, 20.0, 5.0, 20.0, 90.0, 420.0, 20.0, 20.0, 40.0, 20.0, 20.0, 20.0, 90.0, 5.0, 5.0, 5.0, 40.0, 90.0, 5.0, 40.0, 20.0, 20.0, 90.0, 20.0, 20.0, 160.0, 20.0, 40.0, 90.0, 20.0, 90.0, 20.0, 20.0, 90.0, 5.0, 40.0, 5.0, 40.0, 20.0, 5.0, 40.0, 20.0, 90.0, 20.0, 20.0, 90.0, 40.0, 5.0, 40.0, 90.0, 40.0, 20.0, 20.0, 40.0, 5.0, 40.0, 160.0, 20.0, 40.0, 20.0, 20.0, 90.0, 40.0, 40.0, 5.0, 40.0, 90.0, 40.0, 40.0, 20.0, 40.0, 5.0, 90.0, 20.0, 90.0, 40.0, 90.0, 20.0, 90.0, 5.0, 40.0, 40.0, 5.0, 5.0, 5.0, 40.0, 5.0, 40.0, 40.0, 20.0, 5.0, 90.0, 5.0, 90.0, 5.0, 40.0, 20.0, 40.0, 20.0, 20.0, 20.0, 20.0, 5.0, 90.0, 20.0, 20.0, 5.0, 90.0, 20.0, 90.0, 20.0, 20.0, 90.0, 90.0, 40.0, 90.0, 20.0, 20.0, 40.0, 40.0, 40.0, 90.0, 40.0, 160.0, 90.0, 20.0, 90.0, 40.0, 40.0, 20.0, 5.0, 40.0, 20.0, 20.0, 40.0, 5.0, 5.0, 90.0, 20.0, 20.0, 40.0, 20.0, 20.0, 5.0, 20.0, 90.0, 90.0, 20.0, 40.0, 5.0, 90.0, 5.0, 20.0, 20.0, 20.0, 40.0, 90.0, 90.0, 40.0, 20.0, 40.0, 90.0, 160.0, 5.0, 5.0, 40.0, 20.0, 5.0, 40.0, 90.0, 5.0, 40.0, 40.0, 90.0, 90.0, 40.0, 5.0, 20.0, 40.0, 40.0, 90.0, 20.0, 90.0, 5.0, 40.0, 20.0, 90.0, 20.0, 90.0, 20.0, 40.0, 20.0, 160.0, 90.0, 160.0, 90.0, 5.0, 5.0, 5.0, 40.0, 20.0, 90.0, 90.0, 40.0, 90.0, 40.0, 20.0, 40.0, 5.0, 20.0, 90.0, 90.0, 5.0, 20.0, 20.0, 20.0, 40.0, 90.0, 420.0, 20.0, 90.0, 5.0, 20.0, 20.0, 90.0, 40.0, 20.0, 90.0, 40.0, 20.0, 160.0, 40.0, 90.0, 90.0, 90.0, 40.0, 40.0, 5.0, 90.0, 90.0, 20.0, 40.0, 90.0, 40.0, 5.0, 90.0, 40.0, 40.0, 5.0, 5.0, 5.0, 20.0, 5.0, 40.0, 20.0, 20.0, 20.0, 160.0, 40.0, 40.0, 40.0, 20.0, 90.0, 20.0, 5.0, 20.0, 90.0, 90.0, 160.0, 5.0, 20.0, 90.0, 90.0, 20.0, 40.0, 20.0, 20.0, 5.0, 20.0, 40.0, 40.0, 40.0, 40.0, 20.0, 40.0, 20.0, 90.0, 20.0, 20.0, 20.0, 90.0, 5.0, 20.0, 90.0, 20.0, 90.0, 20.0, 20.0, 20.0, 20.0, 20.0, 5.0, 20.0, 40.0, 90.0, 90.0, 40.0, 20.0, 40.0, 5.0, 90.0, 40.0, 20.0, 90.0, 90.0, 40.0, 160.0, 40.0, 20.0, 40.0, 5.0, 20.0, 40.0, 20.0, 160.0, 20.0, 5.0, 90.0, 20.0, 90.0, 5.0, 40.0, 20.0, 90.0, 20.0, 5.0, 90.0, 40.0, 5.0, 5.0, 40.0, 40.0, 40.0, 5.0, 40.0, 20.0, 20.0, 20.0, 90.0, 20.0, 5.0, 5.0, 5.0, 40.0, 40.0, 20.0, 20.0, 90.0, 90.0, 40.0, 40.0, 90.0, 160.0, 5.0, 40.0, 5.0, 40.0, 90.0, 40.0, 20.0, 40.0, 20.0, 40.0, 90.0, 20.0, 5.0, 20.0, 20.0, 40.0, 90.0, 90.0, 90.0, 20.0, 40.0, 160.0, 5.0, 5.0, 5.0, 5.0, 90.0, 20.0, 20.0, 40.0, 20.0, 40.0, 5.0, 40.0, 20.0, 40.0, 20.0, 20.0, 5.0, 20.0, 40.0, 5.0, 20.0, 5.0, 40.0, 5.0, 160.0, 20.0, 5.0, 40.0, 90.0, 160.0, 40.0, 40.0, 90.0, 5.0, 160.0, 5.0, 90.0, 40.0, 420.0, 90.0, 40.0, 20.0, 40.0, 5.0, 20.0, 5.0, 5.0, 40.0, 20.0, 5.0, 40.0, 40.0, 40.0, 40.0, 20.0, 20.0, 20.0, 20.0, 40.0, 20.0, 5.0, 160.0, 5.0, 20.0, 20.0, 5.0, 5.0, 90.0, 5.0, 20.0, 40.0, 20.0, 5.0, 5.0, 90.0, 20.0, 20.0, 20.0, 20.0, 20.0, 40.0, 20.0, 90.0, 40.0, 40.0, 40.0, 90.0, 90.0, 5.0, 5.0, 40.0, 20.0, 160.0, 20.0, 5.0, 40.0, 20.0, 5.0, 40.0, 90.0, 5.0, 40.0, 5.0, 90.0, 90.0, 5.0, 90.0, 40.0, 5.0, 90.0, 20.0, 40.0, 20.0, 90.0, 5.0, 40.0, 5.0, 40.0, 40.0, 20.0, 20.0, 90.0, 40.0, 20.0, 5.0, 90.0, 90.0, 40.0, 40.0, 5.0, 90.0, 20.0, 20.0, 5.0, 5.0, 40.0, 20.0, 40.0, 90.0, 20.0, 90.0, 5.0, 5.0, 20.0, 160.0, 20.0, 5.0, 40.0, 20.0, 20.0, 90.0, 20.0, 160.0, 20.0, 20.0, 40.0, 40.0, 40.0, 90.0, 20.0, 90.0, 20.0, 90.0, 20.0, 90.0, 40.0, 5.0, 5.0, 40.0, 20.0, 40.0, 40.0, 40.0, 20.0, 90.0, 5.0, 20.0, 20.0, 40.0, 20.0, 20.0, 5.0, 40.0, 90.0, 20.0, 20.0, 90.0, 20.0, 20.0, 40.0, 20.0, 90.0, 20.0, 20.0, 20.0, 5.0, 20.0, 40.0, 20.0, 40.0, 5.0, 90.0, 20.0, 40.0, 90.0, 40.0, 90.0, 20.0, 40.0, 90.0, 40.0, 20.0, 5.0, 20.0, 40.0, 20.0, 40.0, 5.0, 90.0, 90.0, 90.0, 20.0, 20.0, 20.0, 40.0, 40.0, 40.0, 40.0, 40.0, 20.0, 20.0, 5.0, 5.0, 20.0, 5.0, 90.0, 40.0, 20.0, 20.0, 40.0, 5.0, 40.0, 20.0, 40.0, 5.0, 20.0, 40.0, 20.0, 20.0, 20.0, 5.0, 5.0, 90.0, 40.0, 20.0, 5.0, 20.0, 40.0, 90.0, 20.0, 90.0, 40.0, 20.0, 20.0, 5.0, 20.0, 40.0, 40.0, 40.0, 90.0, 40.0, 40.0, 40.0, 5.0, 20.0, 90.0, 20.0, 90.0, 5.0, 40.0, 5.0, 5.0, 20.0, 40.0, 5.0, 40.0, 20.0, 90.0, 5.0, 20.0, 20.0, 20.0, 20.0, 40.0, 40.0, 20.0, 5.0, 20.0, 90.0, 40.0, 20.0, 20.0, 40.0, 5.0, 40.0, 90.0, 40.0, 40.0, 20.0, 20.0, 90.0, 90.0, 20.0, 20.0, 5.0, 20.0, 20.0, 40.0, 5.0, 90.0, 90.0, 20.0, 90.0, 20.0, 5.0, 40.0, 40.0, 90.0, 20.0, 40.0, 90.0, 5.0, 90.0, 20.0, 40.0, 40.0, 90.0, 20.0, 5.0, 20.0, 40.0, 20.0, 20.0, 40.0, 20.0, 5.0, 20.0, 5.0, 90.0, 40.0, 20.0, 5.0, 20.0, 90.0, 40.0, 20.0, 40.0, 90.0, 5.0, 20.0, 40.0, 90.0, 5.0, 20.0, 40.0, 160.0, 20.0, 5.0, 20.0, 90.0, 40.0, 90.0, 40.0, 5.0, 90.0, 20.0, 40.0, 40.0, 20.0, 40.0, 5.0, 5.0, 5.0, 160.0, 5.0, 20.0, 40.0, 40.0, 90.0, 90.0, 40.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 5.0, 20.0, 20.0, 160.0, 20.0, 90.0, 20.0, 20.0, 20.0, 90.0, 90.0, 20.0, 40.0, 20.0, 20.0, 90.0, 20.0, 20.0, 40.0, 20.0, 40.0, 160.0, 5.0, 5.0, 20.0, 40.0, 40.0, 20.0, 40.0, 5.0, 90.0, 40.0, 90.0, 40.0, 40.0, 20.0, 20.0, 40.0, 90.0, 90.0, 20.0, 90.0, 90.0, 20.0, 5.0, 20.0, 90.0, 20.0, 20.0, 20.0, 90.0, 20.0, 5.0, 160.0, 5.0, 20.0, 40.0, 90.0, 90.0, 20.0, 5.0, 20.0, 90.0, 40.0, 40.0, 20.0, 20.0, 5.0, 20.0, 90.0, 5.0, 5.0, 5.0, 20.0, 40.0, 5.0, 5.0, 40.0, 90.0, 90.0, 90.0, 20.0, 20.0, 40.0, 40.0, 5.0, 160.0, 90.0, 40.0, 5.0, 40.0, 40.0, 40.0, 20.0, 5.0, 90.0, 160.0, 20.0, 90.0, 5.0, 40.0, 160.0, 5.0, 40.0, 420.0, 20.0, 40.0, 20.0, 40.0, 160.0, 40.0, 20.0, 20.0, 5.0, 20.0, 20.0, 40.0, 90.0, 160.0, 20.0, 20.0, 20.0, 40.0, 20.0, 90.0, 40.0, 20.0, 90.0, 40.0, 90.0, 90.0, 5.0, 40.0, 20.0, 40.0, 5.0, 40.0, 20.0, 20.0, 20.0, 40.0, 160.0, 20.0, 40.0, 90.0, 20.0, 20.0, 5.0, 5.0, 5.0, 90.0, 20.0, 5.0, 90.0, 40.0, 20.0, 5.0, 40.0, 20.0, 20.0, 90.0, 20.0, 40.0, 160.0, 90.0, 40.0, 20.0, 5.0, 20.0, 20.0, 40.0, 5.0, 20.0, 40.0, 20.0, 20.0, 20.0, 90.0, 40.0, 90.0], 'episode_lengths': [2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 1, 2, 1, 1, 2, 2, 2, 2, 2, 1, 2, 2, 1, 1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 1, 2, 2, 2, 1, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 2, 2, 1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 2, 1, 2, 2, 1, 2, 2, 1, 2, 1, 1, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2, 1, 2, 1, 1, 2, 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 1, 2, 2, 2, 1, 1, 2, 1, 2, 2, 2, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 1, 2, 1, 1, 1, 2, 2, 1, 2, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 1, 2, 1, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 2, 1, 1, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1, 1, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 1, 1, 2, 1, 2, 2, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 1, 2, 1, 2, 2, 2, 1, 1, 2, 2, 1, 2, 1, 2, 2, 1, 2, 2, 2, 2, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 1, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 1, 2, 1, 1, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2, 1, 2, 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 1, 1, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 1, 2, 2, 2, 1, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 2, 2, 2, 1, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 2, 2, 1, 2, 1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 2, 1, 2, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 2, 1, 1, 1, 2, 2, 1, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 1, 2, 2, 1, 1, 2, 1, 1, 2, 2, 1, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1, 2, 1, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 1, 2, 2, 1, 2, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 1, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 1, 2, 2, 1, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 1, 1, 2, 2, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2, 1, 2, 1, 1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 1, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 1, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 1, 2, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2, 1, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 2, 2]}, 'sampler_perf': {'mean_raw_obs_processing_ms': 1.41686600199995, 'mean_inference_ms': 4.470883941429691, 'mean_action_processing_ms': 0.22201819167087117, 'mean_env_wait_ms': 0.11156443073436481, 'mean_env_render_ms': 0.0}, 'num_faulty_episodes': 0, 'connector_metrics': {'ObsPreprocessorConnector_ms': 0.05739418811511516, 'StateBufferConnector_ms': 0.02733190390024838, 'ViewRequirementAgentConnector_ms': 0.2132370794356765}}","{'training_iteration_time_ms': 12108.729, 'sample_time_ms': 1421.993, 'load_time_ms': 1.007, 'load_throughput': 3970374.858, 'learn_time_ms': 10668.589, 'learn_throughput': 374.932, 'synch_weights_time_ms': 16.738}"




## Thank you!