# Provided code

In [1]:
# Install open spiel
!pip install --upgrade open_spiel



In [2]:
# Imports
import numpy as np

from open_spiel.python import rl_agent
from open_spiel.python import rl_environment
import pyspiel

In [3]:
# Some helper classes and functions.
# DO NOT CHANGE.

class BotAgent(rl_agent.AbstractAgent):
  """Agent class that wraps a bot.

  Note, the environment must include the OpenSpiel state in its observations,
  which means it must have been created with use_full_state=True.

  This is a simple wrapper that lets the RPS bots be interpreted as agents under
  the RL API.
  """

  def __init__(self, num_actions, bot, name="bot_agent"):
    assert num_actions > 0
    self._bot = bot
    self._num_actions = num_actions

  def restart(self):
    self._bot.restart()

  def step(self, time_step, is_evaluation=False):
    # If it is the end of the episode, don't select an action.
    if time_step.last():
      return
    _, state = pyspiel.deserialize_game_and_state(
        time_step.observations["serialized_state"])
    action = self._bot.step(state)
    probs = np.zeros(self._num_actions)
    probs[action] = 1.0
    return rl_agent.StepOutput(action=action, probs=probs)


#  We will use this function to evaluate the agents. Do not change.

def eval_agents(env, agents, num_players, num_episodes, verbose=False):
  """Evaluate the agent.

  Runs a number of episodes and returns the average returns for each agent as
  a numpy array.

  Arguments:
    env: the RL environment,
    agents: a list of agents (size 2),
    num_players: number of players in the game (for RRPS, this is 2),
    num_episodes: number of evaluation episodes to run.
    verbose: whether to print updates after each episode.
  """
  sum_episode_rewards = np.zeros(num_players)
  for ep in range(num_episodes):
    for agent in agents:
      # Bots need to be restarted at the start of the episode.
      if hasattr(agent, "restart"):
        agent.restart()
    time_step = env.reset()
    episode_rewards = np.zeros(num_players)
    while not time_step.last():
      agents_output = [
          agent.step(time_step, is_evaluation=True) for agent in agents
      ]
      action_list = [agent_output.action for agent_output in agents_output]
      print('action_list:', action_list)
      raise
      time_step = env.step(action_list)
      episode_rewards += time_step.rewards
    sum_episode_rewards += episode_rewards
    if verbose:
      print(f"Finished episode {ep}, "
            + f"avg returns: {sum_episode_rewards / (ep+1)}")

  return sum_episode_rewards / num_episodes


def print_roshambo_bot_names_and_ids(roshambo_bot_names):
  print("Roshambo bot population:")
  for i in range(len(roshambo_bot_names)):
    print(f"{i}: {roshambo_bot_names[i]}")

def create_roshambo_bot_agent(player_id, num_actions, bot_names, pop_id):
  name = bot_names[pop_id]
  # Creates an OpenSpiel bot with the default number of throws
  # (pyspiel.ROSHAMBO_NUM_THROWS). To create one for a different number of
  # throws per episode, add the number as the third argument here.
  bot = pyspiel.make_roshambo_bot(player_id, name)
  return BotAgent(num_actions, bot, name=name)


In [5]:
print("Loading bot population...")
pop_size = pyspiel.ROSHAMBO_NUM_BOTS
print(f"Population size: {pop_size}")
roshambo_bot_names = pyspiel.roshambo_bot_names()
roshambo_bot_names.sort()
print_roshambo_bot_names_and_ids(roshambo_bot_names)

bot_id = 0
roshambo_bot_ids = {}
for name in roshambo_bot_names:
  roshambo_bot_ids[name] = bot_id
  bot_id += 1

Loading bot population...
Population size: 43
Roshambo bot population:
0: actr_lag2_decay
1: adddriftbot2
2: addshiftbot3
3: antiflatbot
4: antirotnbot
5: biopic
6: boom
7: copybot
8: debruijn81
9: driftbot
10: flatbot3
11: foxtrotbot
12: freqbot2
13: granite
14: greenberg
15: halbot
16: inocencio
17: iocainebot
18: marble
19: markov5
20: markovbails
21: mixed_strategy
22: mod1bot
23: multibot
24: peterbot
25: phasenbott
26: pibot
27: piedra
28: predbot
29: r226bot
30: randbot
31: robertot
32: rockbot
33: rotatebot
34: russrocker4
35: shofar
36: sunCrazybot
37: sunNervebot
38: sweetrock
39: switchalot
40: switchbot
41: textbot
42: zq_move


In [6]:
# # Example: create an RL environment, and two agents from the bot population and
# # evaluate these two agents head-to-head.

# # Note that the include_full_state variable has to be enabled because the
# # BotAgent needs access to the full state.
# env = rl_environment.Environment(
#     "repeated_game(stage_game=matrix_rps(),num_repetitions=" +
#     f"{pyspiel.ROSHAMBO_NUM_THROWS}," +
#     f"recall={RECALL})",
#     include_full_state=True)
# num_players = 2
# num_actions = env.action_spec()["num_actions"]
# # Learning agents might need this:
# # info_state_size = env.observation_spec()["info_state"][0]

# # Create two bot agents
# p0_pop_id = 0   # actr_lag2_decay
# p1_pop_id = 1   # adddriftbot2
# agents = [
#     create_roshambo_bot_agent(0, num_actions, roshambo_bot_names, p0_pop_id),
#     create_roshambo_bot_agent(1, num_actions, roshambo_bot_names, p1_pop_id)
# ]

# print("Starting eval run.")
# avg_eval_returns = eval_agents(env, agents, num_players, 10, verbose=True)

# print("Avg return ", avg_eval_returns)

# My own code

Altered or completely new code. Used to generate training data for LSTM

In [7]:
RECALL = 20
env = rl_environment.Environment(
    "repeated_game(stage_game=matrix_rps(),num_repetitions=" +
    f"{pyspiel.ROSHAMBO_NUM_THROWS}," +
    f"recall={RECALL})",
    include_full_state=True)
num_players = 2
num_actions = env.action_spec()["num_actions"]

In [21]:
import numpy as np
from open_spiel.python import rl_environment
import pyspiel

# Import your helpers (names may differ in your codebase)
# from roshambo_population import create_roshambo_bot_agent, roshambo_bot_names
# from my_random_agent import RandomAgent  # or any simple baseline agent

class MyAgent(rl_agent.AbstractAgent):
    """
    Greenberg-lite ensemble agent for repeated RPS:

    - Maintains several simple "expert" strategies (predictors + best-response).
    - Tracks a score for each expert based on how well it *would have* done.
    - At each step, picks the action of the best-scoring expert (with some
      epsilon exploration).
    """

    def __init__(self,
                 player_id,
                 num_actions=3,
                 score_alpha=0.1,      # how fast expert scores update
                 epsilon_action=0.1,  # randomization over actions
                 epsilon_expert=0.05,  # randomization over experts
                 name="ensemble_agent"
                #  lstm_model_path="lstm_rps.pt",
                 ):
        super().__init__(player_id=player_id, name=name)
        assert num_actions == 3, "RRPS uses 3 actions (R,P,S)."
        self._player_id = player_id
        self._num_actions = num_actions

        self._score_alpha = score_alpha
        self._eps_action = epsilon_action
        self._eps_expert = epsilon_expert

        # Payoff matrix from *my* perspective
        # rows = my action, cols = opponent action
        # 0: Rock, 1: Paper, 2: Scissors
        self._payoff = np.array([
            [ 0, -1,  1],  # I play R
            [ 1,  0, -1],  # I play P
            [-1,  1,  0],  # I play S
        ], dtype=float)

        # Number of experts we'll define
        self._num_experts = 4  # you can add more if you want
        # self._lstm_seq_len = lstm_seq_len
        # self._device = torch.device("cpu")  # or "cuda" if allowed
        # self._lstm_model = torch.load(lstm_model_path, map_location=self._device)
        # self._lstm_model.eval()
        # self._lstm_hidden = None

        self.restart()

    # ------------ Episode reset ------------

    def restart(self):
        # Opponent statistics
        self._opp_counts = np.ones(self._num_actions, dtype=float)  # smoothed counts
        self._trans_counts = np.ones((self._num_actions, self._num_actions),
                                     dtype=float)  # P(opp_t | opp_{t-1})

        self._last_opp_action = None
        self._last_my_action = None

        # Expert scores and last actions they recommended
        self._expert_scores = np.zeros(self._num_experts, dtype=float)
        self._last_expert_actions = [None] * self._num_experts

        # Track how much of history we've processed (if we use it later)
        self._last_history_len = 0

        # self._opp_hist = []
        # self._lstm_hidden = None

    # ------------ Utility helpers ------------

    def _beat(self, move):
        #Return the action that beats 'move'
        if move is None:
            return np.random.randint(self._num_actions)
        return (move + 1) % self._num_actions

    def _update_opp_stats(self, state):
        #Update opponent statistics based on game history.
        history = state.history()
        if len(history) == 0:
            return None

        action = history[-1]
        # Update counts
        self._opp_counts[action] += 1
        if self._last_opp_action is not None:
            self._trans_counts[self._last_opp_action, action] += 1
        self._last_opp_action = action
        # self._opp_hist.append(action)
        return action
    

    # ------------ Expert strategies ------------

    def _expert_actions(self):
        #Compute each expert's recommended action for step.
        actions = []

        # Expert 0: Frequency-based best response
        # Predict opp's most frequent move overall.
        opp_probs = self._opp_counts / np.sum(self._opp_counts)
        pred0 = int(np.argmax(opp_probs))
        actions.append(self._beat(pred0))

        # Expert 1: Last-opponent-move best response
        # Predict they repeat their last move.
        actions.append(self._beat(self._last_opp_action))

        # Expert 2: Markov(1) best response
        # Predict based on last opp move -> next opp move.
        if self._last_opp_action is not None:
            row = self._trans_counts[self._last_opp_action]
            pred2 = int(np.argmax(row))
            actions.append(self._beat(pred2))
        else:
            # fallback to frequency BR
            actions.append(self._beat(pred0))

        # Expert 3: Mirror-me assumption
        # Predict opp plays what I played last time.
        actions.append(self._beat(self._last_my_action))

        # # Expert 4: ML (LSTM)
        # pred_lstm = self._lstm_predict_opp()
        # if pred_lstm is not None:
        #     actions.append(self._beat(pred_lstm))
        # else:
        #     # fallback: behave like frequency BR
        #     actions.append(self._beat(pred0))


        return actions

    # ------------ Expert scoring ------------

    def _update_expert_scores(self, last_opp_action):
        """
        Update each expert's score based on what happened in the *previous* round.

        We use the payoff matrix: if expert i had played its suggested action,
        what reward would it have gotten vs last_opp_action?
        """
        if last_opp_action is None:
            return  # nothing to update on the first move

        for i in range(self._num_experts):
            a_i = self._last_expert_actions[i]
            if a_i is None:
                continue
            payoff_i = self._payoff[a_i, last_opp_action]
            # Exponential moving average of payoff
            self._expert_scores[i] = (
                (1 - self._score_alpha) * self._expert_scores[i]
                + self._score_alpha * payoff_i
            )

    # ------------ Main step ------------

    def step(self, time_step, is_evaluation=False):
        # Terminal: return dummy
        if time_step.last():
            probs = np.ones(self._num_actions) / self._num_actions
            return rl_agent.StepOutput(action=0, probs=probs)

        # Deserialize game state
        game, state = pyspiel.deserialize_game_and_state(
            time_step.observations["serialized_state"])

        # 1) Update opponent stats (includes last_opp_action)
        last_opp_action = self._update_opp_stats(state)

        # 2) Update expert scores based on previous round outcome
        # We don't need time_step.rewards here; we recompute payoff from matrix.
        self._update_expert_scores(last_opp_action)

        # 3) Each expert proposes an action for THIS round
        expert_actions = self._expert_actions()
        self._last_expert_actions = list(expert_actions)  # store for next scoring

        # 4) Choose which expert to follow (epsilon-greedy over expert scores)
        if np.random.rand() < self._eps_expert:
            chosen_expert = np.random.randint(self._num_experts)
        else:
            chosen_expert = int(np.argmax(self._expert_scores))

        chosen_action = expert_actions[chosen_expert]

        # 5) Add action-level randomness for robustness
        if np.random.rand() < self._eps_action:
            action = np.random.randint(self._num_actions)
        else:
            action = chosen_action

        # Save my last action for next round's mirror expert
        self._last_my_action = action

        # 6) Build probability distribution (mostly on chosen_action)
        probs = np.ones(self._num_actions) * (self._eps_action / self._num_actions)
        probs[chosen_action] += 1.0 - self._eps_action

        return rl_agent.StepOutput(action=action, probs=probs)
    


def generate_rrps_data(
    roshambo_bot_names,
    create_roshambo_bot_agent,
    num_episodes_per_bot=50,
    horizon=1000,
    output_path="rrps_lstm_data.npz",
):
    """
    Generate training data for an LSTM that predicts opponent moves.

    For each bot in the population:
      - Play num_episodes_per_bot episodes
      - Each episode has up to `horizon` steps (num_repetitions)
      - Record the sequence of opponent actions (0=R,1=P,2=S)

    Saves a .npz file with:
      - episodes: list of lists of ints (opponent moves)
      - bot_ids:  list of int bot indices (aligned with episodes)
      - bot_names: list of strings (names for reference)
    """

    # ----- Build the repeated RPS game -----
    # Adjust if your game is constructed differently.
    # game = pyspiel.load_game(
    #     "repeated_game",
    #     {
    #         "num_repetitions": horizon,
    #         "stage_game": "matrix_rps",
    #     },
    # )
    # env = rl_environment.Environment(game)

    env = rl_environment.Environment(
    "repeated_game(stage_game=matrix_rps(),num_repetitions=" +
    f"{pyspiel.ROSHAMBO_NUM_THROWS}," +
    f"recall={RECALL})",
    include_full_state=True)

    num_actions = 3
    num_players = 2

    all_episodes = []
    all_bot_ids = []

    for bot_id, bot_name in enumerate(roshambo_bot_names):
        print(f"Collecting data vs bot {bot_id}: {bot_name}")


        class UniformAgent:
            def __init__(self, player_id, num_actions=3):
                self.player_id = player_id
                self.num_actions = num_actions
            def restart(self):
                pass
            def step(self, time_step, is_evaluation=False):
                if time_step.last():
                    # Dummy output, env ignores action at terminal
                    return type("Out", (), {"action": 0})
                a = np.random.randint(self.num_actions)
                return type("Out", (), {"action": int(a)})

        my_agent = MyAgent(player_id=0, num_actions=num_actions)
        # my_agent = create_roshambo_bot_agent(
        #     player_id=0,
        #     num_actions=num_actions,
        #     roshambo_bot_names=roshambo_bot_names,
        #     roshambo_bot_index=bot_id,
        # )
        opp_agent = create_roshambo_bot_agent(
            1,
            num_actions,
            roshambo_bot_names,
            bot_id,
        )

        for ep in range(num_episodes_per_bot):
            # Restart agents if they support it
            if hasattr(my_agent, "restart"):
                my_agent.restart()
            if hasattr(opp_agent, "restart"):
                opp_agent.restart()

            time_step = env.reset()
            opp_actions = []

            while not time_step.last():
                # Collect actions from both players
                outputs = [
                    my_agent.step(time_step, is_evaluation=True),
                    opp_agent.step(time_step, is_evaluation=True),
                ]
                action_list = [out.action for out in outputs]

                # Here we assume my_agent is player 0, opp_agent is player 1:
                opp_action = action_list[1]
                opp_actions.append(int(opp_action))

                # Step environment
                time_step = env.step(action_list)

            all_episodes.append(opp_actions)
            all_bot_ids.append(bot_id)

            if (ep + 1) % 10 == 0:
                print(f"  Bot {bot_name}: episode {ep+1}/{num_episodes_per_bot}")

    # Save as a numpy object array (list-of-lists) plus labels.
    episodes_arr = np.array(all_episodes, dtype=object)
    bot_ids_arr = np.array(all_bot_ids, dtype=np.int32)

    np.savez(
        output_path,
        episodes=episodes_arr,
        bot_ids=bot_ids_arr,
        bot_names=np.array(roshambo_bot_names, dtype=object),
    )
    print(f"Saved dataset to {output_path}")


# Example usage (adjust imports / names to your actual code):
# from roshambo_population import create_roshambo_bot_agent, roshambo_bot_names
# generate_rrps_data(roshambo_bot_names, create_roshambo_bot_agent)

In [22]:
generate_rrps_data(roshambo_bot_names, create_roshambo_bot_agent)

Collecting data vs bot 0: actr_lag2_decay
  Bot actr_lag2_decay: episode 10/50
  Bot actr_lag2_decay: episode 20/50
  Bot actr_lag2_decay: episode 30/50
  Bot actr_lag2_decay: episode 40/50
  Bot actr_lag2_decay: episode 50/50
Collecting data vs bot 1: adddriftbot2
  Bot adddriftbot2: episode 10/50
  Bot adddriftbot2: episode 20/50
  Bot adddriftbot2: episode 30/50
  Bot adddriftbot2: episode 40/50
  Bot adddriftbot2: episode 50/50
Collecting data vs bot 2: addshiftbot3
  Bot addshiftbot3: episode 10/50
  Bot addshiftbot3: episode 20/50
  Bot addshiftbot3: episode 30/50
  Bot addshiftbot3: episode 40/50
  Bot addshiftbot3: episode 50/50
Collecting data vs bot 3: antiflatbot
  Bot antiflatbot: episode 10/50
  Bot antiflatbot: episode 20/50
  Bot antiflatbot: episode 30/50
  Bot antiflatbot: episode 40/50
  Bot antiflatbot: episode 50/50
Collecting data vs bot 4: antirotnbot
  Bot antirotnbot: episode 10/50
  Bot antirotnbot: episode 20/50
  Bot antirotnbot: episode 30/50
  Bot antirotnb

In [23]:
import numpy as np

data = np.load("rrps_lstm_data.npz", allow_pickle=True)
print("Keys:", data.files)

episodes = data["episodes"]
bot_ids = data["bot_ids"]
bot_names = data["bot_names"]

print("episodes dtype:", episodes.dtype)
print("num episodes:", len(episodes))
print("bot_ids shape:", bot_ids.shape)
print("bot_names:", bot_names)

# Peek at first few episodes
for i in range(min(3, len(episodes))):
    print(f"\nEpisode {i}, bot_id={bot_ids[i]}:")
    print("length:", len(episodes[i]))
    print("moves:", episodes[i][:20], "...")

Keys: ['episodes', 'bot_ids', 'bot_names']
episodes dtype: object
num episodes: 2150
bot_ids shape: (2150,)
bot_names: ['actr_lag2_decay' 'adddriftbot2' 'addshiftbot3' 'antiflatbot'
 'antirotnbot' 'biopic' 'boom' 'copybot' 'debruijn81' 'driftbot'
 'flatbot3' 'foxtrotbot' 'freqbot2' 'granite' 'greenberg' 'halbot'
 'inocencio' 'iocainebot' 'marble' 'markov5' 'markovbails'
 'mixed_strategy' 'mod1bot' 'multibot' 'peterbot' 'phasenbott' 'pibot'
 'piedra' 'predbot' 'r226bot' 'randbot' 'robertot' 'rockbot' 'rotatebot'
 'russrocker4' 'shofar' 'sunCrazybot' 'sunNervebot' 'sweetrock'
 'switchalot' 'switchbot' 'textbot' 'zq_move']

Episode 0, bot_id=0:
length: 1000
moves: [0 1 1 0 1 1 1 2 2 0 1 0 1 0 1 0 0 1 1 0] ...

Episode 1, bot_id=0:
length: 1000
moves: [1 0 0 2 2 2 2 0 0 1 2 2 0 2 2 2 1 2 0 0] ...

Episode 2, bot_id=0:
length: 1000
moves: [1 2 2 2 0 1 0 1 0 0 2 0 1 2 1 0 2 1 1 2] ...


The below cell is used to generate training data. It runs games with one specific bot (in this case Greenberg) against randbot. This data was used to train the first iteration of the model (the one focused purely on beating Greenberg).

In [10]:
from tqdm import tqdm

# Generate training data for specified bot
botName = 'greenberg'
botId = roshambo_bot_ids[botName]

env = rl_environment.Environment(
    "repeated_game(stage_game=matrix_rps(),num_repetitions=" +
    f"{pyspiel.ROSHAMBO_NUM_THROWS}," +
    f"recall={RECALL})",
    include_full_state=True)
num_players = 2
num_actions = env.action_spec()["num_actions"]

trainBot = BotAgent(num_actions, pyspiel.make_roshambo_bot(0, botName), name=botName)  # Bot for whom train data being generated
# randBot = BotAgent(num_actions, pyspiel.make_roshambo_bot(1, 'randbot'), name='randbot')
randBot = myAgent
agents = [trainBot, randBot]

# Note: data is generated/saved in sets to avoid losing data if Colab kicks me off
# With these numbers, each set takes ~3.5 mins (~2.1 sec/run)
numSets = 30   # Number of sets of data to generate
numRuns = 100  # Number of runs to generate training data for per set

for setNum in range(numSets):
  runData = []  # Contains data for each run

  for run in tqdm(range(numRuns), desc=f"Set {setNum+1} / {numSets}"):
    # Reset variables for new run
    randBot.restart()
    trainBot.restart()
    time_step = env.reset()

    result = []  # Contains move data for this game

    while not time_step.last():
      actionList = [agent.step(time_step, is_evaluation=True).action for agent in agents]
      result.append(actionList)
      time_step = env.step(actionList)

    runData.append(np.array(result, dtype=np.uint8))

  if len(runData) != numRuns:
    print(f"Warning: length should be {numRuns} but is {len(runData)}")

  runData = np.array(runData, dtype=np.uint8)
  print('Data shape:', runData.shape)

  # Save np array containing results for this set to google drive
  np.save(f'/content/drive/My Drive/CS486A4/Greenberg_data_LSTMv1/{botName}_{setNum}.npy', runData)

NameError: name 'myAgent' is not defined

The below cell is used to generate data. It runs games with one specific bot (in this case Greenberg) against every other bot. The data from this cell was not used in either of the models discussed in the report.

In [None]:
# Unlike the above cell, this one focuses on generating data from different agents vs greenberg

from tqdm import tqdm

# Generate training data for specified bot
botName = 'greenberg'
botId = roshambo_bot_ids[botName]

# List of agents that always play same sequence of moves (so don't need to run them a lot)
deterministic_agents = [
    'rockbot', 'rotatebot', 'pibot', 'debruijn81', 'textbot'
]

env = rl_environment.Environment(
    "repeated_game(stage_game=matrix_rps(),num_repetitions=" +
    f"{pyspiel.ROSHAMBO_NUM_THROWS}," +
    f"recall={RECALL})",
    include_full_state=True)
num_players = 2
num_actions = env.action_spec()["num_actions"]

trainBot = BotAgent(num_actions, pyspiel.make_roshambo_bot(0, botName), name=botName)  # Bot for whom train data being generated
# randBot = BotAgent(num_actions, pyspiel.make_roshambo_bot(1, 'randbot'), name='randbot')
# agents = [trainBot, randBot]

numRuns = 100  # Number of runs to generate training data for per model

for name, id in roshambo_bot_ids.items():
  runData = []  # Contains data for each run

  dataBot = BotAgent(num_actions, pyspiel.make_roshambo_bot(1, name), name=name)
  agents = [trainBot, dataBot]

  # Run less times if this is a deterministic agent
  runTimes = 5 if name in deterministic_agents else numRuns

  for run in tqdm(range(runTimes), desc=f"Agent {name} ({id + 1} / {len(roshambo_bot_names)})"):
    # Reset variables for new run
    time_step = env.reset()
    trainBot.restart()
    if hasattr(dataBot, 'restart'):
      dataBot.restart()

    result = []  # Contains move data for this game

    while not time_step.last():
      actionList = [agent.step(time_step, is_evaluation=True).action for agent in agents]
      result.append(actionList)
      time_step = env.step(actionList)

    runData.append(np.array(result, dtype=np.uint8))

  if len(runData) != numRuns:
    print(f"Warning: length should be {numRuns} but is {len(runData)}")

  runData = np.array(runData, dtype=np.uint8)
  print('Data shape:', runData.shape)

  # Save np array containing results for this set to google drive
  np.save(f'/content/drive/My Drive/Greenberg_AllBots_Data/{botName}-VS-{name}.npy', runData)

The below cell is used to generate data. It runs games with every bot against randbot. The data from this cell was used in the second model discussed in the report (the one that can be generalized to other models).

This cell generates 150 games for each bot. If generating data on Colab, I recommend making 3 copies of this notebook and running all 3 at once to speed up data creation.

In [None]:
# Unlike the above cell, this one focuses on generating data from different agents vs randbot

from tqdm import tqdm

# List of agents that always play same sequence of moves (unused)
deterministic_agents = [
    'rockbot', 'rotatebot', 'pibot', 'debruijn81', 'textbot'
]

env = rl_environment.Environment(
    "repeated_game(stage_game=matrix_rps(),num_repetitions=" +
    f"{pyspiel.ROSHAMBO_NUM_THROWS}," +
    f"recall={RECALL})",
    include_full_state=True)
num_players = 2
num_actions = env.action_spec()["num_actions"]

# trainBot = BotAgent(num_actions, pyspiel.make_roshambo_bot(0, botName), name=botName)  # Bot for whom train data being generated
randBot = BotAgent(num_actions, pyspiel.make_roshambo_bot(0, 'randbot'), name='randbot')
# agents = [trainBot, randBot]

numRuns = 150  # Number of runs to generate training data for per model

for name, id in roshambo_bot_ids.items():
  runData = []  # Contains data for each run

  trainBot = BotAgent(num_actions, pyspiel.make_roshambo_bot(0, name), name=name)
  agents = [trainBot, randBot]

  # Run less times if this is a deterministic agent
  # runTimes = 5 if name in deterministic_agents else numRuns
  runTimes = numRuns

  for run in tqdm(range(runTimes), desc=f"Agent {name} ({id + 1} / {len(roshambo_bot_names)})"):
    # Reset variables for new run
    time_step = env.reset()
    randBot.restart()
    if hasattr(trainBot, 'restart'):
      trainBot.restart()

    result = []  # Contains move data for this game

    while not time_step.last():
      actionList = [agent.step(time_step, is_evaluation=True).action for agent in agents]
      result.append(actionList)
      time_step = env.step(actionList)

    runData.append(np.array(result, dtype=np.uint8))

  if len(runData) != numRuns:
    print(f"Warning: length should be {numRuns} but is {len(runData)}")

  runData = np.array(runData, dtype=np.uint8)
  print('Data shape:', runData.shape)

  # Save np array containing results for this set to google drive
  np.save(f'/content/drive/My Drive/CS486A4/AllBots_vs_random/{name}-VS-randbot-7.npy', runData)

Agent actr_lag2_decay (1 / 43): 100%|██████████| 150/150 [04:50<00:00,  1.94s/it]


Data shape: (150, 1000, 2)


Agent adddriftbot2 (2 / 43): 100%|██████████| 150/150 [04:50<00:00,  1.94s/it]


Data shape: (150, 1000, 2)


Agent addshiftbot3 (3 / 43): 100%|██████████| 150/150 [04:51<00:00,  1.94s/it]


Data shape: (150, 1000, 2)


Agent antiflatbot (4 / 43): 100%|██████████| 150/150 [04:50<00:00,  1.94s/it]


Data shape: (150, 1000, 2)


Agent antirotnbot (5 / 43): 100%|██████████| 150/150 [04:51<00:00,  1.94s/it]


Data shape: (150, 1000, 2)


Agent biopic (6 / 43): 100%|██████████| 150/150 [04:55<00:00,  1.97s/it]


Data shape: (150, 1000, 2)


Agent boom (7 / 43): 100%|██████████| 150/150 [04:57<00:00,  1.98s/it]


Data shape: (150, 1000, 2)


Agent copybot (8 / 43): 100%|██████████| 150/150 [04:58<00:00,  1.99s/it]


Data shape: (150, 1000, 2)


Agent debruijn81 (9 / 43): 100%|██████████| 150/150 [04:55<00:00,  1.97s/it]


Data shape: (150, 1000, 2)


Agent driftbot (10 / 43): 100%|██████████| 150/150 [04:55<00:00,  1.97s/it]


Data shape: (150, 1000, 2)


Agent flatbot3 (11 / 43): 100%|██████████| 150/150 [04:54<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent foxtrotbot (12 / 43): 100%|██████████| 150/150 [04:53<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent freqbot2 (13 / 43): 100%|██████████| 150/150 [04:53<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent granite (14 / 43): 100%|██████████| 150/150 [04:52<00:00,  1.95s/it]


Data shape: (150, 1000, 2)


Agent greenberg (15 / 43): 100%|██████████| 150/150 [05:17<00:00,  2.12s/it]


Data shape: (150, 1000, 2)


Agent halbot (16 / 43): 100%|██████████| 150/150 [05:02<00:00,  2.01s/it]


Data shape: (150, 1000, 2)


Agent inocencio (17 / 43): 100%|██████████| 150/150 [04:58<00:00,  1.99s/it]


Data shape: (150, 1000, 2)


Agent iocainebot (18 / 43): 100%|██████████| 150/150 [04:59<00:00,  2.00s/it]


Data shape: (150, 1000, 2)


Agent marble (19 / 43): 100%|██████████| 150/150 [04:54<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent markov5 (20 / 43): 100%|██████████| 150/150 [04:54<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent markovbails (21 / 43): 100%|██████████| 150/150 [04:54<00:00,  1.97s/it]


Data shape: (150, 1000, 2)


Agent mixed_strategy (22 / 43): 100%|██████████| 150/150 [04:54<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent mod1bot (23 / 43): 100%|██████████| 150/150 [04:54<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent multibot (24 / 43): 100%|██████████| 150/150 [04:55<00:00,  1.97s/it]


Data shape: (150, 1000, 2)


Agent peterbot (25 / 43): 100%|██████████| 150/150 [04:55<00:00,  1.97s/it]


Data shape: (150, 1000, 2)


Agent phasenbott (26 / 43): 100%|██████████| 150/150 [04:55<00:00,  1.97s/it]


Data shape: (150, 1000, 2)


Agent pibot (27 / 43): 100%|██████████| 150/150 [04:53<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent piedra (28 / 43): 100%|██████████| 150/150 [04:53<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent predbot (29 / 43): 100%|██████████| 150/150 [04:53<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent r226bot (30 / 43): 100%|██████████| 150/150 [04:54<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent randbot (31 / 43): 100%|██████████| 150/150 [04:53<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent robertot (32 / 43): 100%|██████████| 150/150 [04:55<00:00,  1.97s/it]


Data shape: (150, 1000, 2)


Agent rockbot (33 / 43): 100%|██████████| 150/150 [04:57<00:00,  1.98s/it]


Data shape: (150, 1000, 2)


Agent rotatebot (34 / 43): 100%|██████████| 150/150 [04:56<00:00,  1.97s/it]


Data shape: (150, 1000, 2)


Agent russrocker4 (35 / 43): 100%|██████████| 150/150 [04:54<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent shofar (36 / 43): 100%|██████████| 150/150 [04:56<00:00,  1.97s/it]


Data shape: (150, 1000, 2)


Agent sunCrazybot (37 / 43): 100%|██████████| 150/150 [04:54<00:00,  1.97s/it]


Data shape: (150, 1000, 2)


Agent sunNervebot (38 / 43): 100%|██████████| 150/150 [04:58<00:00,  1.99s/it]


Data shape: (150, 1000, 2)


Agent sweetrock (39 / 43): 100%|██████████| 150/150 [04:55<00:00,  1.97s/it]


Data shape: (150, 1000, 2)


Agent switchalot (40 / 43): 100%|██████████| 150/150 [04:54<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent switchbot (41 / 43): 100%|██████████| 150/150 [04:54<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent textbot (42 / 43): 100%|██████████| 150/150 [04:54<00:00,  1.96s/it]


Data shape: (150, 1000, 2)


Agent zq_move (43 / 43): 100%|██████████| 150/150 [04:57<00:00,  1.98s/it]

Data shape: (150, 1000, 2)



