# Reinforcement learning bots

In [None]:
import sys
import os
import numpy as np

# Resolve the directory three levels above the current working directory and
# put it at the front of the import path (once), presumably so that the
# project's own packages can be imported from this notebook.
project_root = os.path.abspath(os.path.join(os.pardir, os.pardir, os.pardir))
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)

# Reload imported modules automatically before each cell is executed, so edits
# to the project's source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

from matplotlib import pyplot as plt

# Render inline figures as SVG and apply the "ggplot" style to every plot.
%config InlineBackend.figure_formats = ['svg']
plt.style.use("ggplot")


## Crafting the environment

- How should the observation space be represented?
  

In [5]:
# Notebook-wide random generator with a fixed seed so runs are repeatable.
rng = np.random.default_rng(seed=2137)

In [171]:
from typing import List, Optional
import gymnasium as gym
import numpy as np

from catan.bots.rl.encoding import TileEncoder, TileEncoding
from catan.core.game import Game as CatanGame
from catan.core.models.enums import (
    Action as CatanAction,
    ActionType,
)
from catan.core.models.map import DEFAULT_MAP, LandTile
from catan.bots.rl.rewards import action_to_reward
from catan.core.models.player import Color, Player, RandomPlayer


# Board component counts used to size the observation and action spaces.
N_TILES = len(DEFAULT_MAP.land_tiles)
N_NODES = len(DEFAULT_MAP.land_nodes)
# NOTE(review): hard-coded, unlike the two counts above -- confirm that it
# matches the number of edges on DEFAULT_MAP.
N_EDGES = 72


# Encoder shared by the environment; an observation has one row per land tile
# and `obs_space_encoder.size` entries per row.
obs_space_encoder = TileEncoder()
OBS_SPACE_SHAPE = (N_TILES, obs_space_encoder.size)

# Flat action space: every action type is repeated once per slot it owns, so a
# discrete action index maps back to its type via ACTION_SPACE[index], and the
# offset from the first entry of that type identifies the slot.
ACTION_SPACE = tuple(
    action_type
    for action_type, slot_count in (
        (ActionType.BUILD_SETTLEMENT, N_NODES),
        (ActionType.BUILD_CITY, N_NODES),
        (ActionType.BUILD_ROAD, N_EDGES),
        (ActionType.MOVE_ROBBER, N_TILES),
        (ActionType.PLAY_KNIGHT_CARD, 1),
        (ActionType.PLAY_ROAD_BUILDING, 1),
        (ActionType.MARITIME_TRADE, 9),
        (ActionType.ROLL, 1),
        (ActionType.BUY_DEVELOPMENT_CARD, 1),
        (ActionType.DISCARD, 1),
        (ActionType.END_TURN, 1),
    )
    for _ in range(slot_count)
)


class RLPlayer(Player):
    """Placeholder player type for the reinforcement-learning bot."""

    def decide(self, game, playable_actions):
        """Return no decision; both arguments are currently ignored.

        NOTE(review): stub -- presumably meant to pick one entry of
        ``playable_actions`` once the agent exists.
        """
        return None


class CatanEnv(gym.Env):
    """Gymnasium environment around a single-player Catan game.

    Observations are ``int8`` arrays of shape ``OBS_SPACE_SHAPE`` holding one
    encoded row per land tile. The action space is ``Discrete`` with one index
    per entry of ``ACTION_SPACE``.

    NOTE(review): work in progress -- only the settlement/city and the
    single-slot action types are translated into game actions, and the
    observation returned by ``step`` is not yet updated as the game evolves.
    """

    def __init__(self) -> None:
        self.render_mode = None
        # Reuse the notebook-level seeded generator.
        self.np_random = rng

        self.observation_space = gym.spaces.Box(
            low=0,
            high=1,
            shape=OBS_SPACE_SHAPE,
            dtype=np.int8,
        )
        self.action_space = gym.spaces.Discrete(len(ACTION_SPACE))

        # NOTE(review): not read anywhere in this class; kept only so that any
        # existing reference to the attribute keeps working.
        self._player_location = self.observation_space
        self.game = self._new_game()

        self._observations = self._get_initial_obs()

    @staticmethod
    def _new_game() -> CatanGame:
        """Create a fresh game with one random RED player on ``DEFAULT_MAP``."""
        return CatanGame(players=[RandomPlayer(Color.RED)], catan_map=DEFAULT_MAP)

    def _action_to_catan_action(
        self,
        env_action: int,
        playable_actions: Optional[List[CatanAction]] = None,
    ) -> CatanAction:
        """Translate a discrete action index into a playable game action.

        Args:
            env_action: Index into ``ACTION_SPACE``.
            playable_actions: Candidate game actions. Defaults to the game's
                currently playable actions.

        Returns:
            The playable action matching the requested type (and, for
            settlements and cities, the requested slot).

        Raises:
            ValueError: If no playable action matches ``env_action``.
            NotImplementedError: For multi-slot action types that are not
                mapped yet (roads, robber moves, maritime trades).
        """
        if playable_actions is None:
            # NOTE(review): assumes the game state exposes `playable_actions`
            # -- confirm against the Game/State implementation.
            playable_actions = self.game.state.playable_actions

        action_type = ACTION_SPACE[env_action]
        first_of_type_idx = ACTION_SPACE.index(action_type)
        num_of_type = ACTION_SPACE.count(action_type)

        # NOTE(review): assumes every action exposes `action_type` next to the
        # `value` field already relied upon below -- confirm.
        candidates = [a for a in playable_actions if a.action_type == action_type]

        if num_of_type == 1:
            # Single-slot types carry no slot offset: any playable action of
            # that type is the one being requested.
            if not candidates:
                raise ValueError(f"{action_type} is not playable right now")
            return candidates[0]

        if action_type in (ActionType.BUILD_SETTLEMENT, ActionType.BUILD_CITY):
            # The offset inside the type's run of slots is compared with the
            # action's `value`.
            relative_value = int(env_action) - first_of_type_idx
            catan_action = next(
                (a for a in candidates if a.value == relative_value), None
            )
            if catan_action is None:
                raise ValueError(
                    f"{action_type} with value {relative_value} is not playable right now"
                )
            return catan_action

        raise NotImplementedError(f"No mapping implemented for {action_type} yet")

    def _random_agent_state(self):
        """Placeholder; not implemented yet."""
        pass

    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None):
        """Start a new episode and return ``(observation, info)``.

        Args:
            seed: Forwarded to ``gym.Env.reset`` to reseed ``np_random``.
            options: Accepted for API compatibility; currently unused.
        """
        super().reset(seed=seed)

        # A new episode needs a new game and a matching observation buffer.
        self.game = self._new_game()
        self._observations = self._get_initial_obs()

        return self._get_obs(), self._get_info()

    def _get_info(self):
        """Return the auxiliary info dict (currently always empty)."""
        return {}

    def _land_tiles(self):
        """Return the land tiles of the current game's map, in map order."""
        return [
            tile
            for tile in self.game.state.board.map.tiles.values()
            if isinstance(tile, LandTile)
        ]

    @staticmethod
    def _tile_resource_encoding(tile):
        """Map a tile's resource to its ``TileEncoding`` (``EMPTY`` if none)."""
        if tile.resource is None:
            return TileEncoding.EMPTY
        return TileEncoding[tile.resource]

    def _get_initial_obs(self):
        """Build the observation array for the board before any move."""
        initial_obs = np.zeros(shape=OBS_SPACE_SHAPE, dtype=np.int8)

        # Rows are numbered by position among land tiles only, so a non-land
        # tile in the map can never shift a row or overflow the array.
        for idx, tile in enumerate(self._land_tiles()):
            initial_obs[idx] = obs_space_encoder.get_initial_encoding(
                value=tile.number, resource=self._tile_resource_encoding(tile)
            )

        return initial_obs

    def _game_state_to_obs(self):
        """Refresh ``self._observations`` from the current game state.

        NOTE(review): unfinished and not called anywhere in this class. The
        keyword arguments passed to ``encode_node`` differ from the other call
        site in this notebook (``player_encoding``, ``building_encoding``,
        ``node_idx``) and the enum class itself is passed as ``player`` --
        confirm the encoder's signature before using this method.
        """
        # Imported here so the method does not depend on a later cell having
        # been executed first.
        from catan.bots.rl.encoding import NodePlayerEncoding

        for idx, tile in enumerate(self._land_tiles()):
            self._observations[idx] = obs_space_encoder.encode_node(
                encoding=self._observations[idx],
                player=NodePlayerEncoding,
                value=tile.number,
                resource=self._tile_resource_encoding(tile),
            )

    def _get_obs(self):
        """Return the current observation.

        NOTE(review): rebuilds the initial observation on every call, so moves
        made during the episode are not reflected yet.
        """
        return self._get_initial_obs()

    def _is_terminated(self):
        """Return True once the game reports a winning color."""
        return self.game.winning_color() is not None

    def step(self, action: int):
        """Apply one discrete action and return the Gymnasium step tuple.

        Args:
            action: Index into ``ACTION_SPACE``.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
        """
        catan_action = self._action_to_catan_action(action)
        # The game must receive the translated action, not the raw index.
        self.game.execute(catan_action)

        next_obs = self._get_obs()
        reward = action_to_reward(catan_action)
        terminated = self._is_terminated()
        truncated = False
        info = self._get_info()

        return next_obs, reward, terminated, truncated, info


env = CatanEnv()

# Show the initial observation as a list holding one array per row.
[*env._get_initial_obs()]

[array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
 array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
 array([1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
 array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8),
 array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [184]:
from catan.bots.rl.encoding import NodeBuildingEncoding, NodePlayerEncoding
from catan.core.models.player import Color


# Fresh game with a single random RED player, used for the inspection below.
game = CatanGame(players=[RandomPlayer(Color.RED)])

# Advance the game by 100 ticks so there is some state to look at.
for _ in range(100):
    game.play_tick()


def vectorize_state(game: CatanGame):
    """Encode every settlement of every player in ``game`` as a node.

    NOTE(review): work in progress. The value returned by ``encode_node`` is
    discarded, every settlement is encoded with ``node_idx=0`` instead of its
    own entry from the ``"SETTLEMENT"`` list, and the function returns
    ``None``.

    TODO: collect the encodings into an observation array (passing the current
    row as ``encoding``) and cover the remaining entries of
    ``buildings_by_color``.
    """
    state = game.state

    for color in state.colors:
        owned = state.buildings_by_color[color]
        owner_code = NodePlayerEncoding[color]

        for _settlement in owned["SETTLEMENT"]:
            obs_space_encoder.encode_node(
                player_encoding=owner_code,
                building_encoding=NodeBuildingEncoding.SETTLEMENT,
                node_idx=0,
            )
# vectorize_state(game)

# Buildings recorded for the RED player after the ticks played above.
game.state.buildings_by_color[Color.RED]

defaultdict(list, {'SETTLEMENT': [18, 48], 'ROAD': [(18, 40), (48, 49)]})

In [174]:
# Inspect the map's tiles; the output below shows them keyed by coordinate tuples.
game.state.board.map.tiles

{(0,
  0,
  0): LandTile(id=0, resource='WHEAT', number=5, nodes={<NodeRef.NORTH: 'NORTH'>: 0, <NodeRef.NORTHEAST: 'NORTHEAST'>: 1, <NodeRef.SOUTHEAST: 'SOUTHEAST'>: 2, <NodeRef.SOUTH: 'SOUTH'>: 3, <NodeRef.SOUTHWEST: 'SOUTHWEST'>: 4, <NodeRef.NORTHWEST: 'NORTHWEST'>: 5}, edges={<EdgeRef.EAST: 'EAST'>: (1, 2), <EdgeRef.SOUTHEAST: 'SOUTHEAST'>: (2, 3), <EdgeRef.SOUTHWEST: 'SOUTHWEST'>: (3, 4), <EdgeRef.WEST: 'WEST'>: (4, 5), <EdgeRef.NORTHWEST: 'NORTHWEST'>: (5, 0), <EdgeRef.NORTHEAST: 'NORTHEAST'>: (0, 1)}),
 (1,
  -1,
  0): LandTile(id=1, resource='SHEEP', number=6, nodes={<NodeRef.NORTH: 'NORTH'>: 6, <NodeRef.NORTHEAST: 'NORTHEAST'>: 7, <NodeRef.SOUTHEAST: 'SOUTHEAST'>: 8, <NodeRef.SOUTH: 'SOUTH'>: 9, <NodeRef.SOUTHWEST: 'SOUTHWEST'>: 2, <NodeRef.NORTHWEST: 'NORTHWEST'>: 1}, edges={<EdgeRef.EAST: 'EAST'>: (7, 8), <EdgeRef.SOUTHEAST: 'SOUTHEAST'>: (8, 9), <EdgeRef.SOUTHWEST: 'SOUTHWEST'>: (9, 2), <EdgeRef.WEST: 'WEST'>: (1, 2), <EdgeRef.NORTHWEST: 'NORTHWEST'>: (1, 6), <EdgeRef.NORTHE

In [59]:
# Display TileEncoding. NOTE(review): the recorded output of this cell is an
# AttributeError, so it does not run cleanly as-is.
TileEncoding

AttributeError: value