# Reinforcement learning bots

In [None]:
import sys
import os
import numpy as np

# Put the directory three levels above the current working directory at the
# front of the import path (presumably so the project's `catan` package can
# be imported from this notebook). Skipped when the entry is already present,
# so re-running the cell does not add duplicates.
project_root = os.path.abspath(os.path.join(os.pardir, os.pardir, os.pardir))
if sys.path.count(project_root) == 0:
    sys.path.insert(0, project_root)

%load_ext autoreload
%autoreload 2

from matplotlib import pyplot as plt

%config InlineBackend.figure_formats = ['svg']
plt.style.use("ggplot")


## Crafting the environment

- How should the observation space be represented?
  

In [5]:
# Shared NumPy random generator with a fixed seed so that runs of the
# notebook are reproducible.
rng = np.random.default_rng(seed=2137)

In [79]:
from collections import defaultdict, namedtuple
from enum import Enum
from typing import Final, List, Optional
import gymnasium as gym
import numpy as np

from catan.core.game import Game as CatanGame
from catan.core.models.enums import (
    BRICK,
    ORE,
    SHEEP,
    WHEAT,
    WOOD,
    Action as CatanAction,
    ActionType,
)
from catan.core.models.map import DEFAULT_MAP
from catan.bots.rl.rewards import action_to_reward
from catan.core.models.player import Player, RandomPlayer


# Board dimensions; they determine how many slots each action type gets in
# ACTION_SPACE below.
# Number of land tiles on the default map.
N_TILES = len(DEFAULT_MAP.land_tiles)
# Number of land nodes on the default map.
N_NODES = len(DEFAULT_MAP.land_nodes)
# Hard-coded, unlike the two counts above.
# NOTE(review): presumably the number of edges (road slots) on the default
# map - confirm against DEFAULT_MAP and derive it from there if possible.
N_EDGES = 72


class TileEncoding(Enum):
    """Labels for the tile-level slots of the observation encoding.

    The first five members reuse the project's resource constants as their
    values; EMPTY and HAS_ROBBER are additional string-valued labels.
    """

    WOOD = WOOD
    BRICK = BRICK
    SHEEP = SHEEP
    WHEAT = WHEAT
    ORE = ORE
    # NOTE(review): presumably marks a tile that produces no resource - confirm.
    EMPTY = "EMPTY"
    HAS_ROBBER = "HAS_ROBBER"


class NodeEncoding(Enum):
    """Labels for the node-level slots of the observation encoding.

    Every member's value is its own name as a string.
    """

    # NOTE(review): presumably the kind of building on the node (or none).
    SETTLEMENT = "SETTLEMENT"
    CITY = "CITY"
    NONE = "NONE"
    # NOTE(review): presumably which player owns the node - confirm.
    P1 = "P1"
    P2 = "P2"
    P3 = "P3"
    P4 = "P4"


class TileEncoder:
    """Describe the flat feature layout used for observations.

    The layout is a tuple of labels, one per feature slot, in this order:

    1. the seven ``TileEncoding`` values,
    2. the integers 2..12 (presumably the tile's number token),
    3. the seven ``NodeEncoding`` values, repeated ``n_players`` times.

    Attributes:
        n_players: Number of times the node-label group is repeated.
        node_encoding: Labels that make up one node group.
        tile_value_encoding: ``range(2, 13)``.
        encoding: The complete layout described above.
        node_encoding_start: Index in ``encoding`` of the first node label.
        shape: One-dimensional array shape ``(len(encoding),)``.
    """

    def __init__(self, n_players=4) -> None:
        """Build the label layout.

        Args:
            n_players: How many node-label groups to append to the layout.
        """
        self.n_players = n_players
        self.node_encoding = (
            NodeEncoding.SETTLEMENT.value,
            NodeEncoding.CITY.value,
            NodeEncoding.NONE.value,
            NodeEncoding.P1.value,
            NodeEncoding.P2.value,
            NodeEncoding.P3.value,
            NodeEncoding.P4.value,
        )
        self.tile_value_encoding = range(2, 13)
        self.encoding = (
            TileEncoding.WOOD.value,
            TileEncoding.BRICK.value,
            TileEncoding.SHEEP.value,
            TileEncoding.WHEAT.value,
            TileEncoding.ORE.value,
            TileEncoding.EMPTY.value,
            TileEncoding.HAS_ROBBER.value,
            *self.tile_value_encoding,
            *(self.node_encoding * self.n_players),
        )

        # The node labels repeat, so ``index`` finds the start of the first
        # node group.
        self.node_encoding_start = self.encoding.index(self.node_encoding[0])
        # One slot per label. The previous value was a tuple of
        # ``len(encoding)`` ones, which describes an array with that many
        # dimensions holding a single element, not a feature vector.
        self.shape = (len(self.encoding),)

    def encode(self, vector):
        """Encode ``vector`` into the layout. Not implemented yet.

        Currently a stub that returns ``None``.
        """
        pass
    
    
    
# Module-level encoder built with the default n_players=4. Its `shape` is
# used as the shape of CatanEnv's observation space.
obs_space_encoder = TileEncoder()
OBS_SPACE_SHAPE = obs_space_encoder.shape

# Flat lookup table for the discrete action space: position i holds the
# ActionType that env action i stands for. Each action type occupies one
# contiguous run of slots; for settlements and cities the offset inside the
# run is compared against the Catan action's `value` by CatanEnv.
# NOTE(review): the 9 slots for MARITIME_TRADE are a hard-coded count whose
# derivation is not visible here - confirm.
ACTION_SPACE = tuple(
    action_type
    for action_type, slot_count in (
        (ActionType.BUILD_SETTLEMENT, N_NODES),
        (ActionType.BUILD_CITY, N_NODES),
        (ActionType.BUILD_ROAD, N_EDGES),
        (ActionType.MOVE_ROBBER, N_TILES),
        (ActionType.PLAY_KNIGHT_CARD, 1),
        (ActionType.PLAY_ROAD_BUILDING, 1),
        (ActionType.MARITIME_TRADE, 9),
        (ActionType.ROLL, 1),
        (ActionType.BUY_DEVELOPMENT_CARD, 1),
        (ActionType.DISCARD, 1),
        (ActionType.END_TURN, 1),
    )
    for _ in range(slot_count)
)


class RLPlayer(Player):
    """Player subclass reserved for the reinforcement-learning agent.

    Not implemented yet: ``decide`` is a stub.
    """

    def decide(self, game, playable_actions):
        """Choose an action for this player. Currently returns ``None``.

        Args:
            game: The game being played (presumably a ``CatanGame``).
            playable_actions: The actions the player may take right now.
        """
        pass


class CatanEnv(gym.Env):
    """Gymnasium environment wrapping a Catan game (work in progress).

    Observations are arrays of shape ``OBS_SPACE_SHAPE`` with entries in
    ``{0, 1}``. Actions are indices into ``ACTION_SPACE``.

    Only settlement/city actions and action types that own a single slot in
    ``ACTION_SPACE`` can currently be translated into Catan actions.
    """

    def __init__(self) -> None:
        """Create the observation/action spaces and the first game."""
        self.render_mode = None
        self.np_random = rng

        self.observation_space = gym.spaces.Box(
            low=0,
            high=1,
            shape=OBS_SPACE_SHAPE,
            dtype=np.int8,
        )
        self.action_space = gym.spaces.Discrete(len(ACTION_SPACE))

        self._game = self._new_game()

    @staticmethod
    def _new_game() -> CatanGame:
        """Return a fresh game with a single random player."""
        # Local import: Color is not among the imports at the top of this cell.
        from catan.core.models.player import Color

        # The game takes player instances, not the player class.
        return CatanGame(players=[RandomPlayer(Color.RED)])

    def _action_to_catan_action(
        self, env_action: np.int8, playable_actions: List[CatanAction]
    ) -> CatanAction:
        """Translate an env action index into one of the playable Catan actions.

        Args:
            env_action: Index into ``ACTION_SPACE``.
            playable_actions: Catan actions that may be executed right now.

        Returns:
            The first playable action matching the requested type (and, for
            settlements and cities, the requested node).

        Raises:
            ValueError: If no playable action matches ``env_action``.
            Exception: If the action type has no translation yet.
        """
        action_type = ACTION_SPACE[env_action]
        first_of_type_idx = ACTION_SPACE.index(action_type)
        num_of_type = ACTION_SPACE.count(action_type)

        # NOTE(review): assumes CatanAction exposes `action_type` next to
        # `value` - confirm against catan.core.models.enums.
        if num_of_type == 1:
            # A single slot for this type: any playable action of the type.
            candidates = (
                candidate
                for candidate in playable_actions
                if candidate.action_type == action_type
            )
        elif action_type in (ActionType.BUILD_SETTLEMENT, ActionType.BUILD_CITY):
            # The offset inside the type's run of slots is matched against
            # the Catan action's value.
            relative_value = int(env_action) - first_of_type_idx
            candidates = (
                candidate
                for candidate in playable_actions
                if candidate.action_type == action_type
                and candidate.value == relative_value
            )
        else:
            raise Exception("Action must be returned")

        catan_action = next(candidates, None)
        if catan_action is None:
            raise ValueError(
                f"Env action {env_action} ({action_type}) is not currently playable"
            )
        return catan_action

    def _random_agent_state(self):
        """Not implemented yet; returns ``None``."""
        pass

    def _get_obs(self):
        """Return the current observation.

        Placeholder: returns an all-zero array matching ``observation_space``
        because ``TileEncoder.encode`` is not implemented yet.
        TODO: encode the real game state.
        """
        return np.zeros(
            self.observation_space.shape, dtype=self.observation_space.dtype
        )

    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None):
        """Start a new episode.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset``.
            options: Unused.

        Returns:
            An ``(observation, info)`` tuple.
        """
        super().reset(seed=seed)
        self._game = self._new_game()
        return self._get_obs(), self._get_info()

    def _get_info(self):
        """Return auxiliary diagnostics; currently an empty dict."""
        return {}

    def _is_terminated(self):
        """Return True once the game reports a winning color."""
        return self._game.winning_color() is not None

    def step(self, action: np.int8):
        """Apply ``action`` to the game.

        Args:
            action: Index into ``ACTION_SPACE``.

        Returns:
            An ``(observation, reward, terminated, truncated, info)`` tuple.
        """
        # NOTE(review): assumes the playable actions are exposed as
        # `game.state.playable_actions` - confirm against CatanGame.
        catan_action = self._action_to_catan_action(
            action, self._game.state.playable_actions
        )
        # Execute the translated Catan action, not the raw env index.
        self._game.execute(catan_action)

        next_obs = self._get_obs()
        reward = action_to_reward(catan_action)
        terminated = self._is_terminated()
        truncated = False
        info = self._get_info()

        return next_obs, reward, terminated, truncated, info

In [80]:
from catan.core.models.player import Color


# Scratch game with a single random player, used by the commented-out
# experiments below.
game = CatanGame(players=[RandomPlayer(Color.RED)])
# game.play_tick()

# game.state.board.map.

# ACTION_SPACE.count(ActionType.BUILD_CITY)
# game.state.playable_actions


# np.ones(shape=(N_EDGES,))
# defaultdict(lambda: 0, ["1"])
# Display the encoder's label layout as the cell output.
obs_space_encoder.encoding

('WOOD',
 'BRICK',
 'SHEEP',
 'WHEAT',
 'ORE',
 'EMPTY',
 'HAS_ROBBER',
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 'SETTLEMENT',
 'CITY',
 'NONE',
 'P1',
 'P2',
 'P3',
 'P4',
 'SETTLEMENT',
 'CITY',
 'NONE',
 'P1',
 'P2',
 'P3',
 'P4',
 'SETTLEMENT',
 'CITY',
 'NONE',
 'P1',
 'P2',
 'P3',
 'P4',
 'SETTLEMENT',
 'CITY',
 'NONE',
 'P1',
 'P2',
 'P3',
 'P4')

In [None]:
TileEncoding

AttributeError: value