In [29]:
import random
from collections import Counter
import numpy as np
import copy
import ast
from scipy.stats import rankdata


# Different in rules: Multiple Wasabi can act on a single sushi
# No tie breaker

CARD_ON_BOARD = {
    0: 'Sashimi',
    1: 'Egg Nigiri',
    2: 'Salmon Nigiri',
    3: 'Squid Nigiri',
    4: 'Wasabi Egg',
    5: 'Wasabi Salmon',
    6: 'Wasabi Squid',
    7: 'Wasabi',
    8: 'Tempura',
    9: 'Dumpling',
    10: 'Maki',
    11: 'Pudding',
    12: 'Chopsticks',
}

CARDS = {
    0: 'Sashimi',
    1: 'Egg Nigiri',
    2: 'Salmon Nigiri',
    3: 'Squid Nigiri',
    4: 'Wasabi',
    5: 'Tempura',
    6: 'Dumpling',
    7: '1 Maki',
    8: '2 Maki',
    9: '3 Maki',
    10: 'Pudding',  # Not implemented
    11: 'Chopsticks',  # Not implemented
}


def get_score(board):
    score = 0
    score += board[0] // 3 * 10  # Salmon Nigiri
    score += board[1] * 1  # Sashimi
    score += board[2] * 2  # Squid Nigiri
    score += board[3] * 3  # Egg Nigiri
    score += board[4] * 3  # Wasabi Egg
    score += board[5] * 6  # Wasabi Salmon
    score += board[6] * 9  # Wasabi Squid
    score += board[8] // 2 * 5  # Tempura
    # Dumpling
    if board[9] == 1:
        score += 1
    if board[9] == 2:
        score += 3
    if board[9] == 3:
        score += 6
    if board[9] == 4:
        score += 10
    if board[9] > 4:
        score += ((board[9] - 4) * 5 + 10)
    return score


def add_a_card_to_board(board, card):
    if card == 0:
        board[0] += 1
    if card in [1, 2, 3]:
        if board[7] > 0:
            wasabi_cnt = board[7]
            board[7] = 0
            board[card + 3] += wasabi_cnt  # For each wasabi, add a wasabi combo (combo is always +3 index)
        else:
            board[card] += 1
    if card == 4:
        board[7] += 1
    if card == 5:
        board[8] += 1
    if card == 6:
        board[9] += 1
    if card == 7:
        board[10] += 1
    if card == 8:
        board[10] += 2
    if card == 9:
        board[10] += 3
    if card == 10:
        board[11] += 1


def get_maki_score(maki_cnt_list):
    maki_rank = rankdata([_*-1 for _ in maki_cnt_list], method='min')
    maki_score = []
    first_count = np.sum(maki_rank == 1)
    second_count = np.sum(maki_rank == 2)
    for rank in maki_rank:
        if rank == 1:
            maki_score.append(6 / first_count)
        elif rank == 2:
            maki_score.append(3 / second_count)
        else:
            maki_score.append(0)
    return maki_score

def get_pudding_score(pudding_cnt_list):
    pudding_rank = rankdata([_*-1 for _ in pudding_cnt_list], method='min')
    pudding_score = []
    lowest_rank = max(pudding_rank)
    first_count = np.sum(pudding_rank == 1)
    last_count = np.sum(pudding_rank == lowest_rank)
    for rank in pudding_rank:
        if rank == 1:
            pudding_score.append(6 / first_count)
        elif rank == lowest_rank:
            pudding_score.append(-6 / last_count)
        else:
            pudding_score.append(0)
    return pudding_score

def translate_board(board):
    board_list = ast.literal_eval(board)
    res = []
    for i, count in enumerate(board_list):
        res.append(f'{CARDS[i]} X {count}')
    return '  '.join(res)

def convert_hand_to_counter(hand):
    counter = Counter(hand)
    res = [0] * len(CARDS)
    for i in range(len(CARDS)):
        res[i] = counter[i]
    return res

def get_actual_card_pool():
    card_pool = []
    card_pool.extend([0] * 14)
    card_pool.extend([1] * 5)
    card_pool.extend([2] * 10)
    card_pool.extend([3] * 5)
    card_pool.extend([4] * 6)
    card_pool.extend([5] * 14)
    card_pool.extend([6] * 14)
    card_pool.extend([7] * 6)
    card_pool.extend([8] * 12)
    card_pool.extend([9] * 8)
    card_pool.extend([10] * 10)

    return card_pool


def is_available_action(hand, action):
    c1 = convert_hand_to_counter(hand)
    c2 = convert_hand_to_counter(action)
    for i in range(c2):
        if c2[i] > c1[i]:
            return False
    return True

In [30]:
class State:

    def __init__(self, card_pool):
        self.original_card_pool = card_pool
        self.card_pool = copy.copy(self.original_card_pool)
        self.players = []
        self.starting_hand_size = 10
        self.scoreboard = []
        self.deterministic = False

    def add_player(self, player):
        self.players.append(player)

    def deal(self):
        for p in self.players:
            p.prepare_for_next_round()
            for _ in range(self.starting_hand_size):
                if not self.deterministic:
                    random.shuffle(self.card_pool)
                p.draw(self.card_pool.pop())

    def play(self, num_of_rounds=1, output_result=True):
        self.scoreboard = [0] * len(self.players)
        for p in self.players:
            p.prepare_for_next_game()

        for r in range(num_of_rounds):
            self.deal()
            print(self.players[0].hand, self.players[1].hand)
            for turn in range(self.starting_hand_size): # Round num is same as starting hand sizer
                all_player_boards = []  # A list of (player class, player board), to check if it's the player's board
                for p in self.players:
                    all_player_boards.append((p, p.board))
                for p in self.players:
                    p.pick_a_card(all_player_boards)
                self.pass_around()
            for i, p in enumerate(self.players):
                self.scoreboard[i] += p.get_score()

            # Adjustment for Maki
            maki_score = get_maki_score([_.board[10] for _ in self.players])
            for i in range(len(maki_score)):
                self.scoreboard[i] += maki_score[i]

        # Adjustment for Pudding
        pudding_score = get_pudding_score([_.board[11] for _ in self.players])
        for i in range(len(pudding_score)):
            self.scoreboard[i] += pudding_score[i]

        max_score = max(self.scoreboard)
        if output_result:
            for i, p in enumerate(self.players):
                print("Player", i)
                for i in range(len(p.board)):
                    print(f"{CARD_ON_BOARD[i]} X {p.board[i]}")
            print(self.scoreboard, max_score)

        for i, p in enumerate(self.players):
            if self.scoreboard[i] == max_score:
                self.stats[i] += 1
                p.feed_reward(max(1, len(self.players) - 1))  # If there are more than one opponent, put more reward for a win
            else:
                p.feed_reward(-1)

    def play_games(self, num_of_games=1, round_per_game=1, output_result=True):
        self.stats = [0] * len(self.players)
        for i in range(num_of_games):
            self.refresh_state()
            self.play(output_result=output_result, num_of_rounds=round_per_game)
        # print(self.stats)

    def pass_around(self):
        tmp = self.players[-1].hand
        for i in range(len(self.players) - 1, 0, -1):
            self.players[i].hand = self.players[i-1].hand  # Pass to next player
        self.players[0].hand = tmp  # First player get the last player's hand


    def refresh_state(self):
        self.scoreboard = []
        self.card_pool = copy.copy(self.original_card_pool)

In [31]:
class BasePlayer:

    def __init__(self, name):
        self.name = name
        self.hand = []
        self.board = [0] * len(CARD_ON_BOARD)
        self.prepare_for_next_round()

    def draw(self, card):
        self.hand.append(card)

    def pick_a_card(self, all_player_boards):
        raise NotImplementedError

    def get_score(self):
        return get_score(self.board)

    def feed_reward(self, reward):
        # Defaults to not learning
        return

    def prepare_for_next_round(self):
        self.hand = []
        pudding_count = self.board[11]
        self.board = [0] * len(CARD_ON_BOARD)
        self.board[11] = pudding_count

    def prepare_for_next_game(self):
        self.hand = []
        self.board = [0] * len(CARD_ON_BOARD)

In [32]:
class QPlayer(BasePlayer):

    def __init__(self, name):
        super().__init__(name)
        self.decay_gamma = 0.9
        self.lr = 0.01
        self.exp_rate = 0.3
        self.hits = 0
        self.querys = 0

        self.model_dict = {}

        self.prepare_for_next_round()

    def draw(self, card):
        self.hand.append(card)

    def pick_a_card(self, all_player_boards):
        action = None
        max_value = -100
        for possible_next_card in set(self.hand):
            board = copy.copy(self.board)
            add_a_card_to_board(board, possible_next_card)
            self.querys += 1
            if str(board) in self.model_dict:
                self.hits += 1
            value = self.model_dict.get(str(board), 0) + random.random() / 1e6
            if value > max_value:
                max_value = value
                action = possible_next_card

        # Take a card based on action
        self.hand.remove(action)
        add_a_card_to_board(self.board, action)

        # Add state to memory
        self.states_in_game.append(str(self.board))

    def get_score(self):
        return get_score(self.board)

    def feed_reward(self, reward):
        for state in self.states_in_game[::-1]:
            if state not in self.model_dict:
                self.model_dict[state] = 0
            self.model_dict[state] += (reward - self.model_dict[state]) * self.lr
            reward *= self.decay_gamma

    def prepare_for_next_round(self):
        super().prepare_for_next_round()
        self.states_in_game = []
        self.states_in_game.append(str(self.board))

In [33]:
class RandomPlayer(BasePlayer):

    def __init__(self, name, playstyle='random'):
        super().__init__(name)
        self.playstyle = playstyle
        self.prepare_for_next_round()

    def draw(self, card):
        self.hand.append(card)

    def pick_a_card(self, all_player_boards):
        action = None

        if self.playstyle == 'last':  # Always draw first card
            action = self.hand.pop()
            add_a_card_to_board(self.board, action)
            return

        # Pick randomly
        if self.playstyle == 'random':
            random.shuffle(self.hand)
            action = self.hand.pop()
            add_a_card_to_board(self.board, action)
            return

    def get_score(self):
        return get_score(self.board)

    def feed_reward(self, reward):
        return

In [34]:
card_pool = []
card_pool.extend([0] * 0)
card_pool.extend([1] * 10)
card_pool.extend([2] * 10)
card_pool.extend([3] * 10)
card_pool.extend([7] * 30)  # Wasabi
card_pool.extend([8] * 0)  # Tempura

state = State(card_pool)
p1 = RandomPlayer('Player 1')
p2 = RandomPlayer('Player 2')
p3 = QPlayer('Player 3')
state.add_player(p1)
state.add_player(p2)
state.add_player(p3)
state.play_games(1)

[2, 3, 2, 7, 3, 2, 7, 1, 1, 7] [7, 7, 7, 1, 7, 1, 7, 7, 7, 7]
True
Player 0
Sashimi X 0
Egg Nigiri X 2
Salmon Nigiri X 2
Squid Nigiri X 2
Wasabi Egg X 0
Wasabi Salmon X 0
Wasabi Squid X 0
Wasabi X 0
Tempura X 0
Dumpling X 0
Maki X 4
Pudding X 0
Chopsticks X 0
Player 1
Sashimi X 0
Egg Nigiri X 0
Salmon Nigiri X 2
Squid Nigiri X 0
Wasabi Egg X 0
Wasabi Salmon X 0
Wasabi Squid X 0
Wasabi X 0
Tempura X 0
Dumpling X 0
Maki X 8
Pudding X 0
Chopsticks X 0
Player 2
Sashimi X 0
Egg Nigiri X 3
Salmon Nigiri X 1
Squid Nigiri X 1
Wasabi Egg X 0
Wasabi Salmon X 0
Wasabi Squid X 0
Wasabi X 0
Tempura X 0
Dumpling X 0
Maki X 5
Pudding X 0
Chopsticks X 0
[14.0, 12.0, 13.0] 14.0
