In [227]:
import numpy as np
import math

In [563]:
SUN_RANK = [7, 8, 9, 11, 12, 13, 10, 1]
SUN_SCORES = {
    7: 0,
    8: 0,
    9: 0,
    11: 2,
    12: 3,
    13: 4,
    10: 10,
    1: 11
}

In [287]:
np.random.seed(42)

# SUIT ORDER: HEARTS, SPADES, DIAMONDS, CLUBS

player_cards = np.array([rank + suit * 13 for suit in (0, 1, 2, 3) for rank in (1, 7, 8, 9, 10, 11, 12, 13)], dtype='uint8')
np.random.shuffle(player_cards)
player_cards = player_cards.reshape(4, 8)

player_cards

array([[50, 26, 40, 33, 14, 20, 51, 46],
       [23,  1, 10, 27, 11, 24, 22, 39],
       [ 7,  8, 47,  9, 37, 48, 38, 34],
       [52, 36, 13, 21, 25, 49, 35, 12]], dtype=uint8)

In [557]:
def get_suit(card):
    return (card - 1) // 13

def get_rank(card):
    return (card - 1) % 13 + 1

def has_suit(cards, suit):
    for card in cards:
        if get_suit(card) == suit:
            return True
    return False

def get_trick_winner(trick, starter=0):
    trick_suit = get_suit(trick[starter])
    
    winner = starter
    max_rank_idx = SUN_RANK.index(get_rank(trick[starter]))
    for i, card in enumerate(trick):
        if get_suit(card) == trick_suit and SUN_RANK.index(get_rank(trick[i])) > max_rank_idx:
            max_rank_idx = SUN_RANK.index(get_rank(trick[i]))
            winner = i

    return winner

def calculate_score(played_cards):
    cur_starter = 0
    total_score = 0
    
    for i in range(0, 28, 4):
        cur_starter = get_trick_winner(played_cards[i: i + 4], cur_starter)
        round_score = 0
        for j in range(4):
            card_rank = get_rank(played_cards[i + j])
            round_score += SUN_SCORES[card_rank]
            
        if cur_starter == 1 or cur_starter == 3:
            total_score -= round_score
        else:
            total_score += round_score

    return total_score

In [591]:
class State:
    def __init__(self, played_cards, current_cards, current_player, num_of_played_cards, trick_starter, parent):
        self.state_score = 0
        self.state_visits = 0
        self.played_cards = played_cards # 1D array of cards played in order
        self.current_cards = current_cards # 2D array of the remaining cards in each player's hand
        self.current_player = current_player
        self.num_of_played_cards = num_of_played_cards
        self.trick_starter = trick_starter
        self.child_states = {
            # action: new_state
        }
        self.parent = parent

    def expand(self):
        if self.num_of_played_cards % 4 == 0:
            # first card in the trick
            for i, card in enumerate(self.current_cards[self.current_player]):
                if card == 0:
                    continue
                    
                updated_current_cards = self.current_cards.copy()
                updated_current_cards[self.current_player][i] = 0

                updated_played_cards = self.played_cards.copy()
                updated_played_cards[self.num_of_played_cards] = card

                self.child_states[card] = State(updated_played_cards, updated_current_cards, (self.current_player + 1) % 4,
                                                self.num_of_played_cards + 1, self.trick_starter, self)

        else:
            trick_first_card = self.played_cards[(self.num_of_played_cards // 4) * 4]
            trick_suit = get_suit(trick_first_card)

            player_has_suit = has_suit(self.current_cards[self.current_player], trick_suit)
            for i, card in enumerate(self.current_cards[self.current_player]):
                if player_has_suit and trick_suit == get_suit(card) or not player_has_suit:
                    if card == 0:
                        continue
                        
                    updated_current_cards = self.current_cards.copy()
                    updated_current_cards[self.current_player][i] = 0
    
                    updated_played_cards = self.played_cards.copy()
                    updated_played_cards[self.num_of_played_cards] = card

                    if self.num_of_played_cards % 4 == 3: # last card in the trick
                        updated_trick_starter = (get_trick_winner(updated_played_cards[self.num_of_played_cards - 3: self.num_of_played_cards + 1]) + self.trick_starter) % 4
                    
                        self.child_states[card] = State(updated_played_cards, updated_current_cards, updated_trick_starter,
                                                    self.num_of_played_cards + 1, updated_trick_starter, self)
                    else:
                        self.child_states[card] = State(updated_played_cards, updated_current_cards, (self.current_player + 1) % 4,
                                                self.num_of_played_cards + 1, self.trick_starter, self)

    def ucb1_score(self, child_state):
        if child_state.state_visits == 0:
            return float('inf')
        else:
            exploitation_term = child_state.state_score / child_state.state_visits
            constant = 2
            exploration_term = math.sqrt(math.log(self.state_visits) / child_state.state_visits)
            
            return exploitation_term + constant * exploration_term
    
    def get_optimal_node(self):
        if self.current_player == 1 or self.current_player == 3:
            maximizing = False
        else:
            maximizing = True
        
        optimal_action = None
        optimal_child_state = None
        if maximizing:
            optimal_ucb1_score = -float('inf')
        else:
            optimal_ucb1_score = float('inf')
            
        for action, child_state in self.child_states.items():
            current_child_ucb1_score = self.ucb1_score(child_state)
            if (maximizing and current_child_ucb1_score > optimal_ucb1_score) or (not maximizing and current_child_ucb1_score < optimal_ucb1_score):
                optimal_ucb1_score = current_child_ucb1_score
                optimal_action = action
                optimal_child_state = child_state
        
        return optimal_child_state

    def pick_random_card(self, current_cards, current_player, suit=None):
        current_player_cards = current_cards[current_player]
        if suit is None:
            card = np.random.choice(current_player_cards)
            while card == 0:
                card = np.random.choice(current_player_cards)
            idx = np.where(current_player_cards == card)[0][0]
            return card, idx
        else:
            card = np.random.choice(current_player_cards)
            while card == 0 or get_suit(card) != suit:
                card = np.random.choice(current_player_cards)
            idx = np.where(current_player_cards == card)[0][0]
            return card, idx
    
    def rollout(self):
        updated_played_cards = self.played_cards.copy()
        updated_current_cards = self.current_cards.copy()
        updated_current_player = self.current_player
        updated_trick_starter = self.trick_starter
        
        for i in range(self.num_of_played_cards, 32):
            if i % 4 == 0:
                card, idx = self.pick_random_card(updated_current_cards, updated_current_player)
            else:
                trick_first_card = updated_played_cards[(i // 4) * 4]
                trick_suit = get_suit(trick_first_card)

                player_has_suit = has_suit(updated_current_cards[updated_current_player], trick_suit)
                
                if player_has_suit:
                    card, idx = self.pick_random_card(updated_current_cards, updated_current_player, trick_suit)
                else:
                    card, idx = self.pick_random_card(updated_current_cards, updated_current_player)
                    
            
            updated_played_cards[i] = card
            updated_current_cards[updated_current_player][idx] = 0

            if i % 4 == 3:
                updated_trick_starter = (get_trick_winner(updated_played_cards[i - 3: i + 1]) + updated_trick_starter) % 4
                updated_current_player = updated_trick_starter
            else:
                updated_current_player = (updated_current_player + 1) % 4

        rollout_score = calculate_score(updated_played_cards)

        self.state_score += rollout_score
        self.state_visits += 1
        
        cur_parent = self.parent

        while cur_parent != None:
            cur_parent.state_score += rollout_score
            cur_parent.state_visits += 1
            cur_parent = cur_parent.parent

In [589]:
initial_state = State(played_cards=np.zeros(32, dtype='uint8'), current_cards=player_cards.copy(), current_player=0, num_of_played_cards=0,
                     trick_starter=0, parent=None)

calculate_score(initial_state.rollout())

84