In [434]:
import numpy as np
import time
from numba import njit

In [414]:
# Number of cards dealt to each of the four players.
NUM_OF_STARTING_CARDS = 6

# Ranks ordered from weakest to strongest: a rank's position in this list is
# the strength compared by get_trick_winner (so 10 and the ace, 1, rank highest).
SUN_RANK = [7, 8, 9, 11, 12, 13, 10, 1]

# Point value of each rank, listed in the same weakest-to-strongest order as SUN_RANK.
SUN_SCORES = dict(zip(SUN_RANK, (0, 0, 0, 2, 3, 4, 10, 11)))

In [436]:
np.random.seed(42)

# Build the 32-card deck. A card id is ``rank + 13 * suit`` for the four suits
# and the eight ranks in play (ace = 1, then 7 through 13).
player_cards = np.array(
    [13 * suit + rank for suit in range(4) for rank in (1, 7, 8, 9, 10, 11, 12, 13)],
    dtype='uint8',
)
np.random.shuffle(player_cards)

# Deal the first 4 * NUM_OF_STARTING_CARDS cards, one row per player.
player_cards = player_cards[:4 * NUM_OF_STARTING_CARDS].reshape(4, -1)

# Sort each hand (row) in ascending card-id order, in place.
player_cards.sort()

player_cards

array([[14, 20, 26, 33, 40, 50],
       [ 1, 10, 23, 27, 46, 51],
       [ 7,  8, 11, 22, 24, 39],
       [ 9, 34, 37, 38, 47, 48]], dtype=uint8)

In [418]:
def get_suit(card):
    """Return the suit index of a card id.

    Card ids are encoded as ``rank + 13 * suit`` with ranks in 1..13, so ids
    1-13 belong to suit 0, 14-26 to suit 1, and so on.

    Args:
        card: A single card id (Python int or NumPy integer scalar). The id 0
            is used elsewhere in this notebook to mark an empty hand slot.

    Returns:
        The suit index as a plain Python int. The empty-slot id 0 yields -1,
        which never equals a real suit index.
    """
    # Coerce to a Python int before subtracting: hands are stored as uint8
    # arrays, and ``np.uint8(0) - 1`` wraps around to 255 (with an overflow
    # RuntimeWarning on NumPy >= 2) instead of producing -1.
    return (int(card) - 1) // 13

def get_rank(card):
    """Return the rank (1..13) of a card id.

    Card ids are encoded as ``rank + 13 * suit``, so the rank is the 1-based
    position of the id inside its block of 13.

    Args:
        card: A single card id (Python int or NumPy integer scalar). The
            empty-slot id 0 has no meaningful rank and should not be passed.

    Returns:
        The rank as a plain Python int.
    """
    # Work on a Python int so that uint8 inputs cannot wrap around on the
    # subtraction and the result is a native int for list/dict lookups.
    return (int(card) - 1) % 13 + 1

def has_suit(cards, suit):
    """Tell whether any card in ``cards`` belongs to ``suit``.

    Args:
        cards: Iterable of card ids (for example one player's hand).
        suit: Suit index to look for, as returned by ``get_suit``.

    Returns:
        True if at least one card's suit equals ``suit``, otherwise False.
    """
    return any(get_suit(held) == suit for held in cards)

def get_trick_winner(trick, starter=0):
    """Return the index of the player who wins ``trick``.

    The suit of the first card is the led suit. Only cards of that suit can
    win, and among them the one whose rank appears latest in ``SUN_RANK``
    takes the trick.

    Args:
        trick: Sequence of card ids in the order they were played; must hold
            at least the leading card.
        starter: Index of the player who played ``trick[0]``.

    Returns:
        ``starter`` if no later card beats the lead, otherwise
        ``(position_in_trick + starter) % 4`` for the winning card.
    """
    led_suit = get_suit(trick[0])
    winning_seat = starter
    best_strength = SUN_RANK.index(get_rank(trick[0]))

    for offset, played in enumerate(trick):
        if get_suit(played) != led_suit:
            continue  # an off-suit card can never take the trick

        strength = SUN_RANK.index(get_rank(played))
        if strength > best_strength:
            best_strength = strength
            winning_seat = (offset + starter) % 4

    return winning_seat

In [420]:
class State:
    """One node of the explicitly built game tree searched by ``minimax``.

    Every node holds its own copy of the play history and of the hands, and
    stores its successors in ``child_states`` once ``expand`` has been called.
    """

    def __init__(self, played_cards, current_cards, current_player, num_of_played_cards, trick_starter):
        """Store the position described by the arguments.

        Args:
            played_cards: 1D array of the cards played so far, in play order.
            current_cards: 2D array with one row per player holding the cards
                still in hand; a slot whose card was played is set to 0.
            current_player: Index of the player to move.
            num_of_played_cards: Number of cards already written to
                ``played_cards``.
            trick_starter: Index of the player who led the current trick.
        """
        self.played_cards = played_cards  # 1D array of cards played in order
        self.current_cards = current_cards  # 2D array of the remaining cards in each player's hand
        self.current_player = current_player
        self.num_of_played_cards = num_of_played_cards
        self.trick_starter = trick_starter
        # Maps each legal card (the action) to the State reached by playing it.
        self.child_states = {}

    def expand(self):
        """Create one child state per legal card of the current player.

        When leading a trick every card still in hand is legal. Otherwise the
        player must follow the led suit if holding at least one card of it,
        and may play any remaining card if not.
        """
        count = self.num_of_played_cards
        hand = self.current_cards[self.current_player]

        # ``required_suit`` stays None unless the trick is already open AND the
        # player is able to follow the suit that was led.
        required_suit = None
        if count % 4 != 0:
            led_suit = get_suit(self.played_cards[(count // 4) * 4])
            if has_suit(hand, led_suit):
                required_suit = led_suit

        for slot, card in enumerate(hand):
            if card == 0:
                continue  # empty slot: this card has already been played
            if required_suit is not None and get_suit(card) != required_suit:
                continue
            self.child_states[card] = self._child_after(slot, card)

    def _child_after(self, slot, card):
        """Build the State reached when the current player plays ``card`` from hand slot ``slot``."""
        count = self.num_of_played_cards

        # Copy so that sibling states never share mutable arrays.
        hands = self.current_cards.copy()
        hands[self.current_player][slot] = 0

        history = self.played_cards.copy()
        history[count] = card

        if count % 4 == 3:
            # Fourth card of the trick: the winner both moves next and leads
            # the next trick.
            winner = get_trick_winner(history[count - 3: count + 1], self.trick_starter) % 4
            return State(history, hands, winner, count + 1, winner)

        return State(history, hands, (self.current_player + 1) % 4, count + 1, self.trick_starter)

    def calculate_score(self):
        """Return the points collected by players 0 and 2 in the completed tricks.

        Tricks are replayed from the start of ``played_cards`` assuming that
        player 0 led the first trick and that each trick winner leads the
        next. A trick's card points (``SUN_SCORES``) count when player 0 or 2
        wins it, and winning the final trick of the deal adds 10 more.

        Raises:
            ValueError: If the current trick is not complete.
        """
        if self.num_of_played_cards % 4 != 0:
            raise ValueError("Cannot calculate score before completing the current trick.")

        total_score = 0
        trick_winner = 0  # player 0 is assumed to lead the very first trick

        for first in range(0, self.num_of_played_cards - 3, 4):
            trick = self.played_cards[first: first + 4]
            trick_winner = get_trick_winner(trick, trick_winner)
            trick_points = sum(SUN_SCORES[get_rank(played)] for played in trick)

            if trick_winner in (0, 2):
                total_score += trick_points
                # Bonus for taking the last trick of the deal.
                if first == NUM_OF_STARTING_CARDS * 4 - 4:
                    total_score += 10

        return total_score

In [422]:
class GameState:
    """A single mutable game position explored in place with ``choose`` / ``unchoose``.

    Instead of copying the position for every move, the hands and the play
    history are modified in place and each move records enough information
    (who played, from which hand slot) to be undone exactly.
    """

    def __init__(self, current_cards):
        """Start a new deal with player 0 to move.

        Args:
            current_cards: 2D array with one row per player. It is stored by
                reference and modified in place, so pass a copy if the
                original deal must be preserved.
        """
        total_cards = 4 * NUM_OF_STARTING_CARDS
        self.current_cards = current_cards
        self.played_cards = np.zeros(total_cards, dtype='uint8')
        self.current_player = 0
        self.num_of_played_cards = 0
        # Per-move undo log. 255 marks "no move recorded"; it is written
        # explicitly rather than obtained from unsigned wraparound of 0 - 1.
        self.card_indices = np.full(total_cards, 255, dtype='uint8')
        self.player_indices = np.full(total_cards, 255, dtype='uint8')

    def get_legal_moves(self):
        """Return the list of cards the current player may play.

        When leading a trick every card still in hand is legal. Otherwise the
        player must follow the led suit if holding at least one card of it,
        and may play any remaining card if not.
        """
        hand = self.current_cards[self.current_player]
        # Slots holding 0 are cards that were already played.
        held = [card for card in hand if card != 0]

        if self.num_of_played_cards % 4 == 0:
            return held

        led_card = self.played_cards[(self.num_of_played_cards // 4) * 4]
        led_suit = get_suit(led_card)
        following = [card for card in held if get_suit(card) == led_suit]
        # Fall back to the whole hand only when the led suit cannot be followed.
        return following or held

    def choose(self, card):
        """Play ``card`` for the current player and advance to the next player.

        Args:
            card: Id of a card currently held by the player to move.

        Raises:
            ValueError: If every card has already been played, if ``card`` is
                the empty-slot marker 0, or if the current player does not
                hold ``card``.
        """
        played = self.num_of_played_cards
        # The history array has one entry per card of the deal, so its length
        # is the exact bound that must not be overrun.
        if played == len(self.played_cards):
            raise ValueError("Cannot choose a card since all cards have been played.")
        if card == 0:
            raise ValueError("Cannot choose card 0: it marks an empty hand slot.")

        hand = self.current_cards[self.current_player]
        slots = np.where(hand == card)[0]
        if slots.size == 0:
            raise ValueError(f"Player {self.current_player} does not hold card {card}.")
        card_idx = int(slots[0])

        self.played_cards[played] = card
        hand[card_idx] = 0

        # Remember who played and from which slot so unchoose() can restore it.
        self.player_indices[played] = self.current_player
        self.card_indices[played] = card_idx

        if played % 4 == 3:
            # Fourth card of the trick: the trick winner moves next.
            trick_starter = int(self.player_indices[played - 3])
            self.current_player = int(
                get_trick_winner(self.played_cards[played - 3: played + 1], trick_starter)
            )
        else:
            self.current_player = (self.current_player + 1) % 4

        self.num_of_played_cards = played + 1

    def unchoose(self):
        """Undo the most recent ``choose`` call, restoring hand, history and player.

        Raises:
            ValueError: If no card has been played yet.
        """
        if self.num_of_played_cards == 0:
            raise ValueError("Cannot unchoose a card since no cards have been played.")

        last = self.num_of_played_cards - 1

        # Plain ints keep current_player's type stable across choose/unchoose.
        self.current_player = int(self.player_indices[last])
        card_idx = int(self.card_indices[last])

        self.current_cards[self.current_player][card_idx] = self.played_cards[last]
        self.played_cards[last] = 0

        self.player_indices[last] = 255
        self.card_indices[last] = 255

        self.num_of_played_cards = last

    def calculate_score(self):
        """Return the points collected by players 0 and 2 in the completed tricks.

        Tricks are replayed from the start of ``played_cards`` assuming that
        player 0 led the first trick and that each trick winner leads the
        next. A trick's card points (``SUN_SCORES``) count when player 0 or 2
        wins it, and winning the final trick of the deal adds 10 more.

        Raises:
            ValueError: If the current trick is not complete.
        """
        if self.num_of_played_cards % 4 != 0:
            raise ValueError("Cannot calculate score before completing the current trick.")

        total_score = 0
        trick_winner = 0  # player 0 always leads the first trick (see __init__)

        for first in range(0, self.num_of_played_cards - 3, 4):
            trick = self.played_cards[first: first + 4]
            trick_winner = get_trick_winner(trick, trick_winner)
            trick_points = sum(SUN_SCORES[get_rank(played)] for played in trick)

            if trick_winner in (0, 2):
                total_score += trick_points
                # Bonus for taking the last trick of the deal.
                if first == NUM_OF_STARTING_CARDS * 4 - 4:
                    total_score += 10

        return total_score

In [424]:
def minimax(state, depth, alpha, beta, maximizing):
    """Alpha-beta search over the tree of ``State`` nodes.

    The value of a position is ``state.calculate_score()``, i.e. the points of
    players 0 and 2. Players 0 and 2 maximise it, players 1 and 3 minimise it.
    Every call increments the global ``NODES_VISITED`` counter, which must
    already exist.

    Args:
        state: Node to evaluate; it is expanded on demand.
        depth: Remaining number of plies to search.
        alpha: Best score the maximising side can already guarantee.
        beta: Best score the minimising side can already guarantee.
        maximizing: True when the player to move at ``state`` maximises.

    Returns:
        A ``(score, leaf_state)`` tuple: the backed-up score and the leaf
        ``State`` it came from.
    """
    global NODES_VISITED
    NODES_VISITED += 1

    if depth == 0 or state.num_of_played_cards == NUM_OF_STARTING_CARDS * 4:
        return state.calculate_score(), state

    if not state.child_states:
        state.expand()

    best_score = -float("inf") if maximizing else float("inf")
    best_leaf = None

    for child in state.child_states.values():
        # The side to move can stay the same after a trick is won, so the
        # role is derived from the child's player rather than alternated.
        child_maximizes = child.current_player not in (1, 3)
        score, leaf = minimax(child, depth - 1, alpha, beta, child_maximizes)

        if maximizing:
            if score > best_score:
                best_score, best_leaf = score, leaf
            alpha = max(alpha, score)
        else:
            if score < best_score:
                best_score, best_leaf = score, leaf
            beta = min(beta, score)

        if beta <= alpha:
            break  # the opponent already has a better alternative elsewhere

    return best_score, best_leaf

In [442]:
# Root of the search tree: nothing has been played and player 0 leads the first trick.
initial_state = State(
    played_cards=np.zeros(4 * NUM_OF_STARTING_CARDS, dtype='uint8'),
    current_cards=player_cards.copy(),  # copy so the dealt hands stay untouched
    current_player=0,
    num_of_played_cards=0,
    trick_starter=0,
)

In [443]:
# Reset the global counter that minimax() increments on every call.
NODES_VISITED = 0

start = time.perf_counter()

# Search the whole deal: one ply per card, with player 0 (a maximiser) to move.
max_score, max_state = minimax(
    initial_state,
    depth=4 * NUM_OF_STARTING_CARDS,
    alpha=float("-inf"),
    beta=float("inf"),
    maximizing=True,
)

end = time.perf_counter()

print(max_score, max_state.played_cards, sep="\n")
print(f"Time Elapsed: {end - start}")
print(f"NODES VISITED: {NODES_VISITED}")

72
[14 23 22  9 26  1 24 34 20 46  7 37 40 51 11 47 50 10 39 48 33 27  8 38]
Time Elapsed: 51.68571324198274
NODES VISITED: 730255


In [402]:
# Display name for each suit index returned by get_suit().
SUITS = dict(enumerate(("Hearts", "Spades", "Diamonds", "Clubs")))

# Print the card sequence stored in the leaf state returned by the search.
for i, card in enumerate(max_state.played_cards):
    rank_label, suit_label = get_rank(card), SUITS[get_suit(card)]
    print(f"Card #{i + 1}: {rank_label} of {suit_label}")

Card #1: 1 of Spades
Card #2: 10 of Spades
Card #3: 9 of Spades
Card #4: 9 of Hearts
Card #5: 13 of Spades
Card #6: 1 of Hearts
Card #7: 11 of Spades
Card #8: 8 of Diamonds
Card #9: 7 of Spades
Card #10: 7 of Clubs
Card #11: 7 of Hearts
Card #12: 11 of Diamonds
Card #13: 1 of Clubs
Card #14: 12 of Clubs
Card #15: 11 of Hearts
Card #16: 8 of Clubs
Card #17: 11 of Clubs
Card #18: 10 of Hearts
Card #19: 13 of Diamonds
Card #20: 9 of Clubs
Card #21: 7 of Diamonds
Card #22: 1 of Diamonds
Card #23: 8 of Hearts
Card #24: 12 of Diamonds


In [404]:

# List every dealt card, hand after hand (the rows of player_cards flattened in order).
dealt_cards = player_cards.reshape(4 * NUM_OF_STARTING_CARDS)
for i, card in enumerate(dealt_cards):
    rank_label, suit_label = get_rank(card), SUITS[get_suit(card)]
    print(f"Card #{i + 1}: {rank_label} of {suit_label}")

Card #1: 1 of Spades
Card #2: 7 of Spades
Card #3: 13 of Spades
Card #4: 7 of Diamonds
Card #5: 1 of Clubs
Card #6: 11 of Clubs
Card #7: 12 of Clubs
Card #8: 1 of Hearts
Card #9: 10 of Hearts
Card #10: 11 of Hearts
Card #11: 10 of Spades
Card #12: 11 of Spades
Card #13: 1 of Diamonds
Card #14: 7 of Clubs
Card #15: 7 of Hearts
Card #16: 8 of Hearts
Card #17: 9 of Hearts
Card #18: 9 of Spades
Card #19: 11 of Diamonds
Card #20: 13 of Diamonds
Card #21: 8 of Clubs
Card #22: 13 of Hearts
Card #23: 8 of Spades
Card #24: 8 of Diamonds
Card #25: 10 of Diamonds
Card #26: 12 of Diamonds
Card #27: 9 of Clubs
Card #28: 13 of Clubs


In [406]:
def game_state_minimax(game_state, depth, alpha, beta, maximizing):
    """Alpha-beta search that plays and undoes moves on one ``GameState``.

    The value of a position is ``game_state.calculate_score()``, i.e. the
    points of players 0 and 2. Players 0 and 2 maximise it, players 1 and 3
    minimise it. Every call increments the global ``NODES_VISITED`` counter,
    which must already exist. ``game_state`` is restored to its entry position
    before the function returns.

    Args:
        game_state: Position to search; modified in place during the search.
        depth: Remaining number of plies to search.
        alpha: Best score the maximising side can already guarantee.
        beta: Best score the minimising side can already guarantee.
        maximizing: True when the player to move maximises.

    Returns:
        A ``(score, path)`` tuple where ``path`` is a copy of ``played_cards``
        taken at the leaf that produced ``score``.
    """
    global NODES_VISITED
    NODES_VISITED += 1

    if depth == 0 or game_state.num_of_played_cards == NUM_OF_STARTING_CARDS * 4:
        # Copy the history: the caller's unchoose() calls will overwrite it.
        return game_state.calculate_score(), game_state.played_cards.copy()

    best_score = -float("inf") if maximizing else float("inf")
    best_path = None

    for action in game_state.get_legal_moves():
        game_state.choose(action)
        # After choose() current_player is the next mover, which may be the
        # trick winner, so the role is derived from it rather than alternated.
        next_maximizes = game_state.current_player not in (1, 3)
        score, path = game_state_minimax(game_state, depth - 1, alpha, beta, next_maximizes)
        game_state.unchoose()

        if maximizing:
            if score > best_score:
                best_score, best_path = score, path
            alpha = max(alpha, score)
        else:
            if score < best_score:
                best_score, best_path = score, path
            beta = min(beta, score)

        if beta <= alpha:
            break  # the opponent already has a better alternative elsewhere

    return best_score, best_path

In [439]:
# Reset the global counter that game_state_minimax() increments on every call.
NODES_VISITED = 0

start = time.perf_counter()

# Building the GameState is part of the timed region; it works on a copy of the deal.
game_state = GameState(player_cards.copy())

# Search the whole deal: one ply per card, with player 0 (a maximiser) to move.
best_score, best_path = game_state_minimax(
    game_state,
    depth=4 * NUM_OF_STARTING_CARDS,
    alpha=float("-inf"),
    beta=float("inf"),
    maximizing=True,
)

end = time.perf_counter()

print(best_score, best_path, sep="\n")
print(f"Time Elapsed: {end - start}")
print(f"NODES VISITED: {NODES_VISITED}")

72
[14 23 22  9 26  1 24 34 20 46  7 37 40 51 11 47 50 10 39 48 33 27  8 38]
Time Elapsed: 39.39665660401806
NODES VISITED: 730255
