# El Gran Dalmuti - QLearning

In [1]:
import numpy as np

### Basic game mechanics

In [2]:
# A hand or board is a count vector with one slot per card value
NUM_CARD_VALUES = 13
# The Jokers occupy the last slot of that vector (index 12)
JOKER = NUM_CARD_VALUES - 1


def has_already_won(hand):
    """
    Tell whether a hand holds no cards any more.

    hand - a single hand (vector of card counts) or a stack of hands
           with one hand per row

    Returns one boolean for a single hand, otherwise one boolean per row.
    """

    no_cards_left = hand == 0
    if hand.ndim != 1:
        # Stack of hands: evaluate every row on its own
        return np.all(no_cards_left, axis=1)
    return np.all(no_cards_left)
    
    
def get_cards_array(card_type, num_cards):
    """ Build a count vector that holds `num_cards` cards of `card_type` and nothing else. """

    single_type_counts = np.zeros((NUM_CARD_VALUES,), dtype=np.int8)
    single_type_counts[card_type] = num_cards
    return single_type_counts
    

def possible_next_moves(hand, board):
    """
    Enumerate the moves available with `hand` on top of `board`.

    hand  - vector with 13 entries (number of 1, 2, ..., 12, Jokers)
    board - vector of the same layout with the cards that were played last

    Returns a tuple (possible hands, possible boards) of two 2-D arrays.
    Row i of both arrays belongs to the same move: the hand that remains
    and the board that results. Row 0 is always "pass" (hand and board
    unchanged). The other rows run from the highest card index to the
    lowest; for each card type the clean play comes first, followed by the
    plays that fill up with one, two, ... Jokers.

    A move must consist of exactly as many cards as the board and use a
    card type with a lower index than the board's type. With an empty
    board (all zeros) no index qualifies, so only the pass row is
    returned; opening moves are not generated here.
    """

    # Jokers adopt the value of the cards they accompany, so the card type on
    # the board is given by the non-Joker slots alone. Searching the whole
    # vector would select the Joker slot whenever the Jokers outnumber the
    # natural cards (e.g. one card plus two Jokers).
    natural_cards_in_board = board[:JOKER]
    if np.any(natural_cards_in_board > 0):
        card_type_in_board = np.argmax(natural_cards_in_board)
    else:
        # Jokers only -> Joker index; empty board -> index 0
        card_type_in_board = np.argmax(board)

    # Every card on the board counts exactly once, Jokers included
    num_cards_in_board = board.sum()
    num_jokers_in_hand = int(hand[JOKER])

    # Passing is always listed first
    possible_hands = [np.reshape(hand, (1, NUM_CARD_VALUES))]
    possible_boards = [np.reshape(board, (1, NUM_CARD_VALUES))]

    def add_move(new_board):
        # Record a move together with the hand that is left afterwards
        possible_hands.append(hand - new_board)
        possible_boards.append(new_board)

    if not has_already_won(hand):
        for card_type_in_hand in range(NUM_CARD_VALUES - 1, -1, -1):
            # Only card types with a lower index beat the board. This also
            # rules out the Joker slot, so Jokers are never played alone.
            if card_type_in_hand >= card_type_in_board:
                continue

            num_cards_in_hand = hand[card_type_in_hand]

            # You can play clean
            if num_cards_in_hand >= num_cards_in_board:
                add_move(get_cards_array(card_type_in_hand, num_cards_in_board))

            # Or you can play dirty (with Joker(s))
            for num_jokers in range(1, num_jokers_in_hand + 1):
                num_natural_cards = num_cards_in_board - num_jokers
                # At least one natural card has to be part of the move,
                # otherwise the move would be a bare Joker.
                if num_natural_cards < 1:
                    break
                if num_cards_in_hand >= num_natural_cards:
                    add_move(get_cards_array(card_type_in_hand, num_natural_cards)
                             + get_cards_array(JOKER, num_jokers))

    # Stack once at the end instead of re-copying the arrays for every move
    return np.vstack(possible_hands), np.vstack(possible_boards)
        
    
# Sanity checks for the helpers defined above

# has_already_won: single hands and stacks of hands
assert has_already_won(np.zeros(NUM_CARD_VALUES))
assert not has_already_won(np.ones(NUM_CARD_VALUES))
assert np.all(has_already_won(np.zeros((2, NUM_CARD_VALUES))) == np.array([True, True]))
assert np.all(
    has_already_won(np.array([[0, 0, 0, 0, 1],
                              [0, 0, 0, 0, 0],
                              [1, 0, 0, 0, 0]]))
    == np.array([False, True, False])
)

# get_cards_array: only the requested slot is filled
assert np.all(get_cards_array(1, 2) == np.array([0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]))
assert np.all(get_cards_array(4, 3) == np.array([0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0]))
assert not np.all(get_cards_array(4, 3) == np.array([0, 2, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0]))

# possible_next_moves: a pair on the board, two pairs and two Jokers in hand
h, b = possible_next_moves(np.array([0, 2, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 2]),
                           np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0]))
expected_h = np.array([
    [0, 2, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 2],
    [0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2],
    [0, 2, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 1],
    [0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 2],
    [0, 1, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 1],
])
expected_b = np.array([
    [0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1],
    [0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
])
assert np.all(h == expected_h)
assert np.all(b == expected_b)

# possible_next_moves: a dirty triple (two cards plus one Joker) on the board
h, b = possible_next_moves(np.array([1, 2, 3, 1, 0, 0, 0, 3, 0, 4, 0, 0, 2]),
                           np.array([0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1]))
expected_h = np.array([
    [1, 2, 3, 1, 0, 0, 0, 3, 0, 4, 0, 0, 2],
    [1, 2, 3, 1, 0, 0, 0, 0, 0, 4, 0, 0, 2],
    [1, 2, 3, 1, 0, 0, 0, 1, 0, 4, 0, 0, 1],
    [1, 2, 3, 1, 0, 0, 0, 2, 0, 4, 0, 0, 0],
    [1, 2, 3, 0, 0, 0, 0, 3, 0, 4, 0, 0, 0],
    [1, 2, 0, 1, 0, 0, 0, 3, 0, 4, 0, 0, 2],
    [1, 2, 1, 1, 0, 0, 0, 3, 0, 4, 0, 0, 1],
    [1, 2, 2, 1, 0, 0, 0, 3, 0, 4, 0, 0, 0],
    [1, 0, 3, 1, 0, 0, 0, 3, 0, 4, 0, 0, 1],
    [1, 1, 3, 1, 0, 0, 0, 3, 0, 4, 0, 0, 0],
    [0, 2, 3, 1, 0, 0, 0, 3, 0, 4, 0, 0, 0],
])
expected_b = np.array([
    [0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1],
    [0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1],
    [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 2],
    [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2],
    [0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    [0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
    [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2],
    [0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
    [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2],
    [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2],
])
assert np.all(h == expected_h)
assert np.all(b == expected_b)

In [3]:
# Number of players at the table
NUM_PLAYERS = 4
# Copies of each card in the deck, one entry per card index:
# index i is contained i + 1 times, the last entry is the two Jokers
AVAILABLE_CARDS = [*range(1, 13), 2]
# Player indices 0 .. NUM_PLAYERS - 1
PLAYER = [*range(NUM_PLAYERS)]


def random_initial_cards():
    """
    Random initial state for the game.

    Builds a deck with AVAILABLE_CARDS[card_type] copies of every card
    type, shuffles it with np.random.shuffle and cuts it into NUM_PLAYERS
    consecutive chunks. If the deck does not divide evenly, the first
    players receive one card more.

    Returns an int8 array of shape (NUM_PLAYERS, NUM_CARD_VALUES); row p
    is the count vector (cards per card type) dealt to player p. The
    result is two-dimensional for any number of players, including one.
    """

    deck = np.array(
        [card_type
         for card_type in range(NUM_CARD_VALUES)
         for _ in range(AVAILABLE_CARDS[card_type])],
        dtype=np.intp,
    )
    np.random.shuffle(deck)

    chunk, remainder = divmod(deck.shape[0], NUM_PLAYERS)

    # Filling a preallocated array keeps the result 2-D even for a single
    # player and avoids re-copying the stack once per player.
    player_initial_hands = np.zeros((NUM_PLAYERS, NUM_CARD_VALUES), dtype=np.int8)

    for player_index in range(NUM_PLAYERS):
        # The first `remainder` players get one extra card each
        begin_of_chunk = player_index * chunk + min(player_index, remainder)
        end_of_chunk = (player_index + 1) * chunk + min(player_index + 1, remainder)

        # Count how often each card type occurs in this player's chunk
        player_initial_hands[player_index] = np.bincount(
            deck[begin_of_chunk:end_of_chunk], minlength=NUM_CARD_VALUES)

    return player_initial_hands
        
        
# Deal once to inspect the result (one hand vector per row). No seed is set in
# the visible code, so the hands presumably differ from run to run.
random_initial_cards()

array([[0, 1, 0, 1, 0, 2, 1, 0, 3, 1, 5, 6, 0],
       [0, 1, 1, 2, 0, 1, 2, 2, 3, 2, 1, 3, 2],
       [1, 0, 2, 0, 3, 2, 1, 2, 2, 2, 4, 1, 0],
       [0, 0, 0, 1, 2, 1, 3, 4, 1, 5, 1, 2, 0]], dtype=int8)