# El Gran Dalmuti - QLearning

In [23]:
import numpy as np

### Basic game mechanics

In [24]:
# Length of every hand/board vector: twelve regular card kinds plus the jokers.
CARD_VALUES = 13
# Position of the joker count inside a hand/board vector (the last slot).
JOKER = 12
# Number of players taking part in a game.
PLAYERS = 4

# Player identifiers 0 .. PLAYERS - 1.
player = [*range(PLAYERS)]


def has_already_won(hand):
    """
    Return whether the given hand contains no cards at all.

    hand - vector with 13 entries (number of 1, 2, ..., 12, Jokers);
           any array-like (ndarray, list, tuple) is accepted.
    """

    # Convert first: comparing a plain list/tuple with 0 yields one single
    # False instead of an element-wise result, so an all-zero list would
    # wrongly be reported as "not won yet".
    return np.all(np.asarray(hand) == 0)
    
    
def get_cards_array(card_type, num_cards):
    """
    Build the vector for a set of cards that are all of one kind.

    The result has CARD_VALUES float entries, all zero except for position
    `card_type`, which holds `num_cards`.
    """

    vec = np.zeros((CARD_VALUES,), dtype=float)
    vec[card_type] = num_cards
    return vec
    

def possible_next_moves(hand, board):
    """
    Enumerate the moves available to a player holding `hand` when `board`
    is the combination currently lying on the table.

    hand  - vector with CARD_VALUES entries (card counts, jokers at JOKER)
    board - vector of the same layout describing the cards to beat

    Returns a tuple (possible_hands, possible_boards) of two 2-D arrays with
    one row per move: row i of possible_hands is the hand left after the
    move, row i of possible_boards is the board after the move.  Row 0 is
    always "pass" (hand and board unchanged), so both arrays have at least
    one row.  The remaining rows are ordered from the highest playable card
    index down to index 0; for each card type the clean play comes first,
    followed by the plays that substitute 1, 2, ... jokers.

    A move must put down exactly as many cards as the board holds, all of
    one card type with a smaller index than the board's type.  Jokers may
    replace some of those cards, but at least one natural card is required.

    How the board is interpreted: the number of cards to match is the sum of
    all entries (jokers included) and the card type is the non-joker kind
    present, or JOKER when the board holds jokers only.  This makes boards
    produced by a "dirty" move of this very function readable again.

    An empty board yields the pass move only; opening moves for a new trick
    are not generated here.
    NOTE(review): confirm that callers handle leading a trick separately.
    """

    hand = np.asarray(hand)
    board = np.asarray(board)

    # Jokers on the board count towards the number of cards to match.
    num_cards_in_board = board.sum()

    # The board's card type is its natural (non-joker) kind; board[:JOKER]
    # relies on the jokers occupying the last slot of the vector.
    natural_types_in_board = np.flatnonzero(board[:JOKER])
    if natural_types_in_board.size:
        card_type_in_board = int(natural_types_in_board[0])
    else:
        card_type_in_board = JOKER

    # You can always pass
    possible_hands = [hand]
    possible_boards = [board]

    jokers_in_hand = int(hand[JOKER])

    # Only card types with a smaller index than the board's type can be
    # played; walk them from the highest index downwards.
    for card_type_in_hand in range(card_type_in_board - 1, -1, -1):
        # 0 jokers is the clean play, 1+ jokers are the dirty plays.
        for num_jokers in range(jokers_in_hand + 1):
            num_natural = num_cards_in_board - num_jokers

            # At least one natural card is needed (this also rules out every
            # move on an empty board), and the hand must hold enough of them.
            if num_natural < 1 or num_natural > hand[card_type_in_hand]:
                continue

            new_board = get_cards_array(card_type_in_hand, num_natural)
            new_board[JOKER] = num_jokers
            possible_hands.append(hand - new_board)
            possible_boards.append(new_board)

    # Stack once at the end so the result is 2-D even for "pass only".
    return np.vstack(possible_hands), np.vstack(possible_boards)
        
    
# Sanity checks for the helpers defined above
assert has_already_won(np.zeros(CARD_VALUES))
assert not has_already_won(np.ones(CARD_VALUES))

assert (get_cards_array(1, 2) == np.array([0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])).all()
assert (get_cards_array(4, 3) == np.array([0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0])).all()
assert (get_cards_array(4, 3) != np.array([0, 2, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0])).any()

# Possible moves: the hand holds pairs at indices 1, 7 and 9 plus two jokers,
# and the board shows a pair at index 9.
h, b = possible_next_moves(
    np.array([0, 2, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 2]),
    np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0]),
)
assert (h == np.array([
    [0, 2, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 2],  # pass: hand unchanged
    [0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2],  # both cards at index 7 played
    [0, 2, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 1],  # one card at index 7 + one joker
    [0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 2],  # both cards at index 1 played
    [0, 1, 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 1],  # one card at index 1 + one joker
])).all()
assert (b == np.array([
    [0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0],  # pass: board unchanged
    [0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0],  # pair at index 7
    [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1],  # index 7 + joker
    [0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # pair at index 1
    [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],  # index 1 + joker
])).all()

In [13]:
# Scratch arrays for experimenting: a 2x3 integer matrix and a length-3 vector.
# Note that `b` rebinds the name used for the boards in the tests above.
a = np.array([
    [1, 2, 3],
    [0, 0, 0],
])
b = np.array([1, 2, 1])

array([[1, 2, 1]])