# Imports

In [None]:
from abc import ABC, abstractproperty, abstractmethod
from typing import List, Tuple

import random

import numpy as np

# Hyperparameters

In [None]:
#reward handed to an agent after it successfully plays a card
GOOD_CARD_REWARD = 1
#presumably the reward for drawing cards; not referenced by the code visible in this file
DRAW_CARD_REWARD = -1

#reward handed to the agent that empties its hand
WIN_GAME_REWARD = 100
#reward handed to an agent that makes an illegal move
LOSE_GAME_REWARD = -100

#number of neurons in the hidden layer of RLAgent's network
HIDDEN_LAYER_SIZE = 64

# UnoGame

In [None]:
class UnoGameMaster():
  """
  runs a simplified game of Uno between a list of agents.

  a card is a (color, value) tuple. colors are 0-3; values 10, 11 and 12 are
  the skip, plus2 and reverse action cards, lower values trigger no action.
  a move is either a card to play or (-1, -1) to draw cards.
  """

  def __init__(self):
    self._deck: List[Tuple[int, int]] = []

    self._discard: List[Tuple[int, int]] = []
    self._top = None

    self._agents = []
    self._index = 0 #index of which agents turn it is
    self._dir = 1 #direction of play
    self._turn = 1

    self._penalty = 1 #number of cards the next drawing agent has to take

    self._winner = None #index of the winning agent, None while undecided


  def state(self):
    """
    prints a summary of the game (deck, discard pile, top card, agents,
    turn count and penalty). returns None.
    """
    print(f"deck:\n{self._deck}\nsize: {len(self._deck)}\n")
    print(f"discard:\n{self._discard}\nsize: {len(self._discard)}\n")
    print(f"top:\n{self._top}\n")
    print(f"agents:\n{[agent.get_name() for agent in self._agents]}\n")
    print(f"turn:\n{self._turn}\n")
    print(f"penalty:\n{self._penalty}\n")


  def setup(self, agents) -> bool:
    """
    reset and shuffle the deck, deal cards, and start the discard pile.

    Parameters:
    - List[UnoAgent]: agents in the game

    Returns:
    - True

    """
    #per color: a single 0 and two copies of every value 1-12 (100 cards)
    self._deck = []
    for color in range(4):
      self._deck.append((color, 0))
      for value in range(1, 13):
        self._deck.extend([(color, value)] * 2)
    random.shuffle(self._deck)

    self._discard = []
    self._top = None

    for agent in agents:
      agent.clear_hand()
    self._agents = agents
    self._index = 0
    self._dir = 1
    self._turn = 1

    self._penalty = 1

    self._winner = None

    #deal cards, one at a time, seven rounds
    for _ in range(7):
      for agent in self._agents:
        agent.add_card(self.get_card_from_deck())

    #the opening card counts as played: its action (if any) applies
    start_discard = self.get_card_from_deck()
    self.add_to_discard(start_discard)
    self.perform_card_action(start_discard)

    return True


  def get_top(self) -> Tuple[int, int]:
    """
    Returns:
    - Tuple[int, int]: card on top of the discard pile (None before setup)
    """
    return self._top


  def get_penalty(self) -> int:
    """
    Returns:
    - int: number of cards the next drawing agent has to take
    """
    return self._penalty


  def get_turn_agent(self):
    """
    Returns:
    - the agent whose turn it is
    """
    #_index is only ever incremented/decremented, wrap it back into range here
    self._index = self._index % len(self._agents)
    return self._agents[self._index]


  def get_card_from_deck(self) -> Tuple[int, int]:
    """
    pops the top card of the deck and returns it, adding in discard pile if empty

    Returns:
    - Tuple[int, int]: card

    Raises:
    - IndexError: no card is left in the deck nor below the top of the discard pile
    """
    if (len(self._deck) < 1):
      #recycle everything below the top card; the top card stays in play
      self._deck = self._discard[:-1]
      random.shuffle(self._deck)
      self._discard = [self._top]

    if (len(self._deck) < 1):
      raise IndexError("no cards left to draw: deck and discard pile are exhausted")

    return self._deck.pop()


  def add_to_discard(self, card: Tuple[int, int]) -> bool:
    """
    puts card in discard pile and makes it the top card.

    Parameters:
    - Tuple[int, int]

    Returns:
    - True
    """
    self._discard.append(card)
    self._top = card
    return True


  def add_to_deck(self, card: Tuple[int, int]) -> bool:
    """
    puts card at the bottom of the deck.

    Parameters:
    - Tuple[int, int]

    Returns:
    - True
    """
    #cards are drawn with pop() from the end, so index 0 is the bottom
    self._deck.insert(0, card)
    return True


  def is_playable_card(self, card: Tuple[int, int]) -> bool:
    """
    whether a card is playable.

    a card with the same value as the top card is always playable. a card
    with the same color is only playable while no penalty is pending
    (penalty of 1).

    Parameters:
    - Tuple[int, int]: card to play

    Return:
    - bool: true if playable
    """
    same_value = self._top[1] == card[1]
    same_color = self._top[0] == card[0]
    return bool(same_value or (same_color and self._penalty == 1))


  def is_drawing_move(self, move: Tuple[int, int]) -> bool:
    """
    checks if this move is (-1, -1) for drawing cards

    Parameters:
    - Tuple[int, int]: move

    Return:
    - bool: true if (-1, -1)
    """
    return move == (-1, -1)


  def perform_card_action(self, card: Tuple[int, int]) -> bool:
    """
    checks if a card is an action card and then modifies the game state accordingly

    Parameters:
    - Tuple[int, int]: card

    Returns:
    - bool: received an action card
    """
    #skip
    if (card[1] == 10):
      #one extra step on top of the regular end-of-turn step
      self._index += self._dir
      return True

    #plus2
    if (card[1] == 11):
      #a penalty of 1 means "no penalty pending", so the first +2 sets it to 2
      if (self._penalty == 1):
        self._penalty = 2
      else:
        self._penalty += 2
      return True

    #reverse
    if (card[1] == 12):
      self._dir = -(self._dir)
      return True

    return False


  def query_turn(self) -> bool:
    """
    query current agent for a move, then try to apply it.
    this function also gives the agent its reward after making a move.

    an illegal move (a card that is not playable, or a playable card that is
    not in the agent's hand) is rewarded with LOSE_GAME_REWARD and leaves the
    game state untouched.

    Return:
    - bool: true if agent successfully makes a legal move
    """
    agent = self.get_turn_agent()
    move = agent.move()

    #play a card
    if (self.is_playable_card(move)):
      #lose
      #play a card not in hand
      if (not agent.remove_card(move)):
        print(f"ILLEGAL MOVE {move}")
        agent.reward(LOSE_GAME_REWARD)
        return False

      #successfully play a card
      self.add_to_discard(move)

      #win
      if (agent.empty_hand()):
        self._winner = self._index
        agent.reward(WIN_GAME_REWARD)

      #action card
      self.perform_card_action(move)

      self._index += self._dir #change whose turn it is next
      self._turn += 1 #total turn count increment
      agent.reward(GOOD_CARD_REWARD)
      return True

    #draw cards
    if (self.is_drawing_move(move)):
      for _ in range(self._penalty):
        agent.add_card(self.get_card_from_deck())

      #the penalty has been served
      self._penalty = 1

      self._index += self._dir
      self._turn += 1
      return True

    #lose
    agent.reward(LOSE_GAME_REWARD)
    return False


  def run_simulation(self, max_moves: int):
    """
    plays out the game according to agent's moves.

    stops when an agent wins, when the move limit is reached, or when an
    agent makes an illegal move (an illegal move does not advance the turn
    counter, so continuing could loop forever on the same agent).

    Parameters:
    - int: limits number of moves to simulate

    Returns:
    - None
    """
    while (self._turn <= max_moves and self._winner is None):
      if (not self.query_turn()):
        break

# Agent Interface

In [None]:
class UnoAgent(ABC):
  """
  interface every player has to implement, plus shared hand bookkeeping.

  subclasses are expected to set self._name and self._hand themselves.
  """

  @abstractmethod
  def move(self) -> Tuple[int, int]:
    """
    decide what to do this turn

    Returns:
    - Tuple[int, int]: card to play, or (-1, -1) to draw
    """
    pass

  @abstractmethod
  def reward(self, value: int) -> int:
    """
    receive a reward from the game

    Parameter:
    - int: value of reward given to this agent

    Returns:
    - int: implementation defined
    """
    pass


  def get_name(self):
    """
    Returns:
    - the name stored in self._name
    """
    return self._name


  def get_hand(self) -> List[Tuple[int, int]]:
    """
    Returns:
    - list[tuple[int, int]]: the cards currently held (the list itself, not a copy)
    """
    return self._hand


  def get_hand_count(self) -> int:
    """
    Returns:
    - int: how many cards are currently held
    """
    held = self._hand
    return len(held)


  def add_card(self, card: Tuple[int, int]) -> bool:
    """
    puts a card at the end of the hand

    Parameters:
    - Tuple[int, int]: card

    Returns:
    - bool: always True
    """
    self._hand.append(card)
    return True


  def remove_card(self, card: Tuple[int, int]) -> bool:
    """
    takes the first matching card out of the hand

    Parameters:
    - Tuple[int, int]: the card to remove

    Returns:
    - bool: True if the card was in the hand, False otherwise
    """
    if (card not in self._hand):
      return False
    self._hand.remove(card)
    return True


  def empty_hand(self) -> bool:
    """
    tells whether the hand holds no cards

    Returns:
    - bool: no cards in hand
    """
    return len(self._hand) == 0


  def clear_hand(self) -> bool:
    """
    replaces the hand with a new empty list

    Returns:
    - True
    """
    self._hand = list()
    return True




# Basic Agent

In [None]:
class BasicAgent(UnoAgent):
  """
  rule-based player: plays the first playable card in its hand, draws otherwise.
  """

  def __init__(self, name: str, game: UnoGameMaster):
    self._name = name
    self._game = game
    self._hand: List[Tuple[int, int]] = []

  def move(self) -> Tuple[int, int]:
    """
    scans the hand in order and stops at the first card the game accepts

    Returns:
    - Tuple[int, int]: that card, or (-1, -1) when nothing is playable
    """
    playable_cards = (card for card in self._hand if self._game.is_playable_card(card))
    return next(playable_cards, (-1, -1))


  def reward(self, value) -> int:
    """
    this agent does not learn, the reward is ignored

    Returns:
    - int: always 0
    """
    return 0


# Encoding and Decoding

Encoding the state of the game, given the game and the agent.

* 52 neurons for the hand (1 per unique card, holding how many copies of that card are in the hand)
* 17 neurons for the top card (4 for color and 13 for number)
* 3 for penalty (number of stacked +2 cards)

Total: 72 neurons

In [None]:
def encode_state(agent, game) -> np.ndarray:
  """
  encodes what the agent sees of the current game into a column vector

  layout of the 72 rows:
  - 0-51: hand, number of copies held per card, row index is color * 13 + value
  - 52-55: one-hot color of the top card
  - 56-68: one-hot value of the top card
  - 69-71: one-hot number of stacked +2 cards (1, 2, 3 or more), all zero
    when no penalty is pending

  Parameters:
  - UnoAgent: the agent whose hand is encoded
  - UnoGameMaster: the game to encode

  Returns:
  - np.ndarray: column vector of shape (72, 1)
  """

  #hand
  encoded_hand = np.zeros((4, 13))

  for color, number in agent.get_hand():
    encoded_hand[color, number] += 1
  encoded_hand = encoded_hand.flatten()

  #game state
  encoded_top_card = np.zeros(17)

  color, number = game.get_top()

  encoded_top_card[color] = 1
  #the 13 value slots start right after the 4 color slots, whatever the color is
  encoded_top_card[4 + number] = 1

  #penalty (maximum of 3 stacked +2 cards)
  encoded_penalty = np.zeros(3)
  penalty = game.get_penalty()

  if (penalty > 1):
    #2 cards -> slot 0, 4 cards -> slot 1, 6 or more cards -> slot 2
    stacked = min(penalty // 2, 3)
    encoded_penalty[stacked - 1] = 1

  #put these vectors together
  out = np.concatenate((encoded_hand, encoded_top_card, encoded_penalty), axis=0)
  out = out.reshape(-1, 1)

  return out


Decode the output of a decision.

* 17 neurons for the card to play (4 for color and 13 for number)
* 1 neuron if drawing a card

Total: 18 neurons

In [None]:
def decode_decision(decision_vector):
  """
  turns the 18 network outputs into a move

  Parameters:
  - np.ndarray: rows 0-3 score the color, rows 4-16 score the value,
    row 17 is the draw neuron

  Returns:
  - (-1, -1) when the draw neuron is above 0.5, otherwise the
    (color, value) pair with the highest scores
  """
  wants_to_draw = (decision_vector[17] > 0.5).any()
  if (wants_to_draw):
    return (-1, -1)

  color_scores = decision_vector[:4]
  number_scores = decision_vector[4:17]

  return (np.argmax(color_scores), np.argmax(number_scores))

Helper functions for the neural net: activation functions and their derivatives, used when changing the weights

In [None]:
def ReLU(Z):
  """
  rectified linear unit: element-wise maximum of Z and 0
  """
  floor = 0
  return np.maximum(Z, floor)

def sigmoid(Z):
  """
  logistic function 1 / (1 + e^-Z), applied element-wise
  """
  denominator = 1 + np.exp(-Z)
  return 1 / denominator

def ReLU_deriv(Z):
  """
  derivative of ReLU as a boolean mask: True where Z is strictly positive
  """
  positive_mask = Z > 0
  return positive_mask

def sigmoid_deriv(Z):
  """
  derivative of the logistic function, s(Z) * (1 - s(Z)), applied element-wise
  """
  #logistic function written out in place
  activation = 1 / (1 + np.exp(-Z))
  complement = 1 - activation
  return activation * complement

# RLAgent

In [None]:
class RLAgent(UnoAgent):
  """
  player that picks its moves with a small neural network
  (72 inputs -> HIDDEN_LAYER_SIZE ReLU units -> 18 sigmoid outputs).

  every reward received is stored in the trajectory together with the
  state and move of the most recent call to move().
  """

  def __init__(self, name: str, game: UnoGameMaster):
    self._hand: List[Tuple[int, int]] = []
    self._name = name
    self._game = game

    #weight matrices
    self._w1 = np.random.randn(HIDDEN_LAYER_SIZE, 72)
    self._b1 = np.zeros((HIDDEN_LAYER_SIZE, 1))

    self._w2 = np.random.randn(18, HIDDEN_LAYER_SIZE)
    self._b2 = np.zeros((18, 1))

    #sequence of moves representing entire game episode
    self._trajectory = []

    #state and move of the latest decision, None until move() is called
    self._last_state = None
    self._last_move = None

  def forward_prop(self, X):
    """
    runs the network on an encoded state

    Parameters:
    - np.ndarray: column vector with 72 rows

    Returns:
    - np.ndarray: column vector with 18 rows, every value between 0 and 1
    """

    #input to hidden layer
    Z1 = (self._w1 @ X) + self._b1
    A1 = ReLU(Z1)

    #hidden layer to output
    Z2 = (self._w2 @ A1) + self._b2
    A2 = sigmoid(Z2)

    return A2


  def move(self) -> Tuple[int, int]:
    """
    encodes the game, runs the network and decodes its output into a move

    Returns:
    - Tuple[int, int]: card to play, or (-1, -1) to draw
    """
    current_state = encode_state(self, self._game)
    forward_propagate = self.forward_prop(current_state)
    decision = decode_decision(forward_propagate)

    #remembered so that the reward that follows can be attached to them
    self._last_state = current_state
    self._last_move = decision

    return decision


  def reward(self, value: int) -> int:
    """
    receives a reward from the game and records it against the latest decision

    Parameter:
    - int: value of reward given to this agent

    Returns:
    - int: the reward that was recorded
    """
    return self.add_reward(self._last_state, self._last_move, value)


  def add_reward(self, state, move, reward) -> int:
    """
    appends a (state, move, reward) step to the trajectory

    Returns:
    - int: the reward that was recorded
    """
    self._trajectory.append((state, move, reward))

    return reward

# Simulations



In [None]:
TestGame = UnoGameMaster()

#three rule-based players attached to the same game master
TestAgent0, TestAgent1, TestAgent2 = (
  BasicAgent(f"basic{n}", TestGame) for n in range(3)
)

In [None]:
#play 1000 games of at most 200 moves and count the wins per seat
win_counter = [0] * 3
for i in range(1000):
  TestGame.setup([TestAgent0, TestAgent1, TestAgent2])
  TestGame.run_simulation(200)

  #games that hit the move limit have no winner and are not counted
  winner = TestGame._winner
  if (winner is None):
    continue
  win_counter[winner] += 1

print(win_counter)
print(sum(win_counter))

[346, 315, 334]
995


In [None]:
#random.seed(3)
#TestGame.setup([TestAgent0, TestAgent1, TestAgent2])

#print(TestAgent0.get_hand())
#print(TestAgent1.get_hand())
#print(TestAgent2.get_hand())

Trying out forward propagation

In [None]:
TestRL = RLAgent("RLAgent", TestGame)

TestGame.setup([TestAgent0, TestAgent1, TestRL])
#TestGame._penalty = 99
#print(TestGame.state())
#print(TestRL.get_hand())

#what the untrained network decides for its starting hand
print(TestRL.get_hand())
print(TestRL.move())
print()

#one turn per agent
try:
  TestGame.query_turn()
  TestGame.query_turn()
  TestGame.query_turn()
except ValueError as e:
  print(f"Error: {e}")

#state() prints the summary itself and returns None, wrapping it in print() would add a stray "None"
TestGame.state()

[(2, 4), (0, 2), (0, 0), (3, 8), (3, 9), (2, 10), (0, 1)]
(-1, -1)

deck:
[(2, 10), (3, 11), (3, 6), (0, 11), (1, 12), (3, 1), (1, 6), (0, 8), (2, 8), (0, 2), (2, 3), (3, 10), (3, 8), (2, 12), (2, 9), (2, 11), (1, 7), (3, 4), (2, 7), (2, 5), (2, 5), (3, 3), (3, 2), (2, 1), (2, 7), (2, 2), (0, 7), (0, 10), (0, 4), (1, 4), (2, 6), (3, 6), (3, 2), (3, 0), (2, 2), (3, 12), (0, 12), (3, 12), (0, 3), (1, 8), (1, 1), (1, 10), (3, 1), (2, 12), (0, 6), (0, 9), (0, 5), (2, 4), (1, 5), (0, 11), (3, 5), (2, 1), (2, 3), (3, 3), (1, 2), (1, 9), (1, 8), (1, 10), (2, 9), (1, 3), (2, 6), (0, 10), (3, 7), (3, 7), (0, 1), (1, 3), (3, 11), (3, 4), (3, 9), (1, 9), (0, 8), (0, 7), (0, 4), (3, 5), (0, 3), (1, 11), (0, 9)]
size: 77

discard:
[(1, 7), (1, 12), (1, 1)]
size: 3

top:
(1, 1)

agents:
['basic0', 'basic1', 'RLAgent']

turn:
4

penalty:
1

None
