<a href="https://colab.research.google.com/github/tniccum21/Cards/blob/master/simple_cards.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [129]:
"""
Simple cards - a platform for NN learning to play simple trick-based card games

Current status - platform for dealing hands and playing semi-random game built
To do:  build NN to learn/play

"""


import numpy as np
import random
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import Input, Dense 
from tensorflow.keras.models import Model
from tensorflow.keras.models import load_model
import time 

CARD_PLAYED = 20      # flag that a card has been played
DECK_SIZE = 20        # number of cards in the deck
TRICK_REWARD = 1


In [306]:
#########################################################################
##  CLASS State
##    contains the complete state of the game as seen by one player
##    high-level:
##      records:
##        the trump suit
##        players current hand
##        recording of each previous round of play in the game, and the order played
##
##    initial constructor initializes the game state to zeros
##    print - prints a nice copy of the situation
##
##
class State:
    # State structure - applies to one game
    #   trump:    the trumps for each round of current game
    #   mycards:  cards in player's hand
    #   played:   cards played [x rounds, y cards per round]
    #   order:    who played the cards in played [x rounds, y cards per round]
    #   winner:   who won each round
    #   rewards:  my rewards by round

    def __init__(self, num_rounds=5, num_players=4):
        self.trump      = np.zeros(num_rounds, dtype=int)
        self.mycards    = np.zeros(num_rounds, dtype=int)
        self.played     = np.zeros((num_rounds, num_players), dtype=int)
        self.order      = np.zeros((num_rounds, num_players), dtype=int)
        self.winner     = np.zeros((num_rounds), dtype=int)
        self.reward     = np.zeros((num_rounds), dtype=float)

        self.trump.fill(-1)
        self.played.fill(-1)
        self.order.fill(-1)
        self.winner.fill(-1)

    def makeflat(self):        # make a flat vector of current round's state of play for storage in replay buffer
      a = self.mycards                # my current hand
      b = self.trump                  # currnet trump
      c = self.played.flatten()       # cards played in this game so far
      d = self.order.flatten()        # order of cards played in this game so far
      e = self.winner                 # round winners so far
      f = self.reward                 # rewards so far
      x = np.concatenate((a,b,c,d,e,f)) 
      return x

    def unflatten(self, v):
      self.mycards = v[0:4]
      self.trump   = v[5:9]
      m = list(v[10:29])
      self.played  = np.array(m).reshape(5, 4)
      m = list(v[30:49])
      self.order   = np.array(m).reshape(5, 4)
      self.winner  = v[50:54]
      self.reward  = v[55:59]

    def print(self, labels=False):
        pretty_cards = []
        pretty_played = []
        pretty_order = []
        for i in range(5):
              f = face(self.mycards[i])
              pretty_cards.append(f)
        for i in range(5):      
            if sum(self.played[i]) != 0:
              pretty_played.append('Round '+str(i)+'('+str(self.trump[i])+': ')
              pretty_order.append('Round '+str(i)+': ')

              for j in range(4):
                pretty_played.append(face(self.played[i][j]))
                pretty_order.append(str(self.order[i][j])+' ')


        print("  Trump:   ", self.trump)  
        print("  My Hand: ", pretty_cards)
        print("  Played:  ", pretty_played)
        print("  Order:   ", pretty_order)
        print("  Reward:   ", self.reward)

# updates the state with a play
# inputs:
#     round:  the round number of the current game
#     play:   the play number of the current game (0 = leadout)
#     player: the player playing this card
#     card:   the card being played
#     round*5 +1 gets to the head of the array for recording each round and stores the card played
#     round*5 +20 gets to the head of the array for recording the order of play within the round, and stores the player number
    def update(round, play, player, card):
        self.played[round, play] = card 
        self.play_order[round, play] = player
        return None



In [154]:
################################################################################
# CLASS Agent:
#    
#  quick and dirty "strategy" for robotic player imporovement...
#    simply checks to see if:
#       A.  If we can win by moving to a square
#       B.  If we can't win do we need to block the other guy...
#       returns a list of suggested moves
############## NN Model Class ######################
class Agent():
    def __init__(self, load=False, filename=''):
        self.learning_rate = 0.05 # default learning rate   
        self.best_move_smoothing = 0.1 # when several next best moves are roughly equal, choose randomly
        self.randocount = 0
        self.NNcount = 0
        self.epsiloncount = 0
        self.movecount = 0
        self.bummers = []
        if load == True:
          self.model = load_model(filename)

#model design:
#       input layer: size of state 
#       hidden layers
#       output layer: size of possible action list        

    def create(self, state_dim, action_dim, hidden_layers=3, hidden_dim=(64, 64, 64), debug=False):
        i = Input(shape=(state_dim,))
        x = i
        for k in range(hidden_layers):
            x = Dense(hidden_dim[k], activation='relu')(x)
        x = Dense(action_dim)(x) # output layer
        self.model = Model(i, x) 
        self.model.compile(optimizer='adam',
                           loss='mse',
                           metrics=['accuracy'])
    
        return self.model
    
    def train_on_batch(self, X, Y):
      loss_batch, acc_batch = self.model.train_on_batch(X, Y)
      loss = loss_batch
      print(loss, acc_batch)
      return loss 

    def print():
      print((self.model.summary()))

    def save(self, filename):
      self.model.save(filename)

    def predict(self, state):
        return self.model.predict(state)

    def act_learner(self, state, player): # state is the current board
        self.movecount += 1
        if np.random.rand() <= self.epsilon:
            self.epsiloncount += 1
            return robot_player_learner(state, player)
        sf = np.zeros((1,9))
        sf[0] = state.board.flatten()

        act = self.model.predict(sf) 
 
        m = get_best_legal_move(state, act[0], self.best_move_smoothing, debug=False)
        self.NNcount += 1
        return m            
    
    def act_trainer(self, state, player): # state is the current board
        self.movecount += 1
        if np.random.rand() <= self.epsilon:
            self.epsiloncount += 1
            return robot_player_teacher(state, player)
        sf = np.zeros((1,9))
        sf[0] = state.board.flatten()

        act = self.model.predict(sf) 
 
        m = get_best_legal_move_with_checking(state, act[0], self.best_move_smoothing, debug=False)
        self.NNcount += 1
        return m            
    


In [263]:
#################################################################################
## Class ReplayBuffer
##
## inputs:  obs_dim: observation dimensionality
##          act_dim: action dimensionality
##          size:    number of slots in the buffer
##
##  Idea here is to record game states and actions, along with rewards earned.
##  When sufficent activity has happened in the game play, and recoded in the buffer,
##  we randomly sample from the buffer in batches to train our model.
#################################################################################
class ReplayBuffer:
  def __init__(self, obs_dim, act_dim, buffersize=500):
    self.epsilon = 1.0
    self.epsilon_min = 0.01
    self.epsilon_decay = 0.999

    self.state_size = obs_dim
    self.action_size = act_dim

    self.current_state_buf = np.zeros([buffersize, obs_dim], dtype=np.uint8)  
    self.action_buf = np.zeros(buffersize, dtype = np.uint8)  
    self.reward_buf = np.zeros(buffersize, dtype = np.float32)  
    self.ptr, self.size, self.max_size = 0, 0, buffersize

  def store(self, current_state, action, reward):
    self.current_state_buf[self.ptr] = current_state
    self.action_buf[self.ptr] = action
    self.reward_buf[self.ptr] = reward
    self.ptr = (self.ptr+1) % self.max_size
    self.size = min(self.size+1, self.max_size)
      
  def sample(self, batch_size=32):  
    idxs = np.random.randint(0, self.size, size=batch_size)
    return dict(s=self.current_state_buf[idxs],
                a=self.action_buf[idxs],
                r=self.reward_buf[idxs])

  def print(self, batch):
        print("\n")
        for i in range(len(batch["s"])):
            print(i, "STATE: ", batch["s"][i], "ACTION: ", batch["a"][i], "REWARD: ", batch["r"][i])
        print("-----------------------------------\n")
            

  def train_one_batch(self, agent, batch_size=32, debug=False):
        #sample a batch of data from the replay memory
        if self.size < batch_size:
            return
        minibatch = self.sample(batch_size)
        states = minibatch['s']
        actions = minibatch['a']
        rewards = minibatch['r']
        target = rewards 

        target_full = agent.predict(states)

        target_full[np.arange(len(actions)), actions] = target
        
        loss = agent.train_on_batch(states, target_full)

        predict_post = agent.predict(states)

        if self.epsilon > self.epsilon_min:
              self.epsilon *= self.epsilon_decay
        else:
              self.epsilon = self.epsilon_min
              


In [264]:
"""
Miscellaneous helper functions and definitions
"""

#############################################################
## Deck
##
#############################################################
deck_value = {"AH": 5, "KH": 4, "QH": 3, "JH": 2, "TH": 1, \
                   "AD": 5, "KD": 4, "QD": 3, "JD": 2, "TD": 1, \
                   "AS": 5, "KS": 4, "QS": 3, "JS": 2, "TS": 1, \
                   "AC": 5, "KC": 4, "QC": 3, "JC": 2, "TC": 1, \
                   "--": 0}

deck_decode = ["AH", "KH", "QH", "JH", "TH", \
               "AD", "KD", "QD", "JD", "TD", \
               "AS", "KS", "QS", "JS", "TS", \
               "AC", "KC", "QC", "JC", "TC", \
               "--"]
class Deck():
    def __init__(self, cards_in_deck=DECK_SIZE):
        self.cards_in_deck = cards_in_deck

    def create(self):
        self.deck = np.arange(self.cards_in_deck, dtype=int )
        return self.deck

    def deal(self, num_hands=4, cards_per_hand=5, number_of_shuffles=1):
      hands = np.zeros((num_hands,cards_per_hand), dtype=int)
      idx = list(range(0, self.cards_in_deck))
      for i in range(number_of_shuffles):
        random.shuffle(idx)
      for i in range(cards_per_hand):
        for j in range(num_hands):
          hands[j][i] = idx[i*4+j]
      return hands

def suit(card):
    return deck_decode[card][-1]

def face(card):
    return deck_decode[card]

def is_trump(card, trump):
    return suit(card) == trump

def value(card):
    return deck_value[deck_decode[card]]

def pick_trump():
  suits = ['H', 'D', 'C', 'S']
  trump = suits[random.randint(0,3)]
  return trump

def trump_to_num(trump):
  suits = ['H', 'D', 'C', 'S']
  trumpnum = suits.index(trump)
  return trumpnum

def show_hand(hand):
  h = []
  for card in hand:
    h.append(face(card))
  return h

def card_compare(card1, card2, trump):
    suit1 = suit(card1)
    suit2 = suit(card2)
    val1 = value(card1)
    val2 = value(card2)
    
    if is_trump(card1, trump) and (not is_trump(card2, trump)): # card1 was trump, card2 not card 1wins
      return card1
    if is_trump(card2, trump) and (not is_trump(card1, trump)): # card2 was trump, card1 not, so card2 wins  
      return card2
    if is_trump(card1, trump) and is_trump(card2, trump): # both trump, higher value wins
      if val1 >= val2:
        return card1
      else:
        return card2
    if suit1 != suit2:  # no trump - id card2 follow suit?
      return card1        # no, card1 wins
    else:                 # followed suit, so... compare value
      if val2 > val1:     # biggest card wins... card1 unless card2 is bigger
        return card2
      else:
         return card1
    print("ERROR IN card_compare: ", card1, card2)
    return -1          

def who_won_round(trump, round_state, play_order, debug=0):
  # evaluate cards played to determine winner
  # if trump played, highest trump wins
  # if no_trump played, highest of lead suit played wins
  best_card = round_state[0]
  best_player = play_order[0]
  for i in range(len(round_state)):
    if debug > 2:
      print("player ", play_order[i], " played ", face(round_state[i]))
    test_card = card_compare(best_card, round_state[i], trump)
    if test_card != best_card:
      best_card = test_card
      best_player = play_order[i]
  return best_player, best_card

def what_is_winning_round(trump, round_state, debug=0):
  # evaluate cards played to determine current winning card
  # if trump played, highest trump wins
  # if no_trump played, highest of lead suit played is winning

  best_card = round_state[0]
  if len(round_state) == 1:
    return best_card
  if debug > 3: 
    print("what is winning: ", round_state, len(round_state))
  for i in range(1, len(round_state)):
    test_card = card_compare(best_card, round_state[i], trump)
    if test_card != best_card:
      best_card = test_card
  return best_card


In [277]:
"""
MAIN ACTION SECTION:
  - play_one_card
  - play_one_hand
  - play_one_game
"""
def play_one_card_bot(hands, trump, player, round_state, epsilon, debug=0):
  # if am i lead?
  #   play winner or slough? 
  # else can i follow suit?
  # else do I slough... or trump!
  
  # preprocess my hand
    if np.random.random() < epsilon:
      play, hands = play_one_card(hands, trump, player, round_state, debug=0)
    else:
      play, hands = play_one_card(hands, trump, player, round_state, debug=0)
    return play, hands



def play_one_card(hands, trump, player, round_state, debug=0):
  # if am i lead?
  #   play winner or slough? 
  # else can i follow suit?
  # else do I slough... or trump!
  
  # preprocess my hand
    mycards = np.where(hands[player] != CARD_PLAYED)
    playable = hands[player][mycards]
    card_face = []
    card_value = []
    card_suit = []
    card_trump = []
    card_index = []
    
    for card in playable:
      card_face.append(face(card))
      card_value.append(value(card))
      card_suit.append(suit(card))    
      card_trump.append(is_trump(card, trump))
      card_index.append(card)

    if len(round_state) == 0:  # i'm lead...
      if debug > 4: 
        print("I'm lead, my cards: ", show_hand(playable))
      # play highest non-trump
      idx = [i for i in range(len(card_value)) ]
      high = 0
      high_idx = 0
      card_idx = 0
      if len(idx) > 0: # we can follow suit
        if debug > 4:
          print("Following suit...", idx)
        for i in idx:
          if high < card_value[i]:
              high = card_value[i]
              high_idx = i    
      play = card_index[high_idx]
      hands[player][mycards[0][high_idx]] = CARD_PLAYED
      suit_to_follow = suit(play)

    else:
      suit_to_follow = suit(round_state[0])
      if debug > 4:
        print("I'm follower, trump is: ", trump, "suit to follow is: ", suit_to_follow, "my cards: ", show_hand(playable))
      # play highest non-trump in suit
      # check to see winning card in current play
      current_winning_card = what_is_winning_round(trump, round_state)
      if debug > 4:
        print("Current winning card: ", face(current_winning_card), is_trump(current_winning_card, trump))
      #if current winning card is trump:
      # if we have to follow suit, go low
      # if we have to slough go short suit
      # if we can over-trump, do it
      follow_go_low = False
      over_trump = False

      if is_trump(current_winning_card, trump):
        follow_go_low = True
        over_trump = True

      idx = [i for i in range(len(playable)) if card_suit[i] == suit_to_follow ]
      high = 0
      low = 100
      card_idx = 0
      if len(idx) > 0: # we can follow suit
        if debug > 4:
          print("Following suit...", idx)
        for i in idx:
          if high < card_value[i]:
              high = card_value[i]
              high_idx = i    
          if low > card_value[i]:
              low = card_value[i]
              low_idx = i
        # we have our highest and lowest suit followers
        if debug > 4:
          print("high/low follwer: ", face(card_index[high_idx]), face(card_index[low_idx]))

        if value(current_winning_card) > value(card_index[high_idx]): # we can't beat the current winning card, so go low in following suit
          follow_go_low = True

        if follow_go_low:
          card_idx = low_idx
          if debug > 4:
            print("going low")
        else:
          card_idx = high_idx
          if debug > 4:
            print("going high")

      else: # can't follow suit... can we trump it?
        idx = [i for i in range(len(card_trump)) if card_trump[i] == True]
        if len(idx) > 0: # we can trump
          if len(idx) == 1: # we only have one trump, so use it
            card_idx = idx[0]
          else:  # we have more than one trump - choose

            high = 0
            low = 100
            card_idx = 0
            for i in idx: # find my high and low trump
              if high < card_value[i]:
                high = card_value[i]
                high_idx = i
              if low > card_value[i]:
                low = card_value[i]
                low_idx = i  
            if over_trump:
                card_idx = high_idx
                if debug > 4:
                  print("Over Trump it!", card_idx)
            else: # play low trump
                 card_idx = low_idx
                 if debug > 4:
                   print("Under Trump it!", card_idx)
        else:  # no trump, let's slough a loser
          idx = [i for i in range(len(playable))] # get list of cards
          if len(idx) > 0: # we can slough
              low = 100
              card_idx = 0
              for i in idx: # play lowest slough
                if low > card_value[i]:
                   low = card_value[i]
                   card_idx = i

      play = card_index[card_idx]
      if debug > 4:
        print("Playing: ", face(card_index[card_idx]))
      hands[player][mycards[0][card_idx]] = CARD_PLAYED
    return play, hands


In [274]:

def play_one_game(replay_buffer, deck, dealer, num_players=4, rounds_in_game=5, debug=0):
  # dealer = passed in from overall main game
  # instantiate State for each player
  # deal cards
  # pick trump
  # lead = (dealer + 1) % 4
  # for each round (rounds_in_game), for each player (num_players) play_one_card
  # rotate the dealer for the next round

  hands = deck.deal(num_hands=num_players,        # deal out the hands
                    cards_per_hand=rounds_in_game, 
                    number_of_shuffles=2)
  
  trump = pick_trump()                            # RULE: trump is chosen randomly
  playerstate = [0] * num_players                 # create a list to hold the states during the game

  for i in range(num_players):                    # for each player:
    playerstate[i] = State(rounds_in_game, 
                           num_players)           # initialize state object           
    playerstate[i].mycards = np.sort(hands[i])    # move players' hands to player state


  tricks = [0] * num_players                      # initialize trick counter
  lead = (dealer + 1) % num_players               # initial lead to dealers left
  
  for round in range(rounds_in_game):             # loop through number of rounds in a game
    c = 0                                         # card played for indexing into player state
    current_state = []                            # gonna store the current_state of game before we choose action
    player_up = []                                # the player playing with current_state
    action = []                                   # the action taken based on current_state
    reward = []                                   # the reward gained as result of current_state + action

    for p in range(lead, lead+num_players):       # play starting with the lead player
      player = p % num_players                    # rotate back through player 0 if need be
 
      if debug > 2:
        print("ROUND: ", round, "Player: ", player)

      current_state.append(playerstate[player])   # add the current game state to the list of states
      player_up.append(player)                    # index with the current player for later retrieval

      if player == 0:
        play, hands = play_one_card_bot(hands,          # generate the next card to play
                                    trump, 
                                    player, 
                                    playerstate[player].played[round],
                                    replay_buffer.epsilon) 
      else:
        play, hands = play_one_card(hands,      # generate the next card to play with NN
                                    trump, 
                                    player, 
                                    playerstate[player].played[round]) 
      
      action.append(play)                         # save the played card 

      for q in range(num_players):                # for all players, update the game history
        playerstate[q].played[round][c] = play    # update the play history
        playerstate[q].order[round][c] = player   # update the order of play history
        playerstate[q].trump[round] = trump_to_num(trump)     # update the trump for this round
      c += 1                                      # update the card played count
      
      if debug > 2:
        print("Player %1d plays %3s" % (player, face(play)))

      
    winner, wc = who_won_round(trump,             # determine winner of round - can use any player's history
                                playerstate[0].played[round], 
                                playerstate[0].order[round])  
    
   
    tricks[winner] += 1                           # track total tricks taken
    lead = winner                                 # winner leads on next round
  
    for q in range(num_players):                  # set who gets the REWARD on this round
      if player_up[q] == winner:
        reward.append(TRICK_REWARD)
      else:
        reward.append(0)                          # others get 0
      playerstate[q].winner[round] = winner       # update the game state
      if winner == q:
        playerstate[q].reward[round] = TRICK_REWARD
      else:
        playerstate[q].reward[round] = 0
      
      if TRAINING:                                # store the plays for this round in the replay buffer
        replay_buffer.store(current_state[q].makeflat(), 
                          action[q], 
                          reward[q])
        replay_buffer.train_one_batch(brain, 
                                      batch_size=32, 
                                      debug=True)

    if debug > 2:
      print("----- End of ROUND Results -----")
      print("Trump was: ", trump)

      for i in range(num_players):
        print("Player ", i, "took ", tricks[i], "tricks.")
        playerstate[i].print()
        print("-------------------------------")
        print("action: ", action)
        print("reward: ", reward)
        print("-------------------------------")

  return tricks


In [275]:
def play_one_tournament(replay_buffer, winning_score=10, num_players=4, rounds_in_game=5, debug=0):
  deck = Deck(DECK_SIZE)
  score = [0] * num_players
  dealer = np.random.randint(0,num_players)  # start each tournament with random dealer
  games_played_this_tournament = 0

  while True:
    if (max(score) >= winning_score): 
      # ensure no ties
      w = max(score)
      if score.count(w) == 1:
        break

    deck.create()  
    tricks = play_one_game(replay_buffer, deck, dealer, num_players=num_players, rounds_in_game=rounds_in_game, debug=debug)
    for i in range(num_players):
      score[i] += tricks[i]  
    dealer = (dealer + 1) % num_players
    games_played_this_tournament += 1

  if debug > 0:
        print("\n========= End of Tournament Score ========Games: ", games_played_this_tournament)
        for i in range(num_players):
          print("Player ", i, "Score ", score[i])
        print("\n===========================================")

  return score, games_played_this_tournament

In [None]:
###############################
# Testing loop
################################
#game hyperparameters
num_players = 4
win_score = 11
debug_level = 0
rounds_in_game = 5

#stats initialization
scores = [0] * num_players    
tournaments = 100
tournaments_won = [0] * num_players
tricks_in_tournament = 0
total_games_played = 0

# buffer initialize (IF TRAINING)
TRAINING = True
rpbuf = ReplayBuffer(60, 20, 500)
brain = Agent(load=False)
brain.create(60, 20, hidden_layers=3, hidden_dim=(256, 512, 256), debug=False)



for i in range(tournaments):    
    score, games_played_this_tournament = play_one_tournament(rpbuf, 
                                                              winning_score=win_score, 
                                                              num_players=num_players, 
                                                              rounds_in_game=rounds_in_game, 
                                                              debug=debug_level)
    for j in range(num_players):
      scores[j] += score[j]
    tricks_in_tournament += sum(score)
    tournament_winner = np.argmax(score)
    tournaments_won[tournament_winner] += 1
    total_games_played += games_played_this_tournament

    if ((i + 1) % 1000) == 0:
      print("\n========= OVERALL Tournament wins %4d/%4d ==========" % (i+1, tournaments))
      print("Player Wins   Win pct")
      for j in range(num_players):
        print("  %1d    %5d    %2.2f" %  (j+1, tournaments_won[j], tournaments_won[j]/sum(tournaments_won)))
      print("\n========= OVERALL Tricks taken  out of %4d==========" % (sum(scores)))
      print("Player Hands  Win pct")
      for j in range(num_players):
        print("  %1d    %5d    %2.2f" %  (j+1, scores[j], scores[j]/sum(scores)))
      print("\n=========================================")

      print(" Total tricks played: ", tricks_in_tournament)
      print(" Total games played: ", total_games_played)
      print(" Total tournaments played: ", sum(tournaments_won))
      print(" Average tricks per Tournament: ", tricks_in_tournament / sum(tournaments_won))
      print(" Average games per Tournament: ", total_games_played / sum(tournaments_won))
      print(" Average tricks per game: ", tricks_in_tournament / total_games_played)


In [None]:
batch = rpbuf.sample(100)
rpbuf.print(batch)

In [279]:
y = batch['s']

In [280]:
print(y.shape)


(100, 60)


In [None]:
p = brain.predict(y)
z = np.argmax(p, axis=1)
z.shape
print(p)
print(z)


In [307]:
s = State()
y[58]
s.unflatten(y)


ValueError: ignored

In [300]:
x = np.zeros((2,2))
x.shape

(2, 2)

In [301]:
x.reshape(1,4)

array([[0., 0., 0., 0.]])