In [1]:
# Run this cell to mount your Google Drive.

from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [2]:
pth = '/content/drive/MyDrive/Colab Notebooks/Thesis'

In [209]:
all_classes = ['SP0','SP1','Draw','Pickup','DH','GIN',
               'AS', '2S', '3S', '4S', '5S', '6S', '7S', '8S', '9S', 'TS', 'JS', 'QS', 'KS',
               'AH', '2H', '3H', '4H', '5H', '6H', '7H', '8H', '9H', 'TH', 'JH', 'QH', 'KH',
               'AD', '2D', '3D', '4D', '5D', '6D', '7D', '8D', '9D', 'TD', 'JD', 'QD', 'KD',
               'AC', '2C', '3C', '4C', '5C', '6C', '7C', '8C', '9C', 'TC', 'JC', 'QC', 'KC',
               'AS', '2S', '3S', '4S', '5S', '6S', '7S', '8S', '9S', 'TS', 'JS', 'QS', 'KS',
               'AH', '2H', '3H', '4H', '5H', '6H', '7H', '8H', '9H', 'TH', 'JH', 'QH', 'KH',
               'AD', '2D', '3D', '4D', '5D', '6D', '7D', '8D', '9D', 'TD', 'JD', 'QD', 'KD',
               'AC', '2C', '3C', '4C', '5C', '6C', '7C', '8C', '9C', 'TC', 'JC', 'QC', 'KC']


# Gin Rummy

## Imports

In [107]:
#-------------------------------------------------------------------------------
# The following code was originally written by Todd Neller in Java.
# It was translated into Python by Anthony Hein.
#-------------------------------------------------------------------------------

#-------------------------------------------------------------------------------
# A class for modeling a game of Gin Rummy
# @author Todd W. Neller
# @version 1.0
#-------------------------------------------------------------------------------

#-------------------------------------------------------------------------------
# Copyright (C) 2020 Todd Neller
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# Information about the GNU General Public License is available online at:
#   http://www.gnu.org/licenses/
# To receive a copy of the GNU General Public License, write to the Free
# Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA.
#-------------------------------------------------------------------------------

import random
import time
import numpy as np
import os
import torch

%cd /content/drive/My Drive/Colab Notebooks/Thesis/GinRummy

from Deck import Deck
from GinRummyUtil import GinRummyUtil
from SimpleGinRummyPlayer import SimpleGinRummyPlayer

%cd /content/drive/My Drive/Colab Notebooks/Thesis/SupervisedLearning

from models import *

%cd /content/drive/My Drive/Colab Notebooks/Thesis
#-------------------------------------------------------------------------------

# TRACKING
# Plane (5x52)      Feature
# 0	 currHand       the cards in current player's hand
# 1	 topCard        the top card of the discard pile
# 2	 deadCard       the dead cards: cards in discard pile (excluding the top card)
# 3	 oppCard        opponent known cards: cards picked up from discard pile, but not discarded
# 4	 unknownCard    the unknown cards: cards in stockpile or in opponent hand (but not known)

# Action ID         Action
# 0	                score_player_0_action
# 1	                score_player_1_action
# 2	                draw_card_action
# 3	                pick_up_discard_action
# 4	                declare_dead_hand_action
# 5	                gin_action
# 6 - 57	        discard_action
# 58 - 109	        knock_action

# Knock_bin
# Action ID         Action
# 0	                No Knock
# 1	                Knock

def one_hot(cards):
    ret = np.zeros(52)
    for card in cards:
        ret[card.getId()] = 1
    return ret

def un_one_hot(arr):
    rankNames = ["A", "2", "3", "4", "5", "6", "7", "8", "9", "T", "J", "Q", "K"]
    suitNames = ['S', 'H', 'D', 'C']
    ret = []
    for i in range(len(arr)):
        if arr[i] != 0:
            ret.append(rankNames[i%13] + suitNames[i//13])
    return ret

#-------------------------------------------------------------------------------

/content/drive/My Drive/Colab Notebooks/Thesis/GinRummy
/content/drive/My Drive/Colab Notebooks/Thesis/SupervisedLearning
/content/drive/My Drive/Colab Notebooks/Thesis


## MLPGinRummyPlayer

In [308]:
# -------------------------------------------------------------------------------
#  MLPGinRummyPlayer
#
#  This estimation will be calculated using a Multilayer Percepton trained on the
#  SimpleGinRummyPlayer written
#  by Calvin Tan.
#
#  @author Calvin Tan
#  @version 1.0
# -------------------------------------------------------------------------------

# -------------------------------------------------------------------------------
# The following code was originally written by Todd Neller in Java.
# It was translated into Python by May Jiang.
# -------------------------------------------------------------------------------

# -------------------------------------------------------------------------------
# Copyright (C) 2020 Todd Neller
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# Information about the GNU General Public License is available online at:
#   http://www.gnu.org/licenses/
# To receive a copy of the GNU General Public License, write to the Free
# Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA.
# -------------------------------------------------------------------------------

from typing import List, TypeVar
from random import randint
from GinRummyUtil import GinRummyUtil
from GinRummyPlayer import GinRummyPlayer

# Import MLP Models
# from SupervisedLearning.models import *

Card = TypeVar('Card')

class MLPGinRummyPlayer(GinRummyPlayer):

    def loadModel(self, model_pt):
        print('Load Model')
        self.model = model_pt

    def updateStates(self, states):
        print('Update States')
        self.state = states

    def knockAction(self) -> bool:
        return self.knock

    # Inform player of 0-based player number (0/1), starting player number (0/1), and dealt cards
    def startGame(self, playerNum: int, startingPlayerNum: int, cards: List[Card]) -> None:
        self.playerNum = playerNum
        self.startingPlayerNum = startingPlayerNum
        self.cards = list(cards)
        self.opponentKnocked = False
        self.drawDiscardBitstrings = [] # long[], or List[int]
        self.faceUpCard = None
        self.drawnCard = None
        self.state = None
        self.knock = False

    # Return whether or not player will draw the given face-up card on the draw pile.
    def willDrawFaceUpCard(self, card: Card) -> bool:
        # BPBD, either draw(2)->True or pickup(3)->False
        state = np.expand_dims(self.state, axis=0)
        action = self.model(torch.from_numpy(state).type(torch.FloatTensor))
        action = action.detach().numpy().reshape(-1)
        print('Pickup from discard:', action[2])
        print('Draw new card:', action[3])
        if action[3] > action[2]:
            print('Draw Action')
            return True
        print('Pickup Action')
        return False


    # Report that the given player has drawn a given card and, if known, what the card is.
    # If the card is unknown because it is drawn from the face-down draw pile, the drawnCard is null.
    # Note that a player that returns false for willDrawFaceUpCard will learn of their face-down draw from this method.
    def reportDraw(self, playerNum: int, drawnCard: Card) -> None:
        # Ignore other player draws.  Add to cards if playerNum is this player.
        if playerNum == self.playerNum:
            self.cards.append(drawnCard)
            self.drawnCard = drawnCard


    # Get the player's discarded card.  If you took the top card from the discard pile,
    # you must discard a different card.
    # If this is not a card in the player's possession, the player forfeits the game.
    # @return the player's chosen card for discarding
    def getDiscard(self) -> Card:
        # APBD, either either discard or knock...
        # determine the allowable actions (which cards can be discarded/knocked on)
        currHand = np.array(self.state[0:52])
        # disallow discarding PickUp
        # print(self.drawnCard)
        # print('Old Hand:', un_one_hot(currHand))
        currHand[self.drawnCard.getId()] = 0
        # print('Current Hand:', un_one_hot(currHand))
        
        state = np.expand_dims(self.state, axis=0)
        action = self.model(torch.from_numpy(state).type(torch.FloatTensor))
        action = action.detach().numpy().reshape(-1)
        print(all_classes[np.argmax(action)], np.argmax(action))
        discardMax = max(currHand * action[6:58])
        knockMax = max(currHand * action[58:110])
        print('MAX:{}, {}'.format(discardMax, knockMax))
        if discardMax > knockMax:
            print('Discard Action')
            self.knock = False
            return Deck.getCard(np.argmax(currHand * action[6:58]))
        else:
            print('Knock Action')
            self.knock = True
            return Deck.getCard(np.argmax(currHand * action[58:110]))




















    # Report that the given player has discarded a given card.
    def reportDiscard(self, playerNum: int, discardedCard: Card) -> None:
        # Ignore other player discards.  Remove from cards if playerNum is this player.
        if playerNum == self.playerNum:
            self.cards.remove(discardedCard)

    # At the end of each turn, this method is called and the player that cannot (or will not) end the round will return a null value.
    # However, the first player to "knock" (that is, end the round), and then their opponent, will return an ArrayList of ArrayLists of melded cards.
    # All other cards are counted as "deadwood", unless they can be laid off (added to) the knocking player's melds.
    # When final melds have been reported for the other player, a player should return their final melds for the round.
    # @return null if continuing play and opponent hasn't melded, or an ArrayList of ArrayLists of melded cards.
    def getFinalMelds(self) -> List[List[Card]]:
        # Check if deadwood of maximal meld is low enough to go out.
        bestMeldSets = GinRummyUtil.cardsToBestMeldSets(self.cards) # List[List[List[Card]]]
        if not self.opponentKnocked and (len(bestMeldSets) == 0 or \
            GinRummyUtil.getDeadwoodPoints1(bestMeldSets[0], self.cards) > \
            GinRummyUtil.MAX_DEADWOOD):
            return None
        if len(bestMeldSets) == 0:
            return []
        return bestMeldSets[randint(0, len(bestMeldSets)-1)]

    # When an player has ended play and formed melds, the melds (and deadwood) are reported to both players.
    def reportFinalMelds(self, playerNum: int, melds: List[List[Card]]) -> None:
        # Melds ignored by simple player, but could affect which melds to make for complex player.
        if playerNum != self.playerNum:
            self.opponentKnocked = True

    # Report current player scores, indexed by 0-based player number.
    def reportScores(self, scores: List[int]) -> None:
        # Ignored by simple player, but could affect strategy of more complex player.
        return

    # Report layoff actions.
    def reportLayoff(self, playerNum: int, layoffCard: Card, opponentMeld: List[Card]) -> None:
        # Ignored by simple player, but could affect strategy of more complex player.
        return

    # Report the final hands of players.
    def reportFinalHand(self, playerNum: int, hand: List[Card]) -> None:
        # Ignored by simple player, but could affect strategy of more complex player.
        return

## Game Definition

In [309]:
class GinRummyGame:

    # Hand size (before and after turn). After draw and before discard there is one extra card.
    HAND_SIZE = 10;

    # Whether or not to print information during game play
    playVerbose = False;

    # Two Gin Rummy players numbered according to their array index.
    players = [];

    # Set whether or not there is to be printed output during gameplay.
    def setPlayVerbose(playVerbose):
        GinRummyGame.playVerbose = playVerbose
    
    #-------------------------------- appendStates --------------------------------#
    # 2020-12-20: Define a method to append states
    # 2021-01-16: modified append state to work for either player (0 or 1)
    def appendState(self, currentPlayer, discards, oppCard):
        currHand = one_hot(GinRummyGame.players[currentPlayer].cards)
        topCard = np.zeros(52)
        if len(discards) > 0:
            topCard[discards[-1].getId()] = 1
        deadCard = np.zeros(52)
        for d in range(len(discards) - 1):
            deadCard[discards[d].getId()] = 1
        unknownCard = np.ones(52) - currHand - topCard - deadCard - oppCard
        states.append(np.array([currHand, topCard, deadCard, oppCard, unknownCard]).flatten())
    #------------------------------------------------------------------------------#

    # Create a GinRummyGame with two given players
    def __init__(self, player0, player1):
        GinRummyGame.players.extend([player0, player1])

    # Play a game of Gin Rummy and return the winning player number 0 or 1.
    # @return the winning player number 0 or 1

    # 2020-12-20: Add parameter to select what data to generate
    def play(self,state='all',action='all'):
        scores = [0, 0]
        hands = []
        hands.extend([[], []])

        startingPlayer = random.randrange(2);

        # while game not over
        while scores[0] < GinRummyUtil.GOAL_SCORE and scores[1] < GinRummyUtil.GOAL_SCORE:

            currentPlayer = startingPlayer
            opponent = (1 if currentPlayer == 0 else 0)

            # get shuffled deck and deal cards
            deck = Deck.getShuffle(random.randrange(10 ** 8))
            hands[0] = []
            hands[1] = []
            for i in range(2 * GinRummyGame.HAND_SIZE):
                hands[i % 2] += [deck.pop()]
            for i in range(2):
                GinRummyGame.players[i].startGame(i, startingPlayer, hands[i]);
                if GinRummyGame.playVerbose:
                    print("Player %d is dealt %s.\n" % (i, hands[i]))
            if GinRummyGame.playVerbose:
                print("Player %d starts.\n" % (startingPlayer))
            discards = []
            discards.append(deck.pop())
            if GinRummyGame.playVerbose:
                print("The initial face up card is %s.\n" % (discards[len(discards) - 1]))
            firstFaceUpCard = discards[len(discards) - 1]
            turnsTaken = 0
            knockMelds = None

            # 11/25 - Initial state, prior to any cards
            # 1/16 - Initialize oppCard to be two dimensional to track both players as opponents
            oppCard = []
            oppCard.extend([np.zeros(52), np.zeros(52)])

            # while the deck has more than two cards remaining, play round
            while len(deck) > 2:
#-------------------------------------------------------------- BPBD --------------------------------------------------------------#
                drawFaceUp = False
                faceUpCard = discards[len(discards) - 1]

                # offer draw face-up iff not 3rd turn with first face up card (decline automatically in that case)
                if not (turnsTaken == 2 and faceUpCard == firstFaceUpCard):

                    #------------------------------------ DRAW ------------------------------------#
                    # 2020-12-01  -  Track states BEFORE the player PICKUP BEFORE player DISCARDS (track_bpbd)
                    # 2021-01-16  -  Track for both players instead of just player 0
                    # Action      -  PickUp from Discard(FaceUp) or Deck (Unknown)
                    # State       -  BPBD -> APBD

                    self.appendState(currentPlayer,discards,oppCard[currentPlayer])

                    #------------------------------------------------------------------------------#

                    # 2021-01-16  -  Update player with current states
                    if isinstance(GinRummyGame.players[currentPlayer], MLPGinRummyPlayer):
                        GinRummyGame.players[currentPlayer].updateStates(states[-1])


                    # both players declined and 1st player must draw face down
                    # if isinstance(GinRummyGame.players[currentPlayer], SimpleGinRummyPlayer):
                    drawFaceUp = GinRummyGame.players[currentPlayer].willDrawFaceUpCard(faceUpCard)
                    
                    if GinRummyGame.playVerbose and not drawFaceUp and faceUpCard == firstFaceUpCard and turnsTaken < 2:
                        print("Player %d declines %s.\n" % (currentPlayer, firstFaceUpCard))

                if not (not drawFaceUp and turnsTaken < 2 and faceUpCard == firstFaceUpCard):

                    # continue with turn if not initial declined option
                    drawCard = discards.pop() if drawFaceUp else deck.pop()
                    for i in range(2):
                        to_report = drawCard if i == currentPlayer or drawFaceUp else None
                        GinRummyGame.players[i].reportDraw(currentPlayer, to_report)

                    if GinRummyGame.playVerbose:
                        print("Player %d draws %s.\n" % (currentPlayer, drawCard))
                    hands[currentPlayer].append(drawCard)
#-------------------------------------------------------------- APBD --------------------------------------------------------------#
                    
                    self.appendState(currentPlayer,discards,oppCard[currentPlayer])
                    
                    # 2021-01-16  -  Update player with current states
                    if isinstance(GinRummyGame.players[currentPlayer], MLPGinRummyPlayer):
                        GinRummyGame.players[currentPlayer].updateStates(states[-1])

                    discardCard = GinRummyGame.players[currentPlayer].getDiscard()

                    # 2021-01-16  -  Track for both players instead of just player 0
                    # Track opponent pickup and discard after each discard 

                    # Set discarded card to 0 (in case discarded card was seen)
                    oppCard[1 - currentPlayer][discardCard.getId()] = 0
                    if drawFaceUp: # if opponent draws TopCard from discard
                        oppCard[1 - currentPlayer][drawCard.getId()] = 1


                    if not discardCard in hands[currentPlayer] or discardCard == faceUpCard:
                        print("Player %d discards %s illegally and forfeits.\n" % (currentPlayer, discardCard))
                        return opponent;
                    hands[currentPlayer].remove(discardCard)
                    for i in range(2):
                        GinRummyGame.players[i].reportDiscard(currentPlayer, discardCard)                    
                    if GinRummyGame.playVerbose:
                        print("Player %d discards %s.\n" % (currentPlayer, discardCard))
                    discards.append(discardCard)

                    if GinRummyGame.playVerbose:
                        unmeldedCards = hands[currentPlayer].copy()
                        bestMelds = GinRummyUtil.cardsToBestMeldSets(unmeldedCards)
                        if len(bestMelds) == 0:
                            print("Player %d has %s with %d deadwood.\n" % (currentPlayer, unmeldedCards, GinRummyUtil.getDeadwoodPoints3(unmeldedCards)))
                        else:
                            melds = bestMelds[0]
                            for meld in melds:
                                for card in meld:
                                    unmeldedCards.remove(card)
                            melds.extend(unmeldedCards)
                            print("Player %d has %s with %d deadwood.\n" % (currentPlayer, melds, GinRummyUtil.getDeadwoodPoints3(unmeldedCards)))

#-------------------------------------------------------------- KNOCK --------------------------------------------------------------#
                    # CHECK FOR KNOCK
                    knockMelds = GinRummyGame.players[currentPlayer].getFinalMelds()
                    if knockMelds != None:
                        # print(knockMelds)
                        # 2021-01-16  -  Check if MLPGinRummyPlayer knocks
                        if isinstance(GinRummyGame.players[currentPlayer], MLPGinRummyPlayer):
                            knock = GinRummyGame.players[currentPlayer].knockAction()
                            print(knock)
                            if knock:
                                break
                        else:
                            break
                    
                turnsTaken += 1
                currentPlayer = 1 if currentPlayer == 0 else 0
                opponent = 1 if currentPlayer == 0 else 0

            if knockMelds != None:
                # round didn't end due to non-knocking and 2 cards remaining in draw pile
                # check legality of knocking meld
                handBitstring = GinRummyUtil.cardsToBitstring(hands[currentPlayer])
                unmelded = handBitstring
                for meld in knockMelds:
                    meldBitstring = GinRummyUtil.cardsToBitstring(meld)
                    if (not meldBitstring in GinRummyUtil.getAllMeldBitstrings()) or ((meldBitstring & unmelded) != meldBitstring):
                        # non-meld or meld not in hand
                        print("Player %d melds %s illegally and forfeits.\n" % (currentPlayer, knockMelds))
                        return opponent
                    unmelded &= ~meldBitstring # remove successfully melded cards from

                # compute knocking deadwood
                knockingDeadwood = GinRummyUtil.getDeadwoodPoints1(knockMelds, hands[currentPlayer])
                if knockingDeadwood > GinRummyUtil.MAX_DEADWOOD:
                    print("Player %d melds %s with greater than %d deadwood and forfeits.\n" % (currentPlayer, knockMelds, knockingDeadwood))
                    return opponent

                meldsCopy = []
                for meld in knockMelds:
                    meldsCopy.append(meld.copy())
                for i in range(2):
                    GinRummyGame.players[i].reportFinalMelds(currentPlayer, meldsCopy)
                if GinRummyGame.playVerbose:
                    if knockingDeadwood > 0:
                        print("Player %d melds %s with %d deadwood from %s.\n" % (currentPlayer, knockMelds, knockingDeadwood, GinRummyUtil.bitstringToCards(unmelded)))
                    else:
                        print("Player %d goes gin with melds %s.\n" % (currentPlayer, knockMelds))

                # get opponent meld
                opponentMelds = GinRummyGame.players[opponent].getFinalMelds();
                meldsCopy = []
                for meld in opponentMelds:
                    meldsCopy.append(meld.copy())
                for i in range(2):
                    GinRummyGame.players[i].reportFinalMelds(opponent, meldsCopy)

                # check legality of opponent meld
                opponentHandBitstring = GinRummyUtil.cardsToBitstring(hands[opponent])
                opponentUnmelded = opponentHandBitstring
                for meld in opponentMelds:
                    meldBitstring = GinRummyUtil.cardsToBitstring(meld)
                    if (meldBitstring not in GinRummyUtil.getAllMeldBitstrings()) or ((meldBitstring & opponentUnmelded) != meldBitstring):
                        # non-meld or meld not in hand
                        print("Player %d melds %s illegally and forfeits.\n" % (opponent, opponentMelds))
                        return currentPlayer
                    opponentUnmelded &= ~meldBitstring # remove successfully melded cards from

                if GinRummyGame.playVerbose:
                    print("Player %d melds %s.\n" % (opponent, opponentMelds))

                # lay off on knocking meld (if not gin)
                unmeldedCards = GinRummyUtil.bitstringToCards(opponentUnmelded)
                if knockingDeadwood > 0:
                    # knocking player didn't go gin
                    cardWasLaidOff = False
                    while True:
                        # attempt to lay each card off
                        cardWasLaidOff = False
                        layOffCard = None
                        layOffMeld = None
                        for card in unmeldedCards:
                            for meld in knockMelds:
                                newMeld = meld.copy()
                                newMeld.append(card)
                                newMeldBitstring = GinRummyUtil.cardsToBitstring(newMeld)
                                if newMeldBitstring in GinRummyUtil.getAllMeldBitstrings():
                                    layOffCard = card
                                    layOffMeld = meld
                                    break
                            if layOffCard != None:
                                if GinRummyGame.playVerbose:
                                    print("Player %d lays off %s on %s.\n" % (opponent, layOffCard, layOffMeld))
                                for i in range(2):
                                    GinRummyGame.players[i].reportLayoff(opponent, layOffCard, layOffMeld.copy())
                                unmeldedCards.remove(layOffCard)
                                layOffMeld.append(layOffCard)
                                cardWasLaidOff = True
                                break
                        if not cardWasLaidOff:
                            break

                opponentDeadwood = 0
                for card in unmeldedCards:
                    opponentDeadwood += GinRummyUtil.getDeadwoodPoints2(card)
                if GinRummyGame.playVerbose:
                    print("Player %d has %d deadwood with %s\n" % (opponent, opponentDeadwood, unmeldedCards))
                # compare deadwood and compute new scores
                if knockingDeadwood == 0:
                    # gin round win
                    scores[currentPlayer] += GinRummyUtil.GIN_BONUS + opponentDeadwood
                    if GinRummyGame.playVerbose:
                        print("Player %d scores the gin bonus of %d plus opponent deadwood %d for %d total points.\n" % \
                        (currentPlayer, GinRummyUtil.GIN_BONUS, opponentDeadwood, GinRummyUtil.GIN_BONUS + opponentDeadwood))

                elif knockingDeadwood < opponentDeadwood:
                    # non-gin round win:
                    scores[currentPlayer] += opponentDeadwood - knockingDeadwood;
                    if GinRummyGame.playVerbose:
                        print("Player %d scores the deadwood difference of %d.\n" % (currentPlayer, opponentDeadwood - knockingDeadwood))

                else:
                    # undercut win for opponent
                    scores[opponent] += GinRummyUtil.UNDERCUT_BONUS + knockingDeadwood - opponentDeadwood;
                    if GinRummyGame.playVerbose:
                        print("Player %d undercuts and scores the undercut bonus of %d plus deadwood difference of %d for %d total points.\n" % \
                        (opponent, GinRummyUtil.UNDERCUT_BONUS, knockingDeadwood - opponentDeadwood, GinRummyUtil.UNDERCUT_BONUS + knockingDeadwood - opponentDeadwood))

                startingPlayer = 1 if startingPlayer == 0 else 0 # starting player alternates

            # If the round ends due to a two card draw pile with no knocking, the round is cancelled.
            else:
                if GinRummyGame.playVerbose:
                    print("The draw pile was reduced to two cards without knocking, so the hand is cancelled.")

            # report final hands
            for i in range(2):
                for j in range(2):
                    GinRummyGame.players[i].reportFinalHand(j, hands[j].copy())

            # score reporting
            if GinRummyGame.playVerbose:
                print("Player\tScore\n0\t%d\n1\t%d\n" % (scores[0], scores[1]))
            for i in range(2):
                GinRummyGame.players[i].reportScores(scores.copy())

        if GinRummyGame.playVerbose:
            print("Player %s wins.\n" % (0 if scores[0] > scores[1] else 1))
        return 0 if scores[0] >= GinRummyUtil.GOAL_SCORE else 1



# Generate Data

In [295]:
# Available states-action pairs:
state_action_pair = {'all': 'all', # all actions EXCEPT 0,1,4
                    'bpbd': 'draw', # actions 2/3 
                    'apbd': ['discard', 'knock'], # actions 6-57, 58-109
                    'apad': 'knock_bin'} # binary action

def generateData(pth,state,action,numGames,verbose):
    # Multiple non-verbose games
    GinRummyGame.setPlayVerbose(verbose)
    numP1Wins = 0

    agent0 = MLPGinRummyPlayer()
    model_name = 'all_states_all_actions'
    model_name = 'all_states_all_actions_MLP_base_extra_knock_data_40K'
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    torch.load('{}/models/{}/{}/{}/model.pt'.format(pth,state,action,model_name), map_location=device)
    agent0.loadModel(torch.load('{}/models/{}/{}/{}/model.pt'.format(pth,state,action,model_name), map_location=device))
    
    game = GinRummyGame(agent0, SimpleGinRummyPlayer())
    # game = GinRummyGame(SimpleGinRummyPlayer(), SimpleGinRummyPlayer())
    if action in state_action_pair[state]:
        for i in range(numGames):
            if i % 100 == 0:
                print("Game ... ", i)
            numP1Wins += game.play(state=state,action=action)
        # Save states and actions
        # Create path if doesn't exist
        state_pth = '{}/data/{}'.format(pth,state)
        if not os.path.exists(state_pth):
            os.mkdir(state_pth)
        action_pth = '{}/{}'.format(state_pth,action)
        if not os.path.exists(action_pth):
            os.mkdir(action_pth)
        data_pth = action_pth
                
        # save_str = numGames//1000
        # np.save('{}/s_{}k.npy'.format(data_pth,save_str), states)
        # np.save('{}/a_{}k.npy'.format(data_pth,save_str), actions)
        print("Games Won: P0:%d, P1:%d.\n" % (numGames - numP1Wins, numP1Wins))
    else:
        print('illegeal state-action pair')

## All

In [310]:
state = 'all'
action = 'all'
# numGames = 1
numGames = 2000
states, actions = [], []
generateData(pth,state,action,numGames,verbose=True)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m

Player 1 discards 8S.

Player 1 has [[AD, 2D, 3D], [AC, 2C, 3C, 4C], 3S, 2H, 6C] with 11 deadwood.

Update States
Pickup from discard: 0.99886227
Draw new card: 0.0011279513
Pickup Action
Player 0 draws 8H.

Update States
JD 42
MAX:0.9999995231628418, 3.8989861650406965e-07
Discard Action
Player 0 discards JD.

Player 0 has [[9C, TC, JC], [6S, 6H, 6D], AS, 2S, 4S, 8H] with 15 deadwood.

Player 1 draws 8C.

Player 1 discards 8C.

Player 1 has [[AD, 2D, 3D], [AC, 2C, 3C, 4C], 3S, 2H, 6C] with 11 deadwood.

Update States
Pickup from discard: 0.8709139
Draw new card: 0.12907973
Pickup Action
Player 0 draws KD.

Update States
KD 44
MAX:3.737710869700095e-07, 5.9068072005175054e-05
Knock Action
Player 0 discards 4S.

Player 0 has [[9C, TC, JC], [6S, 6H, 6D], AS, 2S, 8H, KD] with 21 deadwood.

Player 1 draws 5D.

Player 1 discards 6C.

Player 1 has [[AD, 2D, 3D], [AC, 2C, 3C, 4C], 3S, 2H, 5D] with 10 deadwood.

Player 1 melds [

# Testing

In [86]:

model_name = 'all_states_all_actions'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.load('{}/models/{}/{}/{}/model.pt'.format(pth,state,action,model_name), map_location=device)

MLP_base(
  (l1): Linear(in_features=260, out_features=520, bias=True)
  (l2): Linear(in_features=520, out_features=110, bias=True)
  (act_fnc): Sigmoid()
  (sfx): Softmax(dim=1)
)

In [231]:
state_s = 'apbd'
action_a = 'knock'
data_pth = '{}/data/{}/{}'.format(pth,state_s,action_a)
states = np.load('{}/s_2k.npy'.format(data_pth))
actions = np.load('{}/a_2k.npy'.format(data_pth))

## Agent

In [244]:
agent = MLPGinRummyPlayer()

In [245]:
state_s = 'all'
action_a = 'all'
agent.loadModel(torch.load('{}/models/{}/{}/{}/model.pt'.format(pth,state_s,action_a,model_name), map_location=device))

Load Model


In [246]:
# input = np.expand_dims(states[0],axis=0)
# prob = agent.model(torch.from_numpy(input).type(torch.FloatTensor))
# action = prob.detach().numpy().reshape(-1)
# action[6:58]*np.zeros(52)
# # agent.model(np.expand_dims(,axis=0))

In [264]:
deck = Deck.getShuffle(random.randrange(10 ** 8))
hands = []
hands.extend([[], []])
hands[0] = []
hands[1] = []
for i in range(2 * GinRummyGame.HAND_SIZE):
    hands[i % 2] += [deck.pop()]
agent.startGame(0, 0, hands[0]);

In [265]:
agent.updateStates(states[-1])

Update States


In [266]:
c = Deck.strCardMap['AC']
agent.willDrawFaceUpCard(c)

Draw: 5.401174e-06
Pickup: 0.0074815652
Pickup Action


False

In [267]:
agent.playerNum

0

In [274]:
i = 10
agent.updateStates(states[i])
agent.reportDraw(0, c)
agent.getDiscard()
all_classes[np.argmax(actions[i])]

Update States
1
8C 52
MAX:0.9996635913848877, 0.00030408985912799835
Discard Action


'8C'