In [1]:
# Run this cell to mount your Google Drive.

from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


# Gin Rummy

In [2]:
pth = '/content/drive/MyDrive/Colab Notebooks/Thesis'

In [3]:
all_classes = ['SP0','SP1','Draw','Pickup','DH','GIN',
               'AS', '2S', '3S', '4S', '5S', '6S', '7S', '8S', '9S', 'TS', 'JS', 'QS', 'KS',
               'AH', '2H', '3H', '4H', '5H', '6H', '7H', '8H', '9H', 'TH', 'JH', 'QH', 'KH',
               'AD', '2D', '3D', '4D', '5D', '6D', '7D', '8D', '9D', 'TD', 'JD', 'QD', 'KD',
               'AC', '2C', '3C', '4C', '5C', '6C', '7C', '8C', '9C', 'TC', 'JC', 'QC', 'KC',
               'AS', '2S', '3S', '4S', '5S', '6S', '7S', '8S', '9S', 'TS', 'JS', 'QS', 'KS',
               'AH', '2H', '3H', '4H', '5H', '6H', '7H', '8H', '9H', 'TH', 'JH', 'QH', 'KH',
               'AD', '2D', '3D', '4D', '5D', '6D', '7D', '8D', '9D', 'TD', 'JD', 'QD', 'KD',
               'AC', '2C', '3C', '4C', '5C', '6C', '7C', '8C', '9C', 'TC', 'JC', 'QC', 'KC']


## Imports

In [4]:
#-------------------------------------------------------------------------------
# The following code was originally written by Todd Neller in Java.
# It was translated into Python by Anthony Hein.
#-------------------------------------------------------------------------------

#-------------------------------------------------------------------------------
# A class for modeling a game of Gin Rummy
# @author Todd W. Neller
# @version 1.0
#-------------------------------------------------------------------------------

#-------------------------------------------------------------------------------
# Copyright (C) 2020 Todd Neller
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# Information about the GNU General Public License is available online at:
#   http://www.gnu.org/licenses/
# To receive a copy of the GNU General Public License, write to the Free
# Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA.
#-------------------------------------------------------------------------------

import random
import time
import numpy as np
import os
import torch

%cd /content/drive/My Drive/Colab Notebooks/Thesis/GinRummy

from Deck import Deck
from GinRummyUtil import GinRummyUtil
from SimpleGinRummyPlayer import SimpleGinRummyPlayer

%cd /content/drive/My Drive/Colab Notebooks/Thesis/SupervisedLearning

from models import *

%cd /content/drive/My Drive/Colab Notebooks/Thesis
#-------------------------------------------------------------------------------

# TRACKING
# Plane (5x52)      Feature
# 0	 currHand       the cards in current player's hand
# 1	 topCard        the top card of the discard pile
# 2	 deadCard       the dead cards: cards in discard pile (excluding the top card)
# 3	 oppCard        opponent known cards: cards picked up from discard pile, but not discarded
# 4	 unknownCard    the unknown cards: cards in stockpile or in opponent hand (but not known)

# Action ID         Action
# 0	                score_player_0_action
# 1	                score_player_1_action
# 2	                draw_card_action
# 3	                pick_up_discard_action
# 4	                declare_dead_hand_action
# 5	                gin_action
# 6 - 57	        discard_action
# 58 - 109	        knock_action

# Knock_bin
# Action ID         Action
# 0	                No Knock
# 1	                Knock

def one_hot(cards):
    ret = np.zeros(52)
    for card in cards:
        ret[card.getId()] = 1
    return ret

def un_one_hot(arr):
    rankNames = ["A", "2", "3", "4", "5", "6", "7", "8", "9", "T", "J", "Q", "K"]
    suitNames = ['S', 'H', 'D', 'C']
    ret = []
    for i in range(len(arr)):
        if arr[i] != 0:
            ret.append(rankNames[i%13] + suitNames[i//13])
    return ret

#-------------------------------------------------------------------------------

/content/drive/My Drive/Colab Notebooks/Thesis/GinRummy
/content/drive/My Drive/Colab Notebooks/Thesis/SupervisedLearning
/content/drive/My Drive/Colab Notebooks/Thesis


## MLPGinRummyPlayer

In [5]:
# -------------------------------------------------------------------------------
#  MLPGinRummyPlayer
#
#  This estimation will be calculated using a Multilayer Percepton trained on the
#  SimpleGinRummyPlayer written
#  by Calvin Tan.
#
#  @author Calvin Tan
#  @version 1.0
# -------------------------------------------------------------------------------

# -------------------------------------------------------------------------------
# The following code was originally written by Todd Neller in Java.
# It was translated into Python by May Jiang.
# -------------------------------------------------------------------------------

# -------------------------------------------------------------------------------
# Copyright (C) 2020 Todd Neller
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# Information about the GNU General Public License is available online at:
#   http://www.gnu.org/licenses/
# To receive a copy of the GNU General Public License, write to the Free
# Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA.
# -------------------------------------------------------------------------------

from typing import List, TypeVar
from random import randint
from GinRummyUtil import GinRummyUtil
from GinRummyPlayer import GinRummyPlayer

# Import MLP Models
# from SupervisedLearning.models import *

Card = TypeVar('Card')

class MLPGinRummyPlayer(GinRummyPlayer):

    def loadModel(self, model_pt):
        print('Load Model')
        self.model = model_pt

    def setVerbose(self, verbose):
        self.playVerbose = verbose

    def updateStates(self, states):
        if self.playVerbose:
            print('Update States')
        self.state = states

    def knockAction(self) -> bool:
        return self.knock

    # Inform player of 0-based player number (0/1), starting player number (0/1), and dealt cards
    def startGame(self, playerNum: int, startingPlayerNum: int, cards: List[Card]) -> None:
        self.playerNum = playerNum
        self.startingPlayerNum = startingPlayerNum
        self.cards = list(cards)
        self.opponentKnocked = False
        self.drawDiscardBitstrings = [] # long[], or List[int]
        self.faceUpCard = None
        self.faceUpCardBool = False
        self.drawnCard = None
        self.state = None
        self.knock = False
        self.playVerbose = False

    # Return whether or not player will draw the given face-up card on the draw pile.
    def willDrawFaceUpCard(self, card: Card) -> bool:
        self.faceUpCard = card
        # BPBD, either draw(2)->False or pickup(3)->True
        state = np.expand_dims(self.state, axis=0)
        state = torch.from_numpy(state).type(torch.FloatTensor).to(device)
        action = self.model(state)
        action = action.detach().numpy().reshape(-1)
        if self.playVerbose:
            print('Draw new card:', action[2])
            print('Pickup from discard:', action[3])
        if action[3] > action[2]:
            # print('Pickup Discard Action')
            self.faceUpCardBool = True
            return True
        # print('Draw from Deck Action')
        self.faceUpCardBool = False
        return False

    # Report that the given player has drawn a given card and, if known, what the card is.
    # If the card is unknown because it is drawn from the face-down draw pile, the drawnCard is null.
    # Note that a player that returns false for willDrawFaceUpCard will learn of their face-down draw from this method.
    def reportDraw(self, playerNum: int, drawnCard: Card) -> None:
        # Ignore other player draws.  Add to cards if playerNum is this player.
        if playerNum == self.playerNum:
            self.cards.append(drawnCard)
            self.drawnCard = drawnCard






    # def getDiscard(self) -> Card:
    #     # Discard a random card (not just drawn face up) leaving minimal deadwood points.
    #     minDeadwood = float('inf')
    #     candidateCards = []
    #     for card in self.cards:
    #         # Cannot draw and discard face up card.
    #         if card == self.drawnCard and self.drawnCard == self.faceUpCard:
    #         # if card == self.drawnCard and self.faceUpCard:
    #             continue
    #         # Disallow repeat of draw and discard.
    #         drawDiscard = [self.drawnCard, card]
    #         if GinRummyUtil.cardsToBitstring(drawDiscard) in self.drawDiscardBitstrings:
    #             continue

    #         remainingCards = list(self.cards)
    #         remainingCards.remove(card)
    #         bestMeldSets = GinRummyUtil.cardsToBestMeldSets(remainingCards)
    #         deadwood = GinRummyUtil.getDeadwoodPoints3(remainingCards) if len(bestMeldSets) == 0 \
    #             else GinRummyUtil.getDeadwoodPoints1(bestMeldSets[0], remainingCards)
    #         if deadwood <= minDeadwood:
    #             if deadwood < minDeadwood:
    #                 minDeadwood = deadwood
    #                 candidateCards.clear()
    #             candidateCards.append(card)
    #     # Prevent future repeat of draw, discard pair.
    #     discard = candidateCards[randint(0, len(candidateCards)-1)]
    #     drawDiscard = [self.drawnCard, discard]
    #     self.drawDiscardBitstrings.append(GinRummyUtil.cardsToBitstring(drawDiscard))
    #     return discard

    # Get the player's discarded card.  If you took the top card from the discard pile,
    # you must discard a different card.
    # If this is not a card in the player's possession, the player forfeits the game.
    # @return the player's chosen card for discarding
    def getDiscard(self) -> Card:
        # APBD, either either discard or knock...
        # determine the allowable actions (which cards can be discarded/knocked on)
        currHand = np.array(self.state[0:52])
        knockCards = np.array(self.state[0:52])
        # if self.playVerbose:
        #     print('Current Hand:', un_one_hot(currHand))
        # disallow discarding PickUp FaceUp/Discarded Card
        if self.faceUpCardBool:
        # if self.drawnCard == self.faceUpCard:
            currHand[self.drawnCard.getId()] = 0
            knockCards[self.drawnCard.getId()] = 0
        
        # prune illegal knock actions
        cardIndex = np.where(knockCards == 1)[0]
        for c in cardIndex:
            remainingCards = list(self.cards)
            remainingCards.remove(Deck.getCard(c))
            bestMeldSets = GinRummyUtil.cardsToBestMeldSets(remainingCards)
            deadwood = GinRummyUtil.getDeadwoodPoints3(remainingCards) if len(bestMeldSets) == 0 \
                else GinRummyUtil.getDeadwoodPoints1(bestMeldSets[0], remainingCards)
            if deadwood > 10:
                knockCards[c] = 0

        state = np.expand_dims(self.state, axis=0)
        state = torch.from_numpy(state).type(torch.FloatTensor).to(device)
        action = self.model(state)
        action = action.detach().numpy().reshape(-1)

        discardMax = max(currHand * action[6:58])
        # knockMax = max(currHand * action[58:110])
        knockMax = max(knockCards * action[58:110])

        if self.playVerbose:
            unmeldedCards = self.cards.copy()
            bestMelds = GinRummyUtil.cardsToBestMeldSets(unmeldedCards)
            if len(bestMelds) > 0:
                melds = bestMelds[0]
                for meld in melds:
                    for card in meld:
                        unmeldedCards.remove(card)
                melds.extend(unmeldedCards)
            else:
                melds = unmeldedCards
            print('Current Hand:', melds)
            if np.argmax(action) > 58:
                # print('Knock', all_classes[np.argmax(action)], '| D:', Deck.getCard(np.argmax(currHand * action[6:58])), '| K:', Deck.getCard(np.argmax(currHand * action[58:])), '|', np.argmax(action))
                print('Knock', all_classes[np.argmax(action)], '| D:', Deck.getCard(np.argmax(currHand * action[6:58])), '| K:', Deck.getCard(np.argmax(knockCards * action[58:])), '|', np.argmax(action))
            else:
                # print('Discard', all_classes[np.argmax(action)], '| D:', Deck.getCard(np.argmax(currHand * action[6:58])), '| K:', Deck.getCard(np.argmax(currHand * action[58:])), '|', np.argmax(action))
                print('Discard', all_classes[np.argmax(action)], '| D:', Deck.getCard(np.argmax(currHand * action[6:58])), '| K:', Deck.getCard(np.argmax(knockCards * action[58:])), '|', np.argmax(action))
            print('MAX:{:.4f}, {:.4f}'.format(discardMax, knockMax))

        if discardMax > knockMax or int(sum(knockCards) == 0):
            if self.playVerbose:
                print('Discard Action')
            self.knock = False
            return Deck.getCard(np.argmax(currHand * action[6:58]))
        else:
            if self.playVerbose:
                print('Knock Action')
            self.knock = True
            # return Deck.getCard(np.argmax(currHand * action[58:]))
            return Deck.getCard(np.argmax(knockCards * action[58:]))




















    # Report that the given player has discarded a given card.
    def reportDiscard(self, playerNum: int, discardedCard: Card) -> None:
        # Ignore other player discards.  Remove from cards if playerNum is this player.
        if playerNum == self.playerNum:
            self.cards.remove(discardedCard)

    # At the end of each turn, this method is called and the player that cannot (or will not) end the round will return a null value.
    # However, the first player to "knock" (that is, end the round), and then their opponent, will return an ArrayList of ArrayLists of melded cards.
    # All other cards are counted as "deadwood", unless they can be laid off (added to) the knocking player's melds.
    # When final melds have been reported for the other player, a player should return their final melds for the round.
    # @return null if continuing play and opponent hasn't melded, or an ArrayList of ArrayLists of melded cards.
    def getFinalMelds(self) -> List[List[Card]]:
        # Check if deadwood of maximal meld is low enough to go out.
        bestMeldSets = GinRummyUtil.cardsToBestMeldSets(self.cards) # List[List[List[Card]]]
        if not self.opponentKnocked and (len(bestMeldSets) == 0 or \
            GinRummyUtil.getDeadwoodPoints1(bestMeldSets[0], self.cards) > \
            GinRummyUtil.MAX_DEADWOOD):
            return None
        if len(bestMeldSets) == 0:
            return []
        return bestMeldSets[randint(0, len(bestMeldSets)-1)]

    # When an player has ended play and formed melds, the melds (and deadwood) are reported to both players.
    def reportFinalMelds(self, playerNum: int, melds: List[List[Card]]) -> None:
        # Melds ignored by simple player, but could affect which melds to make for complex player.
        if playerNum != self.playerNum:
            self.opponentKnocked = True

    # Report current player scores, indexed by 0-based player number.
    def reportScores(self, scores: List[int]) -> None:
        # Ignored by simple player, but could affect strategy of more complex player.
        return

    # Report layoff actions.
    def reportLayoff(self, playerNum: int, layoffCard: Card, opponentMeld: List[Card]) -> None:
        # Ignored by simple player, but could affect strategy of more complex player.
        return

    # Report the final hands of players.
    def reportFinalHand(self, playerNum: int, hand: List[Card]) -> None:
        # Ignored by simple player, but could affect strategy of more complex player.
        return

### Estimator Network & load checkpoint

In [6]:
class EstimatorNetwork(nn.Module):
    ''' The function approximation network for Estimator
        It is just a series of sigmoid layers. All in/out are torch.tensor
        (OLD) It is just a series of tanh layers. All in/out are torch.tensor
    '''

    def __init__(self, mlp_layers=None, batch_norm=False, knock_layer=False, top_layer=True):
        ''' Initialize the Q network
        Args:
            action_num (int): number of legal actions
            state_shape (list): shape of state tensor
            mlp_layers (list): output size of each fc layer
        '''
        super(EstimatorNetwork, self).__init__()

        self.action_num = 110
        self.state_shape = 260
        self.mlp_layers = mlp_layers
        self.batch_norm = batch_norm
        self.knock_layer = knock_layer
        self.top_layer = top_layer

        # build the Q network
        layer_dims = [np.prod(self.state_shape)] + self.mlp_layers
        fc = [nn.Flatten()]
        if batch_norm:
            fc.append(nn.BatchNorm1d(layer_dims[0]))
        for i in range(len(layer_dims)-1):
            fc.append(nn.Linear(layer_dims[i], layer_dims[i+1], bias=True))
            fc.append(nn.Sigmoid())
        # add top layer onto Q-network
        if self.top_layer:
            fc.append(nn.Linear(layer_dims[-1], self.action_num, bias=True))
            fc.append(nn.Softmax(dim=1))
        else:
            # remove last sigmoid layer and append softmax layer
            fc.pop()
            fc.append(nn.Softmax(dim=1))

        # add knock layer to be an additional layer, which will manually be set identity
        # with bias on the knock actions (58-110)
        # required to be frozen!!!
        if self.knock_layer:
            fc.append(nn.Linear(self.action_num, self.action_num, bias=True))
            fc.append(nn.Softmax(dim=1))
        self.fc_layers = nn.Sequential(*fc)
        
    def forward(self, s):
        ''' Predict action values
        Args:
            s  (Tensor): (batch, state_shape)
        '''
        return self.fc_layers(s)

In [7]:
def load_checkpoint(checkpoint):
    pretrained_dict = {}
    if knock_layer:
        pretrained_dict = checkpoint
    else:
        model_dict = checkpoint
        # check if there is batch norm layer
        if batch_norm:
            b_layer = 1 
            pretrained_dict['fc_layers.1.weight'] = model_dict['fc_layers.1.weight']
            pretrained_dict['fc_layers.1.bias'] = model_dict['fc_layers.1.bias']
            pretrained_dict['fc_layers.1.running_mean'] = model_dict['fc_layers.1.running_mean']
            pretrained_dict['fc_layers.1.running_var'] = model_dict['fc_layers.1.running_var']
            pretrained_dict['fc_layers.1.num_batches_tracked'] = model_dict['fc_layers.1.num_batches_tracked']
        else:
            b_layer = 0
        curr_layer = 1 + b_layer
        for i in range(len(mlp_layers)):
            pretrained_dict['fc_layers.{}.weight'.format(curr_layer)] = model_dict['fc_layers.{}.weight'.format(curr_layer)]
            pretrained_dict['fc_layers.{}.bias'.format(curr_layer)] = model_dict['fc_layers.{}.bias'.format(curr_layer)]
            curr_layer += 2
        if top_layer:
            pretrained_dict['fc_layers.{}.weight'.format(curr_layer)] = model_dict['fc_layers.{}.weight'.format(curr_layer)]
            pretrained_dict['fc_layers.{}.bias'.format(curr_layer)] = model_dict['fc_layers.{}.bias'.format(curr_layer)]
    return pretrained_dict

## RandGinRummyPlayer

In [8]:
# -------------------------------------------------------------------------------
#  RandGinRummyPlayer
#
#  This estimation will be calculated using a Multilayer Percepton trained on the
#  SimpleGinRummyPlayer written
#  by Calvin Tan.
#
#  @author Calvin Tan
#  @version 1.0
# -------------------------------------------------------------------------------

# -------------------------------------------------------------------------------
# The following code was originally written by Todd Neller in Java.
# It was translated into Python by May Jiang.
# -------------------------------------------------------------------------------

# -------------------------------------------------------------------------------
# Copyright (C) 2020 Todd Neller
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# Information about the GNU General Public License is available online at:
#   http://www.gnu.org/licenses/
# To receive a copy of the GNU General Public License, write to the Free
# Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA.
# -------------------------------------------------------------------------------

from typing import List, TypeVar
from random import randint
from GinRummyUtil import GinRummyUtil
from GinRummyPlayer import GinRummyPlayer
import random

# Import MLP Models
# from SupervisedLearning.models import *

Card = TypeVar('Card')

class RandGinRummyPlayer(GinRummyPlayer):

    # Inform player of 0-based player number (0/1), starting player number (0/1), and dealt cards
    def startGame(self, playerNum: int, startingPlayerNum: int, cards: List[Card]) -> None:
        self.playerNum = playerNum
        self.startingPlayerNum = startingPlayerNum
        self.cards = list(cards)
        self.opponentKnocked = False
        self.drawDiscardBitstrings = [] # long[], or List[int]
        self.faceUpCard = None
        self.drawnCard = None
        self.state = None

    def willDrawFaceUpCard(self, card: Card) -> bool:
        # Return random choice
        self.faceUpCard = card
        newCards = list(self.cards)
        newCards.append(card)
        choice = random.randint(0, 1)
        if choice == 0:
            return True
        return False


    # Report that the given player has drawn a given card and, if known, what the card is.
    # If the card is unknown because it is drawn from the face-down draw pile, the drawnCard is null.
    # Note that a player that returns false for willDrawFaceUpCard will learn of their face-down draw from this method.
    def reportDraw(self, playerNum: int, drawnCard: Card) -> None:
        # Ignore other player draws.  Add to cards if playerNum is this player.
        if playerNum == self.playerNum:
            self.cards.append(drawnCard)
            self.drawnCard = drawnCard

    # Get the player's discarded card.  If you took the top card from the discard pile,
    # you must discard a different card.
    # If this is not a card in the player's possession, the player forfeits the game.
    # @return the player's chosen card for discarding
    def getDiscard(self) -> Card:

        choice = random.randint(0, len(self.cards)-1)
        discCard = self.cards[choice]
        while discCard == self.faceUpCard:
            choice = random.randint(0, len(self.cards)-1)
            discCard = self.cards[choice]
        return discCard


    # Report that the given player has discarded a given card.
    def reportDiscard(self, playerNum: int, discardedCard: Card) -> None:
        # Ignore other player discards.  Remove from cards if playerNum is this player.
        if playerNum == self.playerNum:
            self.cards.remove(discardedCard)

    # At the end of each turn, this method is called and the player that cannot (or will not) end the round will return a null value.
    # However, the first player to "knock" (that is, end the round), and then their opponent, will return an ArrayList of ArrayLists of melded cards.
    # All other cards are counted as "deadwood", unless they can be laid off (added to) the knocking player's melds.
    # When final melds have been reported for the other player, a player should return their final melds for the round.
    # @return null if continuing play and opponent hasn't melded, or an ArrayList of ArrayLists of melded cards.
    def getFinalMelds(self) -> List[List[Card]]:
        # Check if deadwood of maximal meld is low enough to go out.
        bestMeldSets = GinRummyUtil.cardsToBestMeldSets(self.cards) # List[List[List[Card]]]
        if not self.opponentKnocked and (len(bestMeldSets) == 0 or \
            GinRummyUtil.getDeadwoodPoints1(bestMeldSets[0], self.cards) > \
            GinRummyUtil.MAX_DEADWOOD):
            return None
        if len(bestMeldSets) == 0:
            return []
        return bestMeldSets[randint(0, len(bestMeldSets)-1)]

    # When an player has ended play and formed melds, the melds (and deadwood) are reported to both players.
    def reportFinalMelds(self, playerNum: int, melds: List[List[Card]]) -> None:
        # Melds ignored by simple player, but could affect which melds to make for complex player.
        if playerNum != self.playerNum:
            self.opponentKnocked = True

    # Report current player scores, indexed by 0-based player number.
    def reportScores(self, scores: List[int]) -> None:
        # Ignored by simple player, but could affect strategy of more complex player.
        return

    # Report layoff actions.
    def reportLayoff(self, playerNum: int, layoffCard: Card, opponentMeld: List[Card]) -> None:
        # Ignored by simple player, but could affect strategy of more complex player.
        return

    # Report the final hands of players.
    def reportFinalHand(self, playerNum: int, hand: List[Card]) -> None:
        # Ignored by simple player, but could affect strategy of more complex player.
        return

## Game Definition

In [9]:
class GinRummyGame:

    # Hand size (before and after turn). After draw and before discard there is one extra card.
    HAND_SIZE = 10;

    # Whether or not to print information during game play
    playVerbose = False;

    # Two Gin Rummy players numbered according to their array index.
    players = [];

    # Set whether or not there is to be printed output during gameplay.
    def setPlayVerbose(self, playVerbose):
        self.playVerbose = playVerbose
    
    #-------------------------------- updateState --------------------------------#
    # 2020-12-20: Define a method to append states
    # 2021-01-16: modified append state to work for either player (0 or 1)
    def updateState(self, currentPlayer, discards, oppCard):
        currHand = one_hot(self.players[currentPlayer].cards)
        topCard = np.zeros(52)
        if len(discards) > 0:
            topCard[discards[-1].getId()] = 1
        deadCard = np.zeros(52)
        for d in range(len(discards) - 1):
            deadCard[discards[d].getId()] = 1
        unknownCard = np.ones(52) - currHand - topCard - deadCard - oppCard
        self.states = np.array([currHand, topCard, deadCard, oppCard, unknownCard]).flatten()
    #------------------------------------------------------------------------------#

    # Create a self with two given players
    def __init__(self, player0, player1):
        self.players = []
        self.players.extend([player0, player1])

    # Play a game of Gin Rummy and return the winning player number 0 or 1.
    # @return the winning player number 0 or 1

    def play(self):
        scores = [0, 0]
        hands = []
        hands.extend([[], []])
        startingPlayer = random.randrange(2);

        # while game not over
        while scores[0] < GinRummyUtil.GOAL_SCORE and scores[1] < GinRummyUtil.GOAL_SCORE:
            
            num_turns = 0
            currentPlayer = startingPlayer
            opponent = (1 if currentPlayer == 0 else 0)
            
            # get shuffled deck and deal cards
            deck = Deck.getShuffle(random.randrange(10 ** 8))
            hands[0] = []
            hands[1] = []
            for i in range(2 * self.HAND_SIZE):
                hands[i % 2] += [deck.pop()]
            for i in range(2):
                self.players[i].startGame(i, startingPlayer, hands[i]);
                if self.playVerbose:
                    print("Player %d is dealt %s.\n" % (i, hands[i]))
            if self.playVerbose:
                print("Player %d starts.\n" % (startingPlayer))
            discards = []
            discards.append(deck.pop())
            if self.playVerbose:
                print("The initial face up card is %s.\n" % (discards[len(discards) - 1]))
            firstFaceUpCard = discards[len(discards) - 1]
            turnsTaken = 0
            knockMelds = None

            # 11/25 - Initial state, prior to any cards
            # 1/16 - Initialize oppCard to be two dimensional to track both players as opponents
            oppCard = []
            oppCard.extend([np.zeros(52), np.zeros(52)])

            for i in range(2):
                if isinstance(self.players[i], MLPGinRummyPlayer):
                    self.players[i].setVerbose(self.playVerbose)

            # while the deck has more than two cards remaining, play round
            while len(deck) > 2:
                if num_turns > 300:
                    print("Max Turns exceeded, restart")
                    break
                else:
                    num_turns += 1
#-------------------------------------------------------------- BPBD --------------------------------------------------------------#
                drawFaceUp = False
                faceUpCard = discards[len(discards) - 1]

                # offer draw face-up iff not 3rd turn with first face up card (decline automatically in that case)
                if not (turnsTaken == 2 and faceUpCard == firstFaceUpCard):

                    #------------------------------------ DRAW ------------------------------------#
                    # 2020-12-01  -  Track states BEFORE the player PICKUP BEFORE player DISCARDS (track_bpbd)
                    # 2021-01-16  -  Track for both players instead of just player 0
                    # Action      -  PickUp from Discard(FaceUp) or Deck (Unknown)
                    # State       -  BPBD -> APBD

                    self.updateState(currentPlayer,discards,oppCard[currentPlayer])

                    #------------------------------------------------------------------------------#

                    # 2021-01-16  -  Update player with current states
                    if isinstance(self.players[currentPlayer], MLPGinRummyPlayer):
                        self.players[currentPlayer].updateStates(self.states)

                    # both players declined and 1st player must draw face down
                    drawFaceUp = self.players[currentPlayer].willDrawFaceUpCard(faceUpCard)
                    
                    if self.playVerbose and not drawFaceUp and faceUpCard == firstFaceUpCard and turnsTaken < 2:
                        print("Player %d declines %s.\n" % (currentPlayer, firstFaceUpCard))

                if not (not drawFaceUp and turnsTaken < 2 and faceUpCard == firstFaceUpCard):

                    # continue with turn if not initial declined option
                    if self.playVerbose:
                        if drawFaceUp:
                            print('drawFaceUp (Pickup discarded card)')
                        else:
                            print('Draw from deck')
                    drawCard = discards.pop() if drawFaceUp else deck.pop()
                    for i in range(2):
                        to_report = drawCard if i == currentPlayer or drawFaceUp else None
                        self.players[i].reportDraw(currentPlayer, to_report)

                    if self.playVerbose:
                        print("Player %d draws %s.\n" % (currentPlayer, drawCard))
                    hands[currentPlayer].append(drawCard)
#-------------------------------------------------------------- APBD --------------------------------------------------------------#
                    
                    self.updateState(currentPlayer,discards,oppCard[currentPlayer])
                    
                    # 2021-01-16  -  Update player with current states
                    if isinstance(self.players[currentPlayer], MLPGinRummyPlayer):
                    # if type(self.players[currentPlayer]) == type(MLPGinRummyPlayer()):
                        self.players[currentPlayer].updateStates(self.states)

                    discardCard = self.players[currentPlayer].getDiscard()

                    # 2021-01-16  -  Track for both players instead of just player 0
                    # Track opponent pickup and discard after each discard 

                    # Set discarded card to 0 (in case discarded card was seen)
                    oppCard[1 - currentPlayer][discardCard.getId()] = 0
                    if drawFaceUp: # if opponent draws TopCard from discard
                        oppCard[1 - currentPlayer][drawCard.getId()] = 1

                    if not discardCard in hands[currentPlayer] or discardCard == faceUpCard:
                        print("Player %d discards %s illegally and forfeits.\n" % (currentPlayer, discardCard))
                        return opponent;
                    hands[currentPlayer].remove(discardCard)
                    for i in range(2):
                        self.players[i].reportDiscard(currentPlayer, discardCard)                    
                    if self.playVerbose:
                        print("Player %d discards %s.\n" % (currentPlayer, discardCard))
                    discards.append(discardCard)

                    if self.playVerbose:
                        unmeldedCards = hands[currentPlayer].copy()
                        bestMelds = GinRummyUtil.cardsToBestMeldSets(unmeldedCards)
                        if len(bestMelds) == 0:
                            print("Player %d has %s with %d deadwood.\n" % (currentPlayer, unmeldedCards, GinRummyUtil.getDeadwoodPoints3(unmeldedCards)))
                        else:
                            melds = bestMelds[0]
                            for meld in melds:
                                for card in meld:
                                    unmeldedCards.remove(card)
                            melds.extend(unmeldedCards)
                            print("Player %d has %s with %d deadwood.\n" % (currentPlayer, melds, GinRummyUtil.getDeadwoodPoints3(unmeldedCards)))

#-------------------------------------------------------------- KNOCK --------------------------------------------------------------#
                    # CHECK FOR KNOCK
                    knockMelds = self.players[currentPlayer].getFinalMelds()
                    if knockMelds != None:
                        # print('Current Player:', currentPlayer)
                        # print(knockMelds)
                        # break
                        # 2021-01-16  -  Check if MLPGinRummyPlayer knocks
                        if isinstance(self.players[currentPlayer], MLPGinRummyPlayer):
                            knock = self.players[currentPlayer].knockAction()
                            if self.playVerbose:
                                print(knock)
                            if knock:
                                break
                        else:
                            break
                    
                turnsTaken += 1
                # currentPlayer = 1 if currentPlayer == 0 else 0
                # opponent = 1 if currentPlayer == 0 else 0
                if len(deck) > 2:
                    currentPlayer = 1 if currentPlayer == 0 else 0
                    opponent = 1 if currentPlayer == 0 else 0

            # if knockMelds != None and len(deck) > 2:
            if knockMelds != None:
                # round didn't end due to non-knocking and 2 cards remaining in draw pile
                # check legality of knocking meld
                handBitstring = GinRummyUtil.cardsToBitstring(hands[currentPlayer])
                unmelded = handBitstring
                for meld in knockMelds:
                    meldBitstring = GinRummyUtil.cardsToBitstring(meld)
                    if (not meldBitstring in GinRummyUtil.getAllMeldBitstrings()) or ((meldBitstring & unmelded) != meldBitstring):
                        # non-meld or meld not in hand
                        # print(len(deck))
                        # print(meld)
                        # print(knockMelds)
                        # print(currentPlayer, hands[currentPlayer])
                        # print(1- currentPlayer, hands[1-currentPlayer])
                        # print(GinRummyUtil.getDeadwoodPoints1(knockMelds, hands[1-currentPlayer]))
                        print("Player %d melds %s illegally and forfeits.\n" % (currentPlayer, knockMelds))
                        return opponent
                    unmelded &= ~meldBitstring # remove successfully melded cards from

                # compute knocking deadwood
                knockingDeadwood = GinRummyUtil.getDeadwoodPoints1(knockMelds, hands[currentPlayer])
                if knockingDeadwood > GinRummyUtil.MAX_DEADWOOD:
                    print("Player %d melds %s with greater than %d deadwood and forfeits.\n" % (currentPlayer, knockMelds, knockingDeadwood))
                    return opponent

                meldsCopy = []
                for meld in knockMelds:
                    meldsCopy.append(meld.copy())
                for i in range(2):
                    self.players[i].reportFinalMelds(currentPlayer, meldsCopy)
                if self.playVerbose:
                    if knockingDeadwood > 0:
                        print("Player %d melds %s with %d deadwood from %s.\n" % (currentPlayer, knockMelds, knockingDeadwood, GinRummyUtil.bitstringToCards(unmelded)))
                    else:
                        print("Player %d goes gin with melds %s.\n" % (currentPlayer, knockMelds))

                # get opponent meld
                opponentMelds = self.players[opponent].getFinalMelds();
                meldsCopy = []
                for meld in opponentMelds:
                    meldsCopy.append(meld.copy())
                for i in range(2):
                    self.players[i].reportFinalMelds(opponent, meldsCopy)

                # check legality of opponent meld
                opponentHandBitstring = GinRummyUtil.cardsToBitstring(hands[opponent])
                opponentUnmelded = opponentHandBitstring
                for meld in opponentMelds:
                    meldBitstring = GinRummyUtil.cardsToBitstring(meld)
                    if (meldBitstring not in GinRummyUtil.getAllMeldBitstrings()) or ((meldBitstring & opponentUnmelded) != meldBitstring):
                        # non-meld or meld not in hand
                        print("Player %d melds %s illegally and forfeits.\n" % (opponent, opponentMelds))
                        return currentPlayer
                    opponentUnmelded &= ~meldBitstring # remove successfully melded cards from

                if self.playVerbose:
                    print("Player %d melds %s.\n" % (opponent, opponentMelds))

                # lay off on knocking meld (if not gin)
                unmeldedCards = GinRummyUtil.bitstringToCards(opponentUnmelded)
                if knockingDeadwood > 0:
                    # knocking player didn't go gin
                    cardWasLaidOff = False
                    while True:
                        # attempt to lay each card off
                        cardWasLaidOff = False
                        layOffCard = None
                        layOffMeld = None
                        for card in unmeldedCards:
                            for meld in knockMelds:
                                newMeld = meld.copy()
                                newMeld.append(card)
                                newMeldBitstring = GinRummyUtil.cardsToBitstring(newMeld)
                                if newMeldBitstring in GinRummyUtil.getAllMeldBitstrings():
                                    layOffCard = card
                                    layOffMeld = meld
                                    break
                            if layOffCard != None:
                                if self.playVerbose:
                                    print("Player %d lays off %s on %s.\n" % (opponent, layOffCard, layOffMeld))
                                for i in range(2):
                                    self.players[i].reportLayoff(opponent, layOffCard, layOffMeld.copy())
                                unmeldedCards.remove(layOffCard)
                                layOffMeld.append(layOffCard)
                                cardWasLaidOff = True
                                break
                        if not cardWasLaidOff:
                            break

                opponentDeadwood = 0
                for card in unmeldedCards:
                    opponentDeadwood += GinRummyUtil.getDeadwoodPoints2(card)
                if self.playVerbose:
                    print("Player %d has %d deadwood with %s\n" % (opponent, opponentDeadwood, unmeldedCards))
                # compare deadwood and compute new scores
                if knockingDeadwood == 0:
                    # gin round win
                    scores[currentPlayer] += GinRummyUtil.GIN_BONUS + opponentDeadwood
                    if self.playVerbose:
                        print("Player %d scores the gin bonus of %d plus opponent deadwood %d for %d total points.\n" % \
                        (currentPlayer, GinRummyUtil.GIN_BONUS, opponentDeadwood, GinRummyUtil.GIN_BONUS + opponentDeadwood))

                elif knockingDeadwood < opponentDeadwood:
                    # non-gin round win:
                    scores[currentPlayer] += opponentDeadwood - knockingDeadwood;
                    if self.playVerbose:
                        print("Player %d scores the deadwood difference of %d.\n" % (currentPlayer, opponentDeadwood - knockingDeadwood))

                else:
                    # undercut win for opponent
                    scores[opponent] += GinRummyUtil.UNDERCUT_BONUS + knockingDeadwood - opponentDeadwood;
                    if self.playVerbose:
                        print("Player %d undercuts and scores the undercut bonus of %d plus deadwood difference of %d for %d total points.\n" % \
                        (opponent, GinRummyUtil.UNDERCUT_BONUS, knockingDeadwood - opponentDeadwood, GinRummyUtil.UNDERCUT_BONUS + knockingDeadwood - opponentDeadwood))

                startingPlayer = 1 if startingPlayer == 0 else 0 # starting player alternates

            # If the round ends due to a two card draw pile with no knocking, the round is cancelled.
            else:
                if self.playVerbose:
                    print("The draw pile was reduced to two cards without knocking, so the hand is cancelled.")

            # report final hands
            for i in range(2):
                for j in range(2):
                    self.players[i].reportFinalHand(j, hands[j].copy())

            # score reporting
            if self.playVerbose:
                print("Player\tScore\n0\t%d\n1\t%d\n" % (scores[0], scores[1]))
            for i in range(2):
                self.players[i].reportScores(scores.copy())

        if self.playVerbose:
            print("Player %s wins.\n" % (0 if scores[0] > scores[1] else 1))
        return 0 if scores[0] >= GinRummyUtil.GOAL_SCORE else 1

## Shared

In [10]:
def testAgents(agent0,agent1,numGames,verbose):
    numP1Wins = 0
    game = GinRummyGame(agent0, agent1)
    # Multiple non-verbose games
    game.setPlayVerbose(verbose)
    # for i in range(2):
    #     if isinstance(game.players[i], MLPGinRummyPlayer):
    #         print(game.players[i].model)
    for i in range(numGames):
        if i % 250 == 0:
        # if i % 100 == 0:
            print("Game ... ", i)
        # set random seed to make testing consistent
        random.seed(i)
        numP1Wins += game.play()
    print("Games Won: P0:%d, P1:%d.\n" % (numGames - numP1Wins, numP1Wins))

In [11]:
state = 'all'
action = 'all'

# Test Agents

## DQN - baseline

In [12]:
# Wins: pre/rand/sgr/post
# Wins2: rand_k/sgr_k
# Wins: 0
# Wins2:

batch_norm = False
top_layer = False
knock_layer = False
mlp_layers = [520, 520, 110]
model = 'models/dqn/final/baseline'
# Wins: 
# Wins2:

numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

agent1 = SimpleGinRummyPlayer()

### vs Random

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = RandGinRummyPlayer()
testAgents(agent0,agent2,numGames,verbose=False)

### Pre Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



### Post DQN Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



In [13]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



In [14]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



### vs Baseline

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

### Post vs. Pre

In [None]:
# numGames = 1000
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# agent0 = MLPGinRummyPlayer()
# checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
# qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
# qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
# agent0.loadModel(qnet)
# agent2 = MLPGinRummyPlayer()
# checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
# qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
# qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
# agent2.loadModel(qnet)
# testAgents(agent0,agent2,numGames,verbose=False)

## DQN - baseline 1HL

In [63]:
# Wins: pre/rand/sgr/post
# Wins2: rand_k/sgr_k
# Wins: 138/74/137/80
# Wins2:

batch_norm = False
top_layer = False
knock_layer = False
mlp_layers = [520, 110]
model = 'models/dqn/final/baseline_1HL'
# Wins: 
# Wins2:

numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

agent1 = SimpleGinRummyPlayer()

### vs Random

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = RandGinRummyPlayer()
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:1000, P1:0.



### Pre Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:138, P1:862.



### Post DQN Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:74, P1:926.



In [16]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:73, P1:927.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:137, P1:863.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:135, P1:865.



In [17]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:141, P1:859.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:138, P1:862.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:80, P1:920.



### vs Baseline

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Max Turns exceeded, restart
Game ...  500
Game ...  750
Games Won: P0:321, P1:679.



In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Max Turns exceeded, restart
Game ...  500
Game ...  750
Max Turns exceeded, restart
Player 0 melds [[9D, TD, JD, QD, KD], [4S, 4D, 4C]] illegally and forfeits.

Max Turns exceeded, restart
Games Won: P0:159, P1:841.



### Post vs. Pre

In [64]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent2.loadModel(qnet)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Max Turns exceeded, restart
Max Turns exceeded, restart
Player 1 melds [[2S, 2H, 2D, 2C], [5S, 5H, 5D]] illegally and forfeits.

Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  250
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  750
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Games Won: P0:280, P1:720.



## DQN - baseline 2HL_40k

In [61]:
# Wins: pre/rand/sgr/post
# Wins2: rand_k/sgr_k
# Wins: 280/160/220/167
# Wins2: 282/282

batch_norm = False
top_layer = False
knock_layer = False
mlp_layers = [520, 520, 110]
model = 'models/dqn/final/baseline_2HL_40k'
# Wins: 
# Wins2:

numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

agent1 = SimpleGinRummyPlayer()

### vs Random

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = RandGinRummyPlayer()
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:1000, P1:0.



### Pre Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:280, P1:720.



### Post DQN Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:161, P1:839.



In [19]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:181, P1:819.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:220, P1:780.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:282, P1:718.



In [20]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:282, P1:718.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:282, P1:718.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:167, P1:833.



### vs Baseline

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_40K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Max Turns exceeded, restart
Games Won: P0:539, P1:461.



In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_40K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Game ...  500
Max Turns exceeded, restart
Max Turns exceeded, restart
Player 1 melds [[2S, 3S, 4S], [8D, 9D, TD, JD, QD]] illegally and forfeits.

Game ...  750
Max Turns exceeded, restart
Max Turns exceeded, restart
Player 1 melds [[4S, 4H, 4D], [7S, 7H, 7D, 7C]] illegally and forfeits.

Games Won: P0:332, P1:668.



### Post vs. Pre

In [62]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent2.loadModel(qnet)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Game ...  500
Max Turns exceeded, restart
Max Turns exceeded, restart
Player 1 melds [[2S, 3S, 4S], [8D, 9D, TD, JD, QD]] illegally and forfeits.

Game ...  750
Max Turns exceeded, restart
Max Turns exceeded, restart
Player 1 melds [[4S, 4H, 4D], [7S, 7H, 7D, 7C]] illegally and forfeits.

Games Won: P0:332, P1:668.



## DQN - baseline 2HL_80k

In [65]:
# Wins: pre/rand/sgr/post
# Wins2: rand_k/sgr_k
# Wins: 
# Wins2:

batch_norm = False
top_layer = False
knock_layer = False
mlp_layers = [520, 520, 110]
model = 'models/dqn/final/baseline_2HL_80k'
# Wins: 
# Wins2:

numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

agent1 = SimpleGinRummyPlayer()

### vs Random

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = RandGinRummyPlayer()
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:1000, P1:0.



### Pre Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:304, P1:696.



### Post DQN Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:205, P1:795.



In [22]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:195, P1:805.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:308, P1:692.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:304, P1:696.



In [23]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:273, P1:727.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:323, P1:677.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:194, P1:806.



### vs Baseline

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Game ...  750
Games Won: P0:492, P1:508.



In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Max Turns exceeded, restart
Player 1 melds [[7S, 8S, 9S, TS], [3S, 3D, 3C]] illegally and forfeits.

Max Turns exceeded, restart
Game ...  500
Game ...  750
Games Won: P0:350, P1:650.



### Post vs. Pre

In [66]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent2.loadModel(qnet)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Max Turns exceeded, restart
Player 1 melds [[7S, 8S, 9S, TS], [3S, 3D, 3C]] illegally and forfeits.

Max Turns exceeded, restart
Game ...  500
Game ...  750
Games Won: P0:350, P1:650.



## DQN - Frozen

In [24]:
# Wins: pre/rand/sgr/post
# Wins2: rand_k/sgr_k
# Wins: 
# Wins2:

batch_norm = False
top_layer = False
knock_layer = False
mlp_layers = [520, 520, 110]
model = 'models/dqn/final/frozen'
# Wins: 
# Wins2:

numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

agent1 = SimpleGinRummyPlayer()

### vs Random

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = RandGinRummyPlayer()
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:1000, P1:0.



### Pre Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:304, P1:696.



### Post DQN Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:238, P1:762.



In [25]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:261, P1:739.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:242, P1:758.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:312, P1:688.



In [26]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:290, P1:710.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:304, P1:696.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:231, P1:769.



### vs Baseline

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Game ...  750
Games Won: P0:492, P1:508.



In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Max Turns exceeded, restart
Game ...  250
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Game ...  750
Games Won: P0:408, P1:592.



### Post vs. Pre

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent2.loadModel(qnet)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Max Turns exceeded, restart
Game ...  250
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Game ...  750
Games Won: P0:408, P1:592.



## DQN - Random Top Layer

In [None]:
# Wins: pre/rand/sgr/post
# Wins2: rand_k/sgr_k
# Wins: 
# Wins2:

batch_norm = False
top_layer = True
knock_layer = False
mlp_layers = [520, 520, 110]
model = 'models/dqn/final/random'
# Wins: 
# Wins2:

numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

agent1 = SimpleGinRummyPlayer()

### vs Random

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = RandGinRummyPlayer()
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:990, P1:10.



### Pre Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



### Post DQN Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



### vs Baseline

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  250
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Player 0 melds [[AS, AD, AC], [7S, 7H, 7D, 7C]] illegally and forfeits.

Max Turns exceeded, restart
Game ...  750
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Games Won: P0:0, P1:1000.



In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Max Turns exceeded, restart
Games Won: P0:0, P1:1000.



### Post vs. Pre

In [None]:
# numGames = 1000
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# agent0 = MLPGinRummyPlayer()
# checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
# qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
# qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
# agent0.loadModel(qnet)
# agent2 = MLPGinRummyPlayer()
# checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
# qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
# qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
# agent2.loadModel(qnet)
# testAgents(agent0,agent2,numGames,verbose=False)

## DQN - Copy Top Layer

In [None]:
# Wins: pre/rand/sgr/post
# Wins2: rand_k/sgr_k
# Wins: 
# Wins2:

batch_norm = False
top_layer = True
knock_layer = False
mlp_layers = [520, 520, 110]
model = 'models/dqn/final/copy'
# Wins: 
# Wins2:

numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

agent1 = SimpleGinRummyPlayer()

### vs Random

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = RandGinRummyPlayer()
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:997, P1:3.



### Pre Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:63, P1:937.



### Post DQN Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:63, P1:937.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:63, P1:937.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:63, P1:937.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



### vs Baseline

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Game ...  500
Max Turns exceeded, restart
Game ...  750
Max Turns exceeded, restart
Games Won: P0:176, P1:824.



In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



### Post vs. Pre

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent2.loadModel(qnet)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:1, P1:999.



# Test Agents - Rewards

## DQN - v2

Gin/Knock/Other = 1/0.4/-0.01 per DW

In [27]:
# Wins: pre/rand/sgr/post
# Wins2: rand_k/sgr_k
# Wins: 
# Wins2:

batch_norm = False
top_layer = False
knock_layer = False
mlp_layers = [520, 520, 110]
model = 'models/dqn/final/rewards/v2'
# Wins: 
# Wins2:

numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

agent1 = SimpleGinRummyPlayer()

### vs Random

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = RandGinRummyPlayer()
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:1000, P1:0.



### Pre Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:304, P1:696.



### Post DQN Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:237, P1:763.



In [28]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:267, P1:733.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:303, P1:697.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:312, P1:688.



In [29]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:290, P1:710.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:304, P1:696.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:243, P1:757.



### vs Baseline

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Game ...  750
Games Won: P0:492, P1:508.



In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Max Turns exceeded, restart
Game ...  250
Game ...  500
Max Turns exceeded, restart
Game ...  750
Games Won: P0:408, P1:592.



### Post vs. Pre

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent2.loadModel(qnet)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Max Turns exceeded, restart
Game ...  250
Game ...  500
Max Turns exceeded, restart
Game ...  750
Games Won: P0:408, P1:592.



## DQN - v3

Gin/Knock/Other = 1/1/-0.01 per DW

In [30]:
# Wins: pre/rand/sgr/post
# Wins2: rand_k/sgr_k
# Wins: 
# Wins2:

batch_norm = False
top_layer = False
knock_layer = False
mlp_layers = [520, 520, 110]
model = 'models/dqn/final/rewards/v3'
# Wins: 
# Wins2:

numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

agent1 = SimpleGinRummyPlayer()

### vs Random

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = RandGinRummyPlayer()
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:1000, P1:0.



### Pre Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:304, P1:696.



### Post DQN Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:279, P1:721.



In [31]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:250, P1:750.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:247, P1:753.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:312, P1:688.



In [32]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:290, P1:710.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:303, P1:697.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:257, P1:743.



### vs Baseline

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Game ...  750
Games Won: P0:492, P1:508.



In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Max Turns exceeded, restart
Game ...  250
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  750
Games Won: P0:438, P1:562.



### Post vs. Pre

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent2.loadModel(qnet)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Max Turns exceeded, restart
Game ...  250
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  750
Games Won: P0:438, P1:562.



## DQN - v4

Gin/Knock/Other = 1/10/-0.01 per DW

In [33]:
# Wins: pre/rand/sgr/post
# Wins2: rand_k/sgr_k
# Wins: 
# Wins2:

batch_norm = False
top_layer = False
knock_layer = False
mlp_layers = [520, 520, 110]
model = 'models/dqn/final/rewards/v4'
# Wins: 
# Wins2:

numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

agent1 = SimpleGinRummyPlayer()

### vs Random

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = RandGinRummyPlayer()
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:1000, P1:0.



### Pre Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:304, P1:696.



### Post DQN Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:289, P1:711.



In [34]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:280, P1:720.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:272, P1:728.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:312, P1:688.



In [35]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:289, P1:711.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:289, P1:711.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:118, P1:882.



### vs Baseline

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Game ...  750
Games Won: P0:492, P1:508.



In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Player 1 melds [[2S, 2D, 2C], [4S, 4D, 4C]] illegally and forfeits.

Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  250
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Max Turns exceeded, restart
Player 0 melds [[AS, 2S, 3S, 4S, 5S], [4H, 4D, 4C]] illegally and forfeits.

Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  750
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Games Won: P0:260, P1:740.



### Post vs. Pre

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent2.loadModel(qnet)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Player 1 melds [[2S, 2D, 2C], [4S, 4D, 4C]] illegally and forfeits.

Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  250
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Max Turns exceeded, restart
Player 0 melds [[AS, 2S, 3S, 4S, 5S], [4H, 4D, 4C]] illegally and forfeits.

Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  750
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Games Won: P0:260, P1:740.



## DQN - v5

Gin/Knock/Other = 0/10/-0.01 per DW

In [36]:
# Wins: pre/rand/sgr/post
# Wins2: rand_k/sgr_k
# Wins: 
# Wins2:

batch_norm = False
top_layer = False
knock_layer = False
mlp_layers = [520, 520, 110]
model = 'models/dqn/final/rewards/v5'
# Wins: 
# Wins2:

numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

agent1 = SimpleGinRummyPlayer()

### vs Random

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = RandGinRummyPlayer()
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:1000, P1:0.



### Pre Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:304, P1:696.



### Post DQN Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:289, P1:711.



In [37]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:271, P1:729.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:312, P1:688.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:312, P1:688.



In [38]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:289, P1:711.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:289, P1:711.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:113, P1:887.



### vs Baseline

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Game ...  750
Games Won: P0:492, P1:508.



In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  250
Max Turns exceeded, restart
Player 0 melds [[4S, 4D, 4C], [6S, 6H, 6D]] illegally and forfeits.

Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  750
Max Turns exceeded, restart
Max Turns exceeded, restart
Games Won: P0:241, P1:759.



### Post vs. Pre

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent2.loadModel(qnet)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  250
Max Turns exceeded, restart
Player 0 melds [[4S, 4D, 4C], [6S, 6H, 6D]] illegally and forfeits.

Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  750
Max Turns exceeded, restart
Max Turns exceeded, restart
Games Won: P0:241, P1:759.



## DQN - v7

Gin/Knock/Other = 0/10/0

In [39]:
# Wins: pre/rand/sgr/post
# Wins2: rand_k/sgr_k
# Wins: 
# Wins2:

batch_norm = False
top_layer = False
knock_layer = False
mlp_layers = [520, 520, 110]
model = 'models/dqn/final/rewards/v7'
# Wins: 
# Wins2:

numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

agent1 = SimpleGinRummyPlayer()

### vs Random

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = RandGinRummyPlayer()
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:1000, P1:0.



### Pre Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:304, P1:696.



### Post DQN Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:289, P1:711.



In [40]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:257, P1:743.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:312, P1:688.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:312, P1:688.



In [41]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:289, P1:711.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:289, P1:711.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:131, P1:869.



### vs Baseline

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Game ...  750
Games Won: P0:492, P1:508.



In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Player 1 melds [[3S, 3H, 3D, 3C], [QS, QH, QC]] illegally and forfeits.

Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  250
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  750
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Games Won: P0:278, P1:722.



### Post vs. Pre

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent2.loadModel(qnet)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Player 1 melds [[3S, 3H, 3D, 3C], [QS, QH, QC]] illegally and forfeits.

Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  250
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  750
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Games Won: P0:278, P1:722.



# Test Agents - Psuedo Identity

## all_data

In [None]:
# Wins: pre/rand/sgr/post
# Wins2: rand_k/sgr_k
# Wins: 
# Wins2:

batch_norm = False
top_layer = True
knock_layer = False
mlp_layers = [520, 520, 110]
model = 'models/dqn/final/pseudo/all_data'
# Wins: 
# Wins2:

numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

agent1 = SimpleGinRummyPlayer()

### vs Random

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = RandGinRummyPlayer()
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:1000, P1:0.



### Pre Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



### Post DQN Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:1, P1:999.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



### vs Baseline

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Max Turns exceeded, restart
Game ...  250
Game ...  500
Max Turns exceeded, restart
Game ...  750
Games Won: P0:1, P1:999.



In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



### Post vs. Pre

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent2.loadModel(qnet)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Game ...  500
Max Turns exceeded, restart
Game ...  750
Games Won: P0:10, P1:990.



## 10pct_data

In [42]:
# Wins: pre/rand/sgr/post
# Wins2: rand_k/sgr_k
# Wins: 
# Wins2:

batch_norm = False
top_layer = True
knock_layer = False
mlp_layers = [520, 520, 110]
model = 'models/dqn/final/pseudo/10pct_data'
# Wins: 
# Wins2:

numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

agent1 = SimpleGinRummyPlayer()

### vs Random

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = RandGinRummyPlayer()
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:976, P1:24.



### Pre Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



### Post DQN Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



In [43]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



In [44]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



### vs Baseline

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Game ...  500
Max Turns exceeded, restart
Game ...  750
Max Turns exceeded, restart
Games Won: P0:1, P1:999.



In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  250
Game ...  500
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  750
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Games Won: P0:0, P1:1000.



### Post vs. Pre

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent2.loadModel(qnet)
testAgents(agent0,agent2,numGames,verbose=False)

## 5K_data

In [None]:
# Wins: pre/rand/sgr/post
# Wins2: rand_k/sgr_k
# Wins: 
# Wins2:

batch_norm = False
top_layer = True
knock_layer = False
mlp_layers = [520, 520, 110]
model = 'models/dqn/final/pseudo/5K_data'
# Wins: 
# Wins2:

numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

agent1 = SimpleGinRummyPlayer()

### vs Random

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = RandGinRummyPlayer()
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:865, P1:135.



### Pre Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



### Post DQN Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:0, P1:1000.



### vs Baseline

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  500
Game ...  750
Max Turns exceeded, restart
Games Won: P0:0, P1:1000.



In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  250
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Player 0 melds [[4H, 5H, 6H], [2H, 2D, 2C]] illegally and forfeits.

Game ...  750
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Max Turns exceeded, restart
Player 0 melds [[8H, 9H, TH, JH], [4S, 4H, 4D]] illegally and forfeits.

Max Turns exceeded, restart
Max Turns exceeded, restar

### Post vs. Pre

In [None]:
# numGames = 1000
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# agent0 = MLPGinRummyPlayer()
# checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
# qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
# qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
# agent0.loadModel(qnet)
# agent2 = MLPGinRummyPlayer()
# checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
# qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
# qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
# agent2.loadModel(qnet)
# testAgents(agent0,agent2,numGames,verbose=False)

# Test Agents - Knock

## knock_1

In [45]:
# Wins: pre/rand/sgr/post
# Wins2: rand_k/sgr_k
# Wins: 
# Wins2:

batch_norm = False
top_layer = False
knock_layer = True
mlp_layers = [520, 520, 110]
model = 'models/dqn/final/knock/knock_1'
# Wins: 
# Wins2:

numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

agent1 = SimpleGinRummyPlayer()

### vs Random

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = RandGinRummyPlayer()
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:1000, P1:0.



### Pre Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:335, P1:665.



### Post DQN Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:343, P1:657.



In [46]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:358, P1:642.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:343, P1:657.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:337, P1:663.



In [47]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:335, P1:665.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:335, P1:665.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:371, P1:629.



### vs Baseline

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Game ...  750
Games Won: P0:529, P1:471.



In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  750
Max Turns exceeded, restart
Games Won: P0:551, P1:449.



### Post vs. Pre

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent2.loadModel(qnet)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Game ...  750
Games Won: P0:521, P1:479.



In [None]:
numGames = 5000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent2.loadModel(qnet)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Game ...  750
Game ...  1000
Game ...  1250
Game ...  1500
Max Turns exceeded, restart
Game ...  1750
Game ...  2000
Game ...  2250
Game ...  2500
Game ...  2750
Game ...  3000
Game ...  3250
Game ...  3500
Game ...  3750
Max Turns exceeded, restart
Game ...  4000
Game ...  4250
Game ...  4500
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  4750
Games Won: P0:2699, P1:2301.



## knock_pt002

In [48]:
# Wins: pre/rand/sgr/post
# Wins2: rand_k/sgr_k
# Wins: 
# Wins2:

batch_norm = False
top_layer = False
knock_layer = True
mlp_layers = [520, 520, 110]
model = 'models/dqn/final/knock/knock_pt002'
# Wins: 
# Wins2:

numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

agent1 = SimpleGinRummyPlayer()

### vs Random

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = RandGinRummyPlayer()
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:1000, P1:0.



### Pre Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:309, P1:691.



### Post DQN Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:238, P1:762.



In [49]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:273, P1:727.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:270, P1:730.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:280, P1:720.



In [50]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:293, P1:707.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:274, P1:726.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:233, P1:767.



### vs Baseline

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Game ...  750
Games Won: P0:496, P1:504.



In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Game ...  500
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  750
Games Won: P0:447, P1:553.



### Post vs. Pre

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent2.loadModel(qnet)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Max Turns exceeded, restart
Game ...  250
Game ...  500
Max Turns exceeded, restart
Max Turns exceeded, restart
Game ...  750
Games Won: P0:450, P1:550.



## knock_pt02

In [51]:
# Wins: pre/rand/sgr/post
# Wins2: rand_k/sgr_k
# Wins: 
# Wins2:

batch_norm = False
top_layer = False
knock_layer = True
mlp_layers = [520, 520, 110]
model = 'models/dqn/final/knock/knock_pt02'
# Wins: 
# Wins2:

numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

agent1 = SimpleGinRummyPlayer()

### vs Random

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = RandGinRummyPlayer()
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:1000, P1:0.



### Pre Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:309, P1:691.



### Post DQN Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:285, P1:715.



In [52]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:275, P1:725.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:247, P1:753.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:306, P1:694.



In [53]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:301, P1:699.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:293, P1:707.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:278, P1:722.



### vs Baseline

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Game ...  750
Games Won: P0:494, P1:506.



In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Max Turns exceeded, restart
Game ...  500
Game ...  750
Games Won: P0:427, P1:573.



### Post vs. Pre

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent2.loadModel(qnet)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Max Turns exceeded, restart
Max Turns exceeded, restart
Player 1 melds [[4S, 5S, 6S, 7S], [3D, 4D, 5D]] illegally and forfeits.

Game ...  500
Game ...  750
Games Won: P0:429, P1:571.



## knock_pt2

In [54]:
# Wins: pre/rand/sgr/post
# Wins2: rand_k/sgr_k
# Wins: 
# Wins2:

batch_norm = False
top_layer = False
knock_layer = True
mlp_layers = [520, 520, 110]
model = 'models/dqn/final/knock/knock_pt2'
# Wins: 
# Wins2:

numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

agent1 = SimpleGinRummyPlayer()

### vs Random

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = RandGinRummyPlayer()
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:1000, P1:0.



### Pre Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:310, P1:690.



### Post DQN Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:276, P1:724.



In [55]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:281, P1:719.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:315, P1:685.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:310, P1:690.



In [56]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:294, P1:706.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:305, P1:695.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:301, P1:699.



### vs Baseline

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Game ...  750
Games Won: P0:508, P1:492.



In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Game ...  750
Games Won: P0:469, P1:531.



### Post vs. Pre

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent2.loadModel(qnet)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Game ...  750
Games Won: P0:443, P1:557.



## knock_2

In [57]:
# Wins: pre/rand/sgr/post
# Wins2: rand_k/sgr_k
# Wins: 
# Wins2:

batch_norm = False
top_layer = False
knock_layer = True
mlp_layers = [520, 520, 110]
model = 'models/dqn/final/knock/knock_2'
# Wins: 
# Wins2:

numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

agent1 = SimpleGinRummyPlayer()

### vs Random

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = RandGinRummyPlayer()
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:1000, P1:0.



### Pre Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:334, P1:666.



### Post DQN Training

In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:339, P1:661.



In [58]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:336, P1:664.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:322, P1:678.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_rand_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:334, P1:666.



In [59]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_self_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:334, P1:666.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_sgr_knock.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:334, P1:666.



In [None]:
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
testAgents(agent0,agent1,numGames,verbose=False)

Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:219, P1:781.



### vs Baseline

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Max Turns exceeded, restart
Game ...  500
Max Turns exceeded, restart
Game ...  750
Games Won: P0:528, P1:472.



In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('models/all/all/all_states_all_actions_2hl_extra_knock_data_80K/model.pt', map_location=device)
agent2.loadModel(checkpoint)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:391, P1:609.



### Post vs. Pre

In [None]:
numGames = 1000
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
agent0 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_posttrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent0.loadModel(qnet)
agent2 = MLPGinRummyPlayer()
checkpoint = torch.load('{}/model_pretrain.pth'.format(model), map_location=device)
qnet = EstimatorNetwork(mlp_layers, batch_norm, knock_layer, top_layer).to(device)
qnet.load_state_dict(load_checkpoint(checkpoint['dqn_q_estimator']))
agent2.loadModel(qnet)
testAgents(agent0,agent2,numGames,verbose=False)

Load Model
Load Model
Game ...  0
Game ...  250
Game ...  500
Game ...  750
Games Won: P0:342, P1:658.

