<h1>Install Dependcies</h1>

In [61]:
!pip install tensorflow
!pip install gymnasium
!pip install keras
!pip install keras-rl2

Collecting gymnasium
  Obtaining dependency information for gymnasium from https://files.pythonhosted.org/packages/a8/4d/3cbfd81ed84db450dbe73a89afcd8bc405273918415649ac6683356afe92/gymnasium-0.29.1-py3-none-any.whl.metadata
  Downloading gymnasium-0.29.1-py3-none-any.whl.metadata (10 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
   ---------------------------------------- 0.0/953.9 kB ? eta -:--:--
   - ------------------------------------- 41.0/953.9 kB 991.0 kB/s eta 0:00:01
   ------ --------------------------------- 153.6/953.9 kB 1.8 MB/s eta 0:00:01
   -------- ------------------------------- 204.8/953.9 kB 1.6 MB/s eta 0:00:01
   ------------- -------------------------- 317.4/953.9 kB 1.8 MB/s eta 0:00:01
   ------------------ --------------------- 430.1/953.9 kB 1.8 MB/s eta 0:00:01
   ---------------------- ----------------- 542.7/953.9 kB 2.

<h1>Imports</h1>

In [63]:
import gymnasium as gym
from gym import Env
from gym.spaces import Discrete,MultiDiscrete, Box
import numpy as np

<h1>Create Game Classes</h1>

In [1]:
# %load classes.py
import functools
import random
class Domino():
    def __init__(self, s1:int,s2:int):
        self.sides = (s1,s2)
        self.isDouble = (s1 == s2)
    def evalute_side(self, side: int):
        if(self.sides[0] == side): return 0
        elif (self.sides[1] == side): return 1
    def calc_points(self):
        return self.sides[0] + self.sides[1]
    def __str__(self):
        return str(self.sides)


class BoneYard():
    def __init__(self):
        self.dominos = []
        self.build()
        self.shuffle()
    def build(self):
        for i in range(0,13):
            for j in range(i,13):
                self.dominos.append(Domino(i,j))
    def shuffle(self):
        random.shuffle(self.dominos)
    def draw(self):
        if len(self.dominos) == 0: return False
        return self.dominos.pop()


class Train():
    startingSide = 12
    def __init__(self, id):
        self.openSides = [Train.startingSide]
        self.trainUp = False
        self.id = id
    def add(self,placement,domino):
        placeIndex = self.openSides.index(placement)
        trainSide = self.openSides[placeIndex]
        if(trainSide in domino.sides):
            if(domino.isDouble):
                self.openSides.pop(placeIndex)
                self.openSides.append(domino.sides[0])
                self.openSides.append(domino.sides[1])
            else:
                self.openSides.pop(placeIndex)
                self.openSides.append(domino.sides[1-domino.evalute_side(trainSide)])
        else:
            return False
    def __str__(self):
        return f"id: {self.id} trainUp?:{self.trainUp} openSides: {self.openSides}"
        

class Player():
    handSize = 12
    nextID = 0 
    def __init__(self,boneYard:BoneYard):
        self.id = Player.nextID
        Player.nextID +=1
        self.hand = []
        for _ in range(Player.handSize): self.hand.append(boneYard.draw()) 
    def highestDouble(self):
        highest = -1
        for domino in self.hand:
            if(domino.isDouble and domino.sides[0] > highest): highest = domino.sides[0]
        return highest
    def intializeTrain(self):
        if not hasattr(self,"train"):
            self.train = Train(self.id)
        return self.train
    def getDominoFromSides(self,s1:int,s2:int):
        for domino in self.hand:
            if (domino.sides == (s1,s2) or domino.sides == (s2,s1)):
                return domino
    def play(self, domino:Domino, placement:int, train:Train|None=None,firstDouble:bool=False):
        selfTrain = False
        played = False
        if train is None: 
            train = self.train
            selfTrain = True
        if firstDouble: 
            self.hand.remove(domino)
            played = True
        elif train.add(placement, domino) != False:
            for domino2 in self.hand:
                print([domino],[domino2])
                if domino2 == domino: 
                    break
            else:
                print([str(domino) for domino in self.hand])
            self.hand.remove(domino)
            if selfTrain: self.train.trainUp = False
            played = True
        if len(self.hand) <= 0: return None
        else: return played
        

    def pointsInHand(self):
        return functools.reduce(lambda acc, domino: acc + domino.calc_points(), self.hand, 0)
    def pickup(self,boneYard:BoneYard):
        domino = boneYard.draw()
        if domino is not False:
            self.hand.append(domino)
        return domino
    def __str__(self):
        return f"id:{self.id} train:{self.train.id}"
    



class Game():
    def __init__(self,numPlayers:int):
        self.boneyard = BoneYard()
        self.players = []
        self.trains = []
        self.done = False
        self.numPlayers = numPlayers
        if(numPlayers<= 4): Player.handSize = 15
        elif(numPlayers<=6): Player.handSize = 12
        elif(numPlayers<=8): Player.handSize = 10
        Player.nextID = 0
        for _ in range(numPlayers): 
            self.players.append(Player(self.boneyard))
        doubles = [player.highestDouble() for player in self.players]
        highestDouble = max(doubles)
        firstPlayer = doubles.index(highestDouble)
        self.currentPlayer = firstPlayer
        firstDomino = self.players[firstPlayer].getDominoFromSides(highestDouble,highestDouble)
        for player in self.players: 
            player.intializeTrain()
            self.trains.append(player.train)
        if (len(self.players)<8): self.mexican = Train(8)
        self.centerDouble = max(doubles)
        Train.startingSide = self.centerDouble
        self.players[firstPlayer].play(firstDomino,0,firstDouble=True) #removing first double
        self.stepPlayer() #first player skiping turn
    def stepPlayer(self):
        self.currentPlayer += 1 
        if (self.currentPlayer>=self.numPlayers): self.currentPlayer = 0# looping if its not an actual player
    def getTrain(self,id:int):
        if id == 8: return self.mexican
        else:
            for t in self.trains:
                if t.id==id:
                    return t
            else:
                return None
    def getPlayer(self,id:int):
        if id > 7: return None
        else: 
            for p in self.players:
                if p.id==id:
                    return p
            else:
                return None

class BoardState():
    def __init__(self, trains:list[Train],centerDouble:int, mexican:Train|None = None ):
        self.mexican = mexican
        self.trains = trains
        self.unsastifiedDouble = None
    #train up returns only sides that are on trains with thier trains up
    #maybe this signature should be changed to just take a list of trains? and let caller deal with filtering?
    def getPlacements(self, trainUp: bool=False,include:list[Train]=[], exclude:list[Train]=[]):
        trains = [*self.trains,self.mexican]
        placements = []
        if trainUp:
            for train in trains:
                if (train in include or train.trainUp) and train not in exclude:
                    for side in train.openSides: 
                        placements.append((train.id,side))
        else:
            for train in trains: 
                if train not in exclude:
                    for side in train.openSides: placements.append((train.id,side))
        return placements
    def getTrain(self, id):
        for train in self.trains:
            if train.id == id: return train
    def availablePlays(self, player:Player,placements:list|None=None):
        plays = []
        places = []
        if self.unsastifiedDouble is not None:
            places = [self.unsastifiedDouble]
        elif placements is not None:
            places = placements
        elif player.train.trainUp:
            places = [(player.id, side) for side in player.train.openSides]
        else: places = self.getPlacements(trainUp=True, include=[player.train])
        for placement in places:
            for domino in player.hand:
                eval = domino.evalute_side(placement[1])
                if( eval is not None): plays.append((domino.sides, placement))
        return plays
    def isValidPlay(self, player:Player, action:list[list]):
        valid = False
        plays = self.availablePlays(player)
        for play in plays:
            tuplist = [tuple(list) for list in action]
            if tuple(tuplist) == play:
                valid = True
                print(tuplist, play)
        return valid
    @staticmethod
    def fromGame(game:Game):
        return BoardState(game.trains,game.centerDouble, game.mexican)


<h2>Test Game Objects</h2>

<h3>Testing intialization</h3>

In [31]:
game = Game(6)
bs = BoardState.fromGame(game)
print([str(player) for player in game.players])
print([str(domino) for domino in game.players[0].hand])
print(Train.startingSide)
plays = bs.availablePlays(game.players[0])
print(plays)


['id:0 train:0', 'id:1 train:1', 'id:2 train:2', 'id:3 train:3', 'id:4 train:4', 'id:5 train:5']
['(1, 3)', '(3, 10)', '(2, 5)', '(0, 1)', '(0, 6)', '(7, 10)', '(4, 4)', '(9, 11)', '(4, 11)', '(8, 8)', '(10, 12)', '(6, 8)']
11
[((9, 11), (0, 11)), ((4, 11), (0, 11))]


<h3>Testing random play</h3>

In [43]:
plays = bs.availablePlays(game.players[0])
if len(plays)> 0:
    play = plays[random.randint(0, len(plays) - 1)] # play = (dominoSideTuple,placementTuple)
    player = game.getPlayer(0)
    domino = player.getDominoFromSides(*play[0])
    train = bs.getTrain(play[1][0]) # play[1] is placement, = (train.id, side)
    print(train)
    player.play(domino,play[1][1],train)
    print(train)
else:
    print("no possible plays")

id: 0 trainUp?:False openSides: [12]
id: 0 trainUp?:False openSides: [4]


<h1>Start Making Env</h1>

In [2]:
# %load DominoEnv.py
from gym import Env
from gym.spaces import Dict, Discrete , MultiDiscrete, Box, Sequence
import numpy as np
class DominoTrainEnv(Env):
    def __init__(self,numPlayers:int):
        # Actions we can take, 13,13 for possible domino sides, [9,13] for possible domino placements
        self.action_space = MultiDiscrete(np.array([[13, 13], [9, 13]]))
        # Temperature array
        obsv =  {
        "hand": Sequence(MultiDiscrete(np.array([13, 13]), dtype=np.int8)),
        "placements": Sequence(MultiDiscrete(np.array([9, 13]), dtype=np.int8)),
        "available-actions": Sequence(MultiDiscrete(np.array([[13, 13], [9, 13]]), dtype=np.int8)),
        "trains": Sequence(MultiDiscrete(np.array([9, 2]), dtype=np.int8))
        }
        self.observation_space = Dict(obsv)
        #setup game
        self.game = Game(numPlayers)
        self.player = self.game.getPlayer(0)
        bs = BoardState.fromGame(self.game)
        handarray = [domino.sides for domino in self.player.hand]
        placements = bs.getPlacements()
        state = {
            "hand": handarray,
            "placements": placements,
            "available-actions": bs.availablePlays(self.player),
            "trains": [[train.id,train.trainUp]for train in bs.trains]
        }
        self.state = state
        self.fails = 0
        
    def play(self,domino:Domino, placement,player:Player, bs:BoardState):
        print(f"attempting to play {domino} on {placement} from {player}")
        game = self.game
        played = False
        if domino is not None:
            train = game.getTrain(placement[0])
            if player.play(domino,placement[1],train):
                played = True   
                if domino.isDouble:
                    print("double played")
                    players = game.players
                    start_index = players.index(player)
                    #loop through all players at "table" starting with person who played double
                    for i in range(len(players)):
                        index = (start_index + i) % len(players)
                        loop_player = players[index] 
                        newPlacement = (placement[0],domino.sides[0])
                        plays = bs.availablePlays(loop_player, placements=[placement])
                        print(f"checking if {loop_player} can play on {newPlacement}\n, plays {plays}")
                        #if player can't play on double
                        if len(plays)<=0:
                            pickupDomino = loop_player.pickup(self.game.boneyard)
                            #if a domino was actually picked up
                            if pickupDomino:
                                print(f"{loop_player} pickedup {pickupDomino}")
                                plays = bs.availablePlays(loop_player, placements=[newPlacement])
                                if len(plays)<=0: 
                                    print("player can't play pickup")
                                    loop_player.train.trainUp = True
                                    continue
                                #if possible to play pickup
                                else:
                                    print(f"attepmting to play pickup on {newPlacement}, train {train2}")
                                    if loop_player.play(pickupDomino,newPlacement[1],train) is None:
                                        game.done = True
                                    game.stepPlayer()
                                    break
                                    
                            # no domino was pickedup, meaning boneyard is empty and end of game
                            else:
                                game.done = True
                                break
                        #if player only has one choice to play
                        elif len(plays) == 1:
                            play = plays[0]
                            ranDomino = loop_player.getDominoFromSides(*play[0])
                            print(f"attempting to play {ranDomino} on {newPlacement}, player.train: {player.train}")
                            if loop_player.play(ranDomino,newPlacement[1],train) is None:
                                game.done = True
                            game.stepPlayer()
                            break
                        #if player has many choices to play
                        else:
                            #if ai player
                            if loop_player.id == 0:
                                print(f"letting ai make choice for double play")
                                bs.unsastifiedDouble = (train.id,domino.sides[0])
                                break
                            #if other players, random choice
                            else:
                                print(player.train)
                                play = plays[random.randint(0, len(plays) - 1)]
                                ranDomino = loop_player.getDominoFromSides(*play[0])
                                print(f"attempting to play {ranDomino} on {newPlacement}, player.train: {player.train}\n available plays: {plays}")
                                if loop_player.play(ranDomino,newPlacement[1],train) is None:
                                    game.done = True
                                game.stepPlayer()
                            break
                else: game.stepPlayer()    
            else:
                   print("Invalid Placement")
        else:
            print("Invlaid Domino")
        return played
    def maskAction(self,availablActions):
        pass
    def restRandomTurns(self, bs:BoardState):
        for i in range(1,self.game.numPlayers):
            player = self.game.getPlayer(i)
            posActions = bs.availablePlays(player)
            if len(posActions)<=0:
                player.pickup(self.game.boneyard)
                posActions = bs.availablePlays(player)
                if len(posActions)>0:
                    ranAction = posActions[0]
                    ranDomino = player.getDominoFromSides(*ranAction[0])
                    print(posActions, ranAction, ranDomino,player)
                    self.play(ranDomino,ranAction[1],player,bs)
                    self.game.stepPlayer()
            else:
                ranIndex = 0
                if len(posActions)>1: ranIndex = random.randint(0, len(posActions)-1)
                ranAction = posActions[ranIndex]
                ranDomino = player.getDominoFromSides(*ranAction[0])
                print(posActions, ranAction, ranDomino,player)
                self.play(ranDomino,ranAction[1],player,bs)
                self.game.stepPlayer()
    def step(self, action):
        #print([str(domino) for domino in self.player.hand])
        reward = 0
        bs= BoardState.fromGame(self.game)
        stateChanged = False
        #if no action available
        if len(bs.availablePlays(self.player))<=0:
            self.player.pickup(self.game.boneyard)
            posActions = bs.availablePlays(self.player)
            if len(posActions)>0:
                ranAction = posActions[0]
                ranDomino = self.player.getDominoFromSides(*ranAction[0])
                self.play(ranDomino,ranAction[1],self.player,bs)
            self.restRandomTurns(bs)
            stateChanged = True
        # Check if action is valid
        elif bs.isValidPlay(self.player,action):
            # Apply action
            domino = action[0]
            domino = self.player.getDominoFromSides(*domino)
            self.play(domino,action[1],self.player,bs)
            reward += domino.calc_points()
            
            #random play for other players
            self.restRandomTurns(bs)

            stateChanged = True
            
        #invalid action   
        else: 
            reward += -500
            self.fails +=1
        if stateChanged:
            #assigning state
            handarray = [domino.sides for domino in self.player.hand]
            placements = bs.getPlacements()
            state = {
                "hand": handarray,
                "placements": placements,
                "available-actions": bs.availablePlays(self.player),
                "trains": [[train.id,train.trainUp]for train in bs.trains]
            }
            self.state = state
        done = self.game.done
        if self.fails >=10000: done = True
        if done:
            # add negative reward for points remaining in hand at game end
            reward += -1*self.player.pointsInHand()  
        
        # Set placeholder for info
        info = {}
        
        # Return step information
        return self.state, reward, done, info

    def render(self):
        # Implement viz
        pass
    
    def reset(self):
        self.game = Game(self.game.numPlayers)
        self.player = self.game.getPlayer(0)
        bs = BoardState.fromGame(self.game)
        handarray = [domino.sides for domino in self.player.hand]
        placements = bs.getPlacements()
        state = {
            "hand": handarray,
            "placements": placements,
            "available-actions": bs.availablePlays(self.player),
            "trains": [[train.id,train.trainUp]for train in bs.trains]
        }
        self.state = state
        self.fails = 0
        return self.state
    

In [3]:
env = DominoTrainEnv(6)

In [4]:
episodes = 10
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0 
    step = 0
    while not done:
        #env.render()
        #print(step)
        step+=1
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score+=reward
    print('Episode:{} Score:{}'.format(episode, score))
    print(step)

[(11, 12), (0, 11)] ((11, 12), (0, 11))
attempting to play (11, 12) on [ 0 11] from id:0 train:0
[<__main__.Domino object at 0x000001F8F6B4B350>] [<__main__.Domino object at 0x000001F8F6B48B50>]
[<__main__.Domino object at 0x000001F8F6B4B350>] [<__main__.Domino object at 0x000001F8F6B4A150>]
[<__main__.Domino object at 0x000001F8F6B4B350>] [<__main__.Domino object at 0x000001F8F6B4CF50>]
[<__main__.Domino object at 0x000001F8F6B4B350>] [<__main__.Domino object at 0x000001F8F6B4B350>]
[((1, 11), (1, 11)), ((3, 11), (1, 11))] ((3, 11), (1, 11)) (3, 11) id:1 train:1
attempting to play (3, 11) on (1, 11) from id:1 train:1
[<__main__.Domino object at 0x000001F8F6B4A610>] [<__main__.Domino object at 0x000001F8F6B4AD50>]
[<__main__.Domino object at 0x000001F8F6B4A610>] [<__main__.Domino object at 0x000001F8F6B4A0D0>]
[<__main__.Domino object at 0x000001F8F6B4A610>] [<__main__.Domino object at 0x000001F8F6B4A350>]
[<__main__.Domino object at 0x000001F8F6B4A610>] [<__main__.Domino object at 0x0

In [7]:
print(env.game.getTrain(0))

id: 0 trainUp?:False openSides: [0, 2]


In [8]:
print(env.game.currentPlayer)

0
