In [15]:
# create simple system to operate a dominoes game

# -- top level "API" for game --
# inputs:
# 1. number of players
# 2. maximum number of dominoes
# 3. rule for creating player agent, or preset list of agents
# operation:
# 1. distribute dominoes to each player randomly
# 2. create turn list
# 3. query players for their play (evaluate whether the play is valid and accept or reject it)
# 4. identify winner, track score, initiate new round until 0/0 is finished

# -- agent --
# 1. one-hot of dominoes in hand
# 2. one-hot of dominoes already played
# 3. multi-length vector, one input for each other player, containing: 1) how many dominoes in their hand, 2) whether they have a penny up, 3) how many turns until they play
# 4. number of turns until the agent plays
# 5. one-hot of dominoes available to play on
# 6. dominoe played on last turn? 

# -- simple agents --
# 1. play random dominoe
# 2. play highest dominoe
# 3. play double-pair dominoe
# 4. RL agent...

# -- observation agents --
# 1. I can train an independent RL model to predict the points at the end of each game from the current game state and run that independently. 
#    This would work by feeding in the full game state, then having the network predict the value on the next turn, and then learn from the last turn moving backwards (with uncertainty etc.)
#    Then, I can teach this observer using games from crystallized agents, and update one agent to see how much better it performs than chance. 
#    Additionally, this agent could maybe be integrated into the gameplay agents? 
# 2. Could train an RL model to predict what dominoes are in other agents' hands based on what has been played, their line, and what is available...

In [1]:
%reload_ext autoreload
%autoreload 2

import random
import numpy as np
import matplotlib.pyplot as plt
import dominoesGameplay as dg
import dominoesAgents as da
import dominoesFunctions as df
from tqdm import tqdm

In [138]:
# Play one complete game with a fixed four-seat line-up.
# The third seat is passed as None, exactly as before (presumably selecting
# dominoeGame's default agent -- confirm in dominoesGameplay).
highestDominoe = 9
numPlayers = 4
agentLineup = (da.doubleAgent, da.greedyAgent, None, da.stupidAgent)

game = dg.dominoeGame(highestDominoe, agents=agentLineup)
game.playGame()

In [161]:
# Monte-Carlo comparison of a fixed agent line-up: play `numGames` independent
# games, count how often each seat wins, and average each seat's final score.
# In the recorded printResults() outputs elsewhere in this notebook the winner
# is the seat with the lowest total, so a lower mean score looks better.
highestDominoe = 9
numGames = 3000
# The third seat is passed as None, exactly as before (presumably selecting
# dominoeGame's default agent -- confirm in dominoesGameplay).
agentLineup = (da.doubleAgent, da.greedyAgent, None, da.stupidAgent)

# Size the tallies from the line-up itself rather than a separate hard-coded
# constant, so adding or removing a seat cannot leave the arrays mis-sized.
numPlayers = len(agentLineup)
winnerCount = np.zeros(numPlayers)
scoreTally = np.zeros(numPlayers)

for _ in tqdm(range(numGames)):
    game = dg.dominoeGame(highestDominoe, agents=agentLineup)
    game.playGame()
    # game.printResults()  # uncomment to inspect individual games
    winnerCount[game.currentWinner] += 1
    scoreTally += game.currentScore

print(winnerCount)
print(scoreTally / numGames)

100%|█████████████████████████████████████████████████████████████████████████████| 3000/3000 [00:14<00:00, 210.68it/s]

[889. 764. 708. 639.]
[1103.28933333 1106.767      1110.99733333 1117.774     ]





In [None]:
# Ask the existing `game` object to play one more hand, then print its results.
# NOTE(review): this cell depends on `game` left in the kernel by an earlier
# cell. The recorded output begins with "Game has already finished", which
# suggests playHand() only reported that the game was over -- confirm against
# dominoesGameplay before relying on this cell after a fresh Restart & Run All.
game.playHand()
game.printResults()

Game has already finished
[[ 31  17  20   0]
 [ 98 155  98 128]
 [106 124 121 130]
 [127 139 108 109]
 [102 112 143 128]
 [123 103 147 114]
 [114 127 124 124]
 [130 123 110 128]
 [132 124 117 120]
 [137 125 136  97]]
[1100 1149 1124 1078]
The winner is agent: 3 with a score of 1078!


## Below this point I'm including some code blocks that make inspection of the gameplay and agent status easy... 

In [159]:
# Play one full game with the standard line-up and print the results so the
# outcome can be inspected directly.
highestDominoe = 9
numPlayers = 4
agentLineup = (da.doubleAgent, da.greedyAgent, None, da.stupidAgent)

game = dg.dominoeGame(highestDominoe, agents=agentLineup)
game.playGame()
game.printResults()

[[  0  25  57  20]
 [130  97 129 123]
 [136 104 106 135]
 [118 118 140 107]
 [ 83 136 124 142]
 [127 127 113 120]
 [133 114 120 122]
 [119 151 134  87]
 [122 132 116 123]
 [119 110 133 133]]
[1087 1114 1172 1112]
The winner is agent: 0 with a score of 1087!


In [153]:
# Play a single hand, then render what was laid down: first the players' line
# sequences, then the dummy sequence.
highestDominoe = 9
numPlayers = 4
agentLineup = (da.doubleAgent, da.greedyAgent, None, da.stupidAgent)

game = dg.dominoeGame(highestDominoe, agents=agentLineup)
game.playHand()

# player/playNumber are left as None here; the original author noted
# player=game.linePlayer, playNumber=game.linePlayNumber as an alternative.
df.gameSequenceToString(
    game.dominoes,
    game.lineSequence,
    game.linePlayDirection,
    player=None,
    playNumber=None,
)
df.gameSequenceToString(
    game.dominoes,
    game.dummySequence,
    game.dummyPlayDirection,
    player=None,
    playNumber=None,
)

[' 9|7 ', ' 7|7 ', ' 7|5 ', ' 5|5 ', ' 5|4 ', ' 4|0 ', ' 0|0 ']
[' 9|5 ', ' 5|6 ', ' 6|7 ', ' 7|8 ', ' 8|4 ', ' 4|7 ', ' 7|3 ', ' 3|8 ', ' 8|2 ', ' 2|4 ']
[' 9|8 ', ' 8|0 ', ' 0|7 ', ' 7|1 ', ' 1|1 ', ' 1|5 ', ' 5|3 ', ' 3|2 ']
[' 9|2 ', ' 2|6 ', ' 6|3 ', ' 3|0 ', ' 0|5 ', ' 5|8 ', ' 8|1 ']
[' 9|6 ', ' 6|0 ', ' 0|9 ', ' 9|3 ', ' 3|3 ', ' 3|4 ', ' 4|4 ', ' 4|6 ', ' 6|6 ', ' 6|1 ', ' 1|4 ', ' 4|9 ', ' 9|1 ']


In [449]:
# Show the plays currently open to the first agent, then let it choose one.
# Prerequisite: game.initializeHand() and game.presentGameState() must already
# have been run on the current `game`.
firstAgent = game.agents[0]

lineOptions, dummyOptions = firstAgent.playOptions()
df.printDominoeList(lineOptions, firstAgent.dominoes, name='line')
df.printDominoeList(dummyOptions, firstAgent.dominoes, name='dummy:')

dominoe, location = firstAgent.selectPlay()
print(f"Dominoe: {dominoe}, Location: {location}")

line 0: []
line 1: []
line 2: []
line 3: []
dummy: []
Dominoe: None, Location: None
