In [15]:
# create simple system to operate a dominoes game

# -- top level "API" for game --
# inputs:
# 1. number of players
# 2. maximum number of dominoes
# 3. rule for creating player agent, or preset list of agents
# operation:
# 1. distribute dominoes to each player randomly
# 2. create turn list
# 3. query players for their play (evaluate whether the play is valid and accept or reject it)
# 4. identify winner, track score, initiate new round until 0/0 is finished

# -- agent --
# 1. one-hot of dominoes in hand
# 2. one-hot of dominoes already played
# 3. multi-length vector, one input for each other player, containing: 1) how many dominoes in their hand, 2) whether they have a penny up, 3) how many turns until they play
# 4. number of turns until the agent plays
# 5. one-hot of dominoes available to play on
# 6. dominoe played on last turn? 

# -- simple agents --
# 1. play random dominoe
# 2. play highest dominoe
# 3. play double-pair dominoe
# 4. RL agent...

# -- observation agents --
# 1. I can train an independent RL model to predict the points at the end of each game from the current game state and run that independently. 
#    This would work by feeding in the full game state, then having the network predict the value on the next turn, and then learn from the last turn moving backwards (with uncertainty etc.)
#    Then, I can teach this observer using games from crystallized agents, and update one agent to see how much better it performs than change. 
#    Additionally, this agent could maybe be integrated into the gameplay agents? 
# 2. Could train an RL model to predict what dominoes are in other agents hands based on what has been played, their line, and what is available...

In [1]:
%reload_ext autoreload
%autoreload 2

import random
import numpy as np
import matplotlib.pyplot as plt
import dominoesGameplay as dg
import dominoesAgents as da
import dominoesFunctions as df
from tqdm import tqdm

In [104]:
numPlayers = 4
highestDominoe = 9
game = dg.dominoeGame(numPlayers, highestDominoe, agents=(da.doubleAgent, da.greedyAgent, None, None))
game.initializeHand()
game.doTurn()

In [131]:
numPlayers = 4
highestDominoe = 9
winnerCount = np.zeros(numPlayers)
numGames = 3000
for _ in tqdm(range(numGames)):
    game = dg.dominoeGame(numPlayers, highestDominoe, agents=(da.doubleAgent, da.doubleAgent, da.stupidAgent, None))
    game.playGame()
    # game.printResults()
    winnerCount[game.currentWinner]+=1
print(winnerCount)

100%|█████████████████████████████████████████████████████████████████████████████| 3000/3000 [00:14<00:00, 212.14it/s]

[880. 834. 616. 670.]





In [589]:
game.playHand()
game.printResults()

Game has already finished
[[ 31  17  20   0]
 [ 98 155  98 128]
 [106 124 121 130]
 [127 139 108 109]
 [102 112 143 128]
 [123 103 147 114]
 [114 127 124 124]
 [130 123 110 128]
 [132 124 117 120]
 [137 125 136  97]]
[1100 1149 1124 1078]
The winner is agent: 3 with a score of 1078!


In [595]:
hasattr(None, 'name')

False

In [493]:
game.lineSequence

[[48, 36, 28, 30, 47, 52, 32, 12, 1, 7, 46],
 [26, 20, 31, 16, 17, 53, 33, 27],
 [51, 49],
 [39, 38, 50, 37, 21, 23, 45]]

In [494]:
game.linePlayer

[[0, 0, 0, 0, 0, 2, 2, 2, 2, 3, 1],
 [1, 1, 1, 1, 1, 1, 2, 1],
 [2, 3],
 [3, 3, 3, 3, 0, 3, 3]]

In [495]:
game.linePlayDirection

[[1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1],
 [1, 0, 0, 1, 0, 0, 1, 0],
 [1, 0],
 [1, 0, 1, 1, 1, 0, 0]]

In [496]:
game.linePlayNumber

[[1, 5, 10, 14, 19, 30, 31, 35, 39, 40, 42],
 [2, 6, 15, 24, 29, 38, 49, 52],
 [3, 45],
 [0, 4, 8, 17, 23, 36, 44]]

In [497]:
game.dummyPlayNumber

[7, 9, 11, 12, 13, 16, 18, 20, 21, 25, 26, 27, 32, 34, 37, 41]

In [498]:
df.gameSequenceToString(game.dominoes, game.lineSequence, game.linePlayDirection, player=None, playNumber=None)

[' 9|6 ', ' 6|4 ', ' 4|3 ', ' 3|6 ', ' 6|8 ', ' 8|8 ', ' 8|3 ', ' 3|1 ', ' 1|0 ', ' 0|7 ', ' 7|6 ']
[' 9|2 ', ' 2|3 ', ' 3|7 ', ' 7|1 ', ' 1|8 ', ' 8|9 ', ' 9|3 ', ' 3|3 ']
[' 9|7 ', ' 7|7 ']
[' 9|4 ', ' 4|8 ', ' 8|7 ', ' 7|4 ', ' 4|2 ', ' 2|6 ', ' 6|6 ']


## Below this point I'm including some code blocks that make inspection of the gameplay and agent status easy... 

In [449]:
# Example of options list for current game (requires game.initializeHand() and game.presentGameState() to be run)
lineOptions, dummyOptions = game.agents[0].playOptions()
df.printDominoeList(lineOptions, game.agents[0].dominoes, name='line')
df.printDominoeList(dummyOptions, game.agents[0].dominoes, name='dummy:')
dominoe, location = game.agents[0].selectPlay()
print(f"Dominoe: {dominoe}, Location: {location}")

line 0: []
line 1: []
line 2: []
line 3: []
dummy: []
Dominoe: None, Location: None
