In [1]:
%reload_ext autoreload
%autoreload 2

import time
from copy import copy
import random
import numpy as np
import torch
from tqdm import tqdm
import matplotlib.pyplot as plt
import matplotlib as mpl
from dominoes import leagueManager as lm
from dominoes import gameplay as dg
from dominoes import agents as da
from dominoes import functions as df
from dominoes import fileManagement as fm
from dominoes import transformers


# Default to the CPU and switch to the GPU only when torch reports that CUDA is usable.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
print(f"Using device: {device}")

Using device: cuda


In [None]:
def randomDominoeHand(numInHand, listDominoes, highestDominoe, batch_size=1, null_token=True, available_token=True):
    """Sample a batch of random hands and encode each one with df.twohotDominoe.

    Args:
        numInHand: number of distinct dominoes drawn for every hand.
        listDominoes: the full domino set; only its length is used here for sampling,
            the object itself is forwarded to df.twohotDominoe.
        highestDominoe: largest value on a domino; upper bound (inclusive) of the
            randomly drawn "available" value.
        batch_size: number of hands to draw.
        null_token: forwarded unchanged to df.twohotDominoe.
        available_token: when true, a random available value in [0, highestDominoe]
            is drawn for every hand; otherwise every hand gets ``None``.

    Returns:
        (input, selection, available) where ``input`` is the stacked tensor of the
        per-hand encodings, ``selection`` is a (batch_size, numInHand) array of indices
        into ``listDominoes`` and ``available`` holds one entry per hand (an integer
        array, or a list of ``None`` when ``available_token`` is false).
    """
    numDominoes = len(listDominoes)

    # one draw without replacement per hand, so a hand never holds the same domino twice
    hands = [np.random.choice(numDominoes, numInHand, replace=False) for _ in range(batch_size)]
    selection = np.stack(hands)

    # the available value is only sampled when the encoding is asked to include it
    available = np.random.randint(0, highestDominoe+1, batch_size) if available_token else [None]*batch_size

    # encode each hand on its own (with_batch=False) and stack the results into one tensor
    encoded = []
    for sel, ava in zip(selection, available):
        encoded.append(df.twohotDominoe(sel, listDominoes, highestDominoe, available=ava,
                                        available_token=available_token, null_token=null_token, with_batch=False))
    return torch.stack(encoded), selection, available
    
def getBestLine(dominoes, selection, highestDominoe):
    """Find, for every hand, the highest-valued line over all possible starting values.

    For each hand in ``selection`` every starting value 0..highestDominoe is tried:
    df.constructLineRecursive supplies the candidate lines (and their directions) and
    each candidate is scored by the sum of all values on its dominoes. The best
    candidate per starting value is kept, then the best of those is returned.
    Ties are resolved in favour of the earliest candidate / lowest starting value.

    Args:
        dominoes: array of dominoes, indexable by a sequence of domino indices.
        selection: iterable of hands, each a sequence of indices into ``dominoes``.
        highestDominoe: largest starting value to try (inclusive).

    Returns:
        (bestSequence, bestDirection): two lists with one entry per hand.
    """
    bestSequence, bestDirection = [], []
    for hand in selection:
        # (value, sequence, direction) of the best line for each starting value
        perStart = []
        for start in range(highestDominoe + 1):
            lines, directions = df.constructLineRecursive(dominoes, hand, start)
            values = [np.sum(dominoes[line]) for line in lines]
            top = max(range(len(values)), key=values.__getitem__)
            perStart.append((values[top], lines[top], directions[top]))

        # max() keeps the first maximum, so the lowest starting value wins a tie
        winner = max(range(len(perStart)), key=lambda pos: perStart[pos][0])
        _, sequence, direction = perStart[winner]
        bestSequence.append(sequence)
        bestDirection.append(direction)

    return bestSequence, bestDirection

def getBestLineFromAvailable(dominoes, selection, highestDominoe, available):
    """Find, for every hand, the highest-valued line that starts from a given value.

    Each hand in ``selection`` is paired with the matching entry of ``available``;
    df.constructLineRecursive supplies the candidate lines (and their directions) and
    each candidate is scored by the sum of all values on its dominoes. The first
    candidate with the highest score is kept.

    Args:
        dominoes: array of dominoes, indexable by a sequence of domino indices.
        selection: iterable of hands, each a sequence of indices into ``dominoes``.
        highestDominoe: not used by this function.
        available: one starting value per hand.

    Returns:
        (bestSequence, bestDirection): two lists with one entry per hand.
    """
    def _bestForHand(hand, start):
        # score every candidate line and keep the first one reaching the top score
        lines, directions = df.constructLineRecursive(dominoes, hand, start)
        values = [np.sum(dominoes[line]) for line in lines]
        top = max(range(len(values)), key=values.__getitem__)
        return lines[top], directions[top]

    picks = [_bestForHand(hand, start) for hand, start in zip(selection, available)]
    bestSequence = [sequence for sequence, _ in picks]
    bestDirection = [direction for _, direction in picks]
    return bestSequence, bestDirection
    
def convertToHandIndex(selection, bestSequence):
    indices = []
    for sel,seq in zip(selection, bestSequence):
        # look up table for current selection
        elementIdx = {element:idx for idx, element in enumerate(sel)}
        indices.append([elementIdx[element] for element in seq])
    return indices
    
def padBestLine(bestSequence, max_output, ignore_index=-1):
    """Right-pad every sequence with ``ignore_index`` up to ``max_output`` entries.

    The padded sequences are returned as new lists; the sequences passed in are
    left untouched, so a caller can keep using the unpadded originals.

    Args:
        bestSequence: iterable of sequences (lists, tuples, arrays, ...).
        max_output: length every padded sequence should have.
        ignore_index: value used for the padding positions.

    Returns:
        A list of lists. A sequence that already has ``max_output`` or more entries
        is returned unpadded (and is not truncated).
    """
    # a non-positive repeat count yields an empty list, so long sequences pass through as-is
    return [list(seq) + [ignore_index] * (max_output - len(seq)) for seq in bestSequence]

def generateBatch(highestDominoe, dominoes, batch_size, numInHand, available_token=False, null_token=False, ignore_index=-1, return_full=False):
    """Build one supervised batch: encoded random hands, target sequences and a mask.

    Hands come from randomDominoeHand. The target of a hand is its best line
    (getBestLineFromAvailable when ``available_token`` is set, getBestLine otherwise),
    expressed as positions inside the hand and padded to a common length.

    Args:
        highestDominoe: largest value on a domino.
        dominoes: the full domino set.
        batch_size: number of hands in the batch.
        numInHand: number of dominoes per hand.
        available_token: whether hands carry a random available value; also selects
            which best-line routine is used and whether the last mask column is zeroed.
        null_token: when true, targets have ``numInHand + 1`` entries and are padded
            with ``numInHand``; otherwise they have ``numInHand`` entries and are
            padded with ``ignore_index``.
        ignore_index: padding value used when ``null_token`` is false.
        return_full: also return the best sequences/directions, the selection and
            the available values.

    Returns:
        (input, target, mask), or with ``return_full``
        (input, target, mask, bestSequence, bestDirection, selection, available).
    """
    encoded, selection, available = randomDominoeHand(
        numInHand, dominoes, highestDominoe,
        batch_size=batch_size, null_token=null_token, available_token=available_token)

    maskShape = (batch_size, numInHand+1)
    if available_token:
        bestSequence, bestDirection = getBestLineFromAvailable(dominoes, selection, highestDominoe, available)
        mask = torch.ones(maskShape, dtype=torch.float)
        # last column is switched off -- presumably the position of the available token (TODO confirm)
        mask[:, -1] = 0
    else:
        bestSequence, bestDirection = getBestLine(dominoes, selection, highestDominoe)
        mask = torch.ones(maskShape)

    # with a null token the padding points at position numInHand and the target gets one extra slot
    if null_token:
        padValue, targetLength = numInHand, numInHand + 1
    else:
        padValue, targetLength = ignore_index, numInHand

    handPositions = convertToHandIndex(selection, bestSequence)
    padded = padBestLine(handPositions, targetLength, ignore_index=padValue)
    target = torch.tensor(np.stack(padded), dtype=torch.long)

    batch = (encoded, target, mask)
    if return_full:
        return batch + (bestSequence, bestDirection, selection, available)
    return batch
    

# reproducibility: hands are sampled with NumPy and the network weights are initialised
# by torch, so seed every random number generator before anything stochastic runs
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# input parameters
highestDominoe = 9
dominoes = df.listDominoes(highestDominoe)

batch_size = 96
numInHand = 10
null_token = True
available_token = True
# one output step per domino in the hand, plus one when the null token is used
num_output = numInHand + (1 if null_token else 0)
ignore_index = -1

# network parameters
# (highestDominoe+1) input slots are used two times per token, three times when the
# available token is encoded as well, plus one extra slot for the null token
input_dim = (2 if not(available_token) else 3)*(highestDominoe+1) + (1 if null_token else 0)
embedding_dim = 96
heads = 8
expansion = 1 
kqnorm = True #false 
encoding_layers = 1 #2
bias = False
decode_with_gru = False
greedy = True #false
temperature = 1
alpha = 1e-3  # learning rate handed to the optimizer

# build the pointer network on the selected device, together with its Adam optimizer
net = transformers.PointerNetwork(
    input_dim,
    embedding_dim,
    heads=heads,
    expansion=expansion,
    kqnorm=kqnorm,
    encoding_layers=encoding_layers,
    bias=bias,
    decode_with_gru=decode_with_gru,
    greedy=greedy,
    temperature=temperature,
)
net.to(device)
optimizer = torch.optim.Adam(net.parameters(), lr=alpha)

# training: every step uses a freshly generated batch, the loss of each step is recorded
net.train()
trainEpochs = 30000
trainLoss = torch.zeros(trainEpochs)
for epoch in tqdm(range(trainEpochs)):
    # start the step from clean gradients
    optimizer.zero_grad()

    # sample a new batch and move all of its tensors to the compute device
    input, target, mask = (
        tensor.to(device)
        for tensor in generateBatch(highestDominoe, dominoes, batch_size, numInHand,
                                    null_token=null_token, available_token=available_token,
                                    ignore_index=ignore_index)
    )

    # forward pass: per-step scores and the choices made by the network
    out_scores, out_choices = net(input, max_output=num_output)

    # flatten (batch, step) so that every output step is one classification example;
    # nll_loss expects log-probabilities -- assumes the network returns them (TODO confirm)
    unrolled = out_scores.view(batch_size * num_output, -1)
    loss = torch.nn.functional.nll_loss(unrolled, target.view(-1), ignore_index=ignore_index)

    # backward pass and parameter update
    loss.backward()
    optimizer.step()

    trainLoss[epoch] = loss.item()

# loss curve over all training steps
plt.close('all')
plt.plot(range(trainEpochs), trainLoss)
plt.show()

 23%|█████████████████▌                                                           | 6865/30000 [13:04<43:18,  8.90it/s]

In [84]:
# Draw a fresh batch for inspection. null_token must be passed explicitly: generateBatch
# defaults it to False, which would produce inputs and targets that do not match the
# encoding (input_dim / num_output) the network was built and trained with.
input, target, mask, bseq, bdir, selection, available = generateBatch(
    highestDominoe, dominoes, batch_size, numInHand, null_token=null_token, available_token=available_token,
    ignore_index=ignore_index, return_full=True)

input, target, mask = input.to(device), target.to(device), mask.to(device)

# propagate it through the network
out_scores, out_choices = net(input, max_output=num_output)


In [105]:
# Pick one example of the batch and show its available value followed by the
# dominoes that make up its hand (rows of `dominoes` indexed by the selection).
outidx = 2
print(available[outidx])
dominoes[selection[outidx]]

6


array([[5, 7],
       [3, 3],
       [3, 5],
       [0, 4],
       [6, 8],
       [7, 8],
       [2, 7],
       [6, 6],
       [0, 8],
       [0, 6]])

In [106]:
# Choices returned by the network for the example selected by outidx.
out_choices[outidx]

tensor([4, 8, 1, 1, 8, 2, 2, 2, 7, 2], device='cuda:0')

In [107]:
# Target built by generateBatch for the example selected by outidx:
# positions within the hand of the best line, followed by padding.
target[outidx]

tensor([ 7,  4,  5,  0,  2,  1, 10, 10, 10, 10, 10])