In [2]:
# This notebook is used for developing networks for subtasks of the main one. 

In [17]:
%reload_ext autoreload
%autoreload 2

from copy import copy
import random
import numpy as np
import matplotlib.pyplot as plt
from dominoes import gameplay as dg
from dominoes import agents as da
from dominoes import functions as df
from tqdm import tqdm
import torch
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F

import subtasks

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

Using device: cuda


In [1]:
# # Sub Task 1: Train a network to predict self-hand value and other hand value based on the tokens in the hand
# outputs = subtasks.subTask1()
# net, testOutput, testTarget, testLoss, trainingLoss, printResults = outputs
# printResults(df.listDominoes(12), trainingLoss, testOutput, testTarget, net)

In [7]:
import itertools
import pointerNetwork as pn
from torch.utils.data import Dataset, DataLoader

import argparse
import random
import warnings

import numpy as np
import torch
import torch.nn.functional as F
from torch.optim import Adam
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader, Dataset

In [8]:
# creates the dataset using a value-value embedding where each value on the dominoe is indicated
class dominoeHandDataset(Dataset):
    def __init__(self, highestDominoe, minInHand, maxInHand, numSamples=1000):
        self.highestDominoe = highestDominoe
        self.dominoes = df.listDominoes(highestDominoe)
        self.numDominoes = len(self.dominoes)
        self.minInHand = np.maximum(minInHand, 1)
        self.maxInHand = np.minimum(maxInHand, self.numDominoes)
        self.numSamples = numSamples
        self.embedDim = 2*(self.highestDominoe+1) + 1

    def __getitem__(self, index):
        cInHand = np.random.randint(self.minInHand, self.maxInHand+1)
        cHand = np.random.choice(self.numDominoes, cInHand, replace=False)
        cEmbeddings = self.embed(cHand)
        return cEmbeddings
        
    def __len__(self):
        return self.numSamples
        
    def embed(self, hand):
        numTokens = len(hand)+1
        embeddings = []
        for d in hand:
            dValues = self.dominoes[d]
            firstHalf = [1 if dValues[0] == i else 0 for i in range(self.highestDominoe+1)]
            secondHalf = [1 if dValues[1] == i else 0 for i in range(self.highestDominoe+1)]
            embeddings.append(firstHalf+secondHalf+[0])
        embeddings.append([0]*2*(self.highestDominoe+1)+[1])
        return embeddings

    def embedFast(self, hand):
        numTokens = len(hand)+1
        oneHotRowCol = 
        embeddings = torch.zeros((numTokens,self.embedDim))
        

In [16]:
x = np.array([[0,0],[1,2]])
colIdx = x + np.array([0,5])
print(colIdx)

out = np.zeros((2,10))
out[colIdx] = 1

print(out)

[[0 5]
 [1 7]]


IndexError: index 5 is out of bounds for axis 0 with size 2

In [12]:
dhdata = dominoeHandDataset(9, 3, 10)
dhLoader = DataLoader(dataset=dhdata, batch_size=64, shuffle=False, num_workers=0)

In [13]:
batch = next(iter(dhLoader))
print(len(batch))

RuntimeError: stack expects each tensor to be equal size, but got [10] at entry 0 and [5] at entry 1

In [4]:
class dominoeValueDataset(Dataset):
    def __init__(self, highestDominoe, min_len=5, max_len=20, num_samples=10000):
        self.min_len = min_len
        self.max_len = max_len
        self.num_samples = num_samples
        self.dominoes = df.listDominoes(highestDominoe)
        self.numDominoes = len(self.dominoes)
        self.dominoeValue = np.sum(self.dominoes,axis=1)
        self.hands = [np.random.randint(0, self.numDominoes, length) for \
                          length in np.random.randint(self.min_len, self.max_len, self.num_samples)]
        self.targets = [sorted(range(len(chand)), key=lambda i: self.dominoeValue[chand][i]) for chand in self.hands]
        
    def __getitem__(self, index):
        cLength = len(self.targets[index])
        cDominoes = self.hands[index]
        cTarget = self.targets[index]
        row_col_index = list(zip(*[(i,d) for i,d in enumerate(cDominoes)]))
        i = torch.LongTensor(row_col_index)
        v = torch.FloatTensor([1]*cLength)
        cData = torch.sparse.FloatTensor(i,v,torch.Size([cLength, self.numDominoes]))
        return cData, cLength, cTarget
    
    def __len__(self):
        return len(self.hands)

def sparse_seq_collate_fn(batch):
    batch_size = len(batch)

    sorted_seqs, sorted_lengths, sorted_labels = zip(*sorted(batch, key=lambda x: x[1], reverse=True))

    padded_seqs = [seq.resize_as_(sorted_seqs[0]) for seq in sorted_seqs]

    # (Sparse) batch_size X max_seq_len X input_dim
    seq_tensor = torch.stack(padded_seqs)

    # batch_size
    length_tensor = torch.LongTensor(sorted_lengths)

    padded_labels = list(zip(*(itertools.zip_longest(*sorted_labels, fillvalue=-1))))

    # batch_size X max_seq_len (-1 padding)
    label_tensor = torch.LongTensor(padded_labels).view(batch_size, -1)

    # TODO: Currently, PyTorch DataLoader with num_workers >= 1 (multiprocessing) does not support Sparse Tensor
    # TODO: Meanwhile, use a dense tensor when num_workers >= 1.
    seq_tensor = seq_tensor.to_dense()

    return seq_tensor, length_tensor, label_tensor

class AverageMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def masked_accuracy(output, target, mask):
    """Computes a batch accuracy with a mask (for padded sequences) """
    with torch.no_grad():
        masked_output = torch.masked_select(output, mask)
        masked_target = torch.masked_select(target, mask)
        accuracy = masked_output.eq(masked_target).float().mean()

        return accuracy

In [7]:
dvTrainSet = dominoeValueDataset(9,num_samples=10000)
dvTestSet = dominoeValueDataset(9,num_samples=1000)

dvTrainLoader = DataLoader(dataset=dvTrainSet, batch_size=64, shuffle=False, num_workers=0, collate_fn=sparse_seq_collate_fn)
dvTestLoader = DataLoader(dataset=dvTestSet, batch_size=64, shuffle=False, num_workers=0, collate_fn=sparse_seq_collate_fn)

cudnn.benchmark = True if device=='cuda' else False

train_loss = AverageMeter()
train_accuracy = AverageMeter()
test_loss = AverageMeter()
test_accuracy = AverageMeter()

net = pn.PointerNet(input_dim=dvDataset.numDominoes, embedding_dim=512, hidden_size=512).to(device)
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3, weight_decay=1e-5)

numEpochs = 1000
for epoch in range(numEpochs):
    # Train
    net.train()
    for batch_idx, (seq, length, target) in enumerate(dvLoader):
        seq, length, target = seq.to(device), length.to(device), target.to(device)
        
        optimizer.zero_grad()
        log_pointer_score, argmax_pointer, mask = net(seq, length)
        
        unrolled = log_pointer_score.view(-1, log_pointer_score.size(-1))
        loss = F.nll_loss(unrolled, target.view(-1), ignore_index=-1)
        assert not np.isnan(loss.item()), 'Model diverged with loss = NaN'

        loss.backward()
        optimizer.step()

        train_loss.update(loss.item(), seq.size(0))

        mask = mask[:, 0, :]
        train_accuracy.update(masked_accuracy(argmax_pointer, target, mask).item(), mask.int().sum().item())

    # Test
    net.eval()
    for seq, length, target in dvLoader:
        seq, length, target = seq.to(device), length.to(device), target.to(device)

        log_pointer_score, argmax_pointer, mask = net(seq, length)
        unrolled = log_pointer_score.view(-1, log_pointer_score.size(-1))
        loss = F.nll_loss(unrolled, target.view(-1), ignore_index=-1)
        assert not np.isnan(loss.item()), 'Model diverged with loss = NaN'

        test_loss.update(loss.item(), seq.size(0))

        mask = mask[:, 0, :]
        test_accuracy.update(masked_accuracy(argmax_pointer, target, mask).item(), mask.int().sum().item())
    print('Epoch {}: Test\tLoss: {:.6f}\tAccuracy: {:.6f}'.format(epoch, test_loss.avg, test_accuracy.avg))


Epoch 0: Test	Loss: 2.551005	Accuracy: 0.083292
Epoch 1: Test	Loss: 2.551005	Accuracy: 0.083292
Epoch 2: Test	Loss: 2.551005	Accuracy: 0.083292
Epoch 3: Test	Loss: 2.551005	Accuracy: 0.083292
Epoch 4: Test	Loss: 2.551005	Accuracy: 0.083292
Epoch 5: Test	Loss: 2.551005	Accuracy: 0.083292
Epoch 6: Test	Loss: 2.551005	Accuracy: 0.083292
Epoch 7: Test	Loss: 2.551005	Accuracy: 0.083292


KeyboardInterrupt: 

In [None]:
for epoch in range(args.epochs):
    # Train
    net.train()
    for batch_idx, (seq, length, target) in enumerate(train_loader):
        seq, length, target = seq.to(device), length.to(device), target.to(device)
        
        optimizer.zero_grad()
        log_pointer_score, argmax_pointer, mask = net(seq, length)
        
        raise ValueError('hi')
        
        unrolled = log_pointer_score.view(-1, log_pointer_score.size(-1))
        loss = F.nll_loss(unrolled, target.view(-1), ignore_index=-1)
        assert not np.isnan(loss.item()), 'Model diverged with loss = NaN'

        loss.backward()
        optimizer.step()

        train_loss.update(loss.item(), seq.size(0))

        mask = mask[:, 0, :]
        train_accuracy.update(masked_accuracy(argmax_pointer, target, mask).item(), mask.int().sum().item())

        # if batch_idx % 20 == 0:
        #     print('Epoch {}: Train [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAccuracy: {:.6f}'
        #           .format(epoch, batch_idx * len(seq), len(train_loader.dataset),
        #                   100. * batch_idx / len(train_loader), train_loss.avg, train_accuracy.avg))

    # Test
    net.eval()
    for seq, length, target in test_loader:
        seq, length, target = seq.to(device), length.to(device), target.to(device)

        log_pointer_score, argmax_pointer, mask = net(seq, length)
        unrolled = log_pointer_score.view(-1, log_pointer_score.size(-1))
        loss = F.nll_loss(unrolled, target.view(-1), ignore_index=-1)
        assert not np.isnan(loss.item()), 'Model diverged with loss = NaN'

        test_loss.update(loss.item(), seq.size(0))

        mask = mask[:, 0, :]
        test_accuracy.update(masked_accuracy(argmax_pointer, target, mask).item(), mask.int().sum().item())
    print('Epoch {}: Test\tLoss: {:.6f}\tAccuracy: {:.6f}'.format(epoch, test_loss.avg, test_accuracy.avg))


In [None]:
# Sub Task 2: Order dominoes with a transformer -> pointer network


# 1. embed dominoes (can be a variable number, but in each batch should be the same(?))
# 2. use a pointer network to produce the output until the last dominoe is used

class pointerNetIndex(nn.Module):
    def __init__(self, highestDominoe, embedding_dim, hidden_size):
        super().__init__()

        self.highestDominoe = highestDominoe
        self.dominoes = df.listDominoes(self.highestDominoe)
        self.numDominoes = len(self.dominoes)

        self.embedding_dim = embedding_dim
        self.batch_first = batch_first

        # Start with an embedding layer
        self.embedding = nn.Embedding(self.numDominoes, self.embedding_dim)
        self.encodedTransform = nn.Linear(self.embedding_dim, self.embedding_dim)
        self.decodedTransform = nn.Linear(self.embedding_dim, self.embedding_dim)
        self.valueTransform = nn.Linear(self.embedding_dim, 1)

        self.pointerNet = nn.GRUCell(input_size, hidden_size, bias=True, device=None, dtype=None

        # Inherited from public repository (will test later) 
        for m in self.modules():
            if isinstance(m, nn.Linear):
                if m.bias is not None:
                    torch.nn.init.zeros_(m.bias)

    def forward(self, x):

        embedded = self.embedding(x) # (batch, sequenceLength, embeddingSize)




        if self.batch_first:
            batch_size = input_seq.size(0)
            max_seq_len = input_seq.size(1)
        else:
            batch_size = input_seq.size(1)
            max_seq_len = input_seq.size(0)

        # Embedding
        embedded = self.embedding(input_seq)
        # (batch_size, max_seq_len, embedding_dim)

        # encoder_output => (batch_size, max_seq_len, hidden_size) if batch_first else (max_seq_len, batch_size, hidden_size)
        # hidden_size is usually set same as embedding size
        # encoder_hidden => (num_layers * num_directions, batch_size, hidden_size) for each of h_n and c_n
        encoder_outputs, encoder_hidden = self.encoder(embedded, input_lengths)

        encoder_h_n, encoder_c_n = encoder_hidden
        encoder_h_n = encoder_h_n.view(self.num_layers, self.num_directions, batch_size, self.hidden_size)
        encoder_c_n = encoder_c_n.view(self.num_layers, self.num_directions, batch_size, self.hidden_size)

        # Lets use zeros as an intial input for sorting example
        decoder_input = encoder_outputs.new_zeros(torch.Size((batch_size, self.hidden_size)))
        decoder_hidden = (encoder_h_n[-1, 0, :, :].squeeze(), encoder_c_n[-1, 0, :, :].squeeze())

        range_tensor = torch.arange(max_seq_len, device=input_lengths.device, dtype=input_lengths.dtype).expand(batch_size, max_seq_len, max_seq_len)
        each_len_tensor = input_lengths.view(-1, 1, 1).expand(batch_size, max_seq_len, max_seq_len)

        row_mask_tensor = (range_tensor < each_len_tensor)
        col_mask_tensor = row_mask_tensor.transpose(1, 2)
        mask_tensor = row_mask_tensor * col_mask_tensor

        pointer_log_scores = []
        pointer_argmaxs = []

        for i in range(max_seq_len):
            # We will simply mask out when calculating attention or max (and loss later)
            # not all input and hiddens, just for simplicity
            sub_mask = mask_tensor[:, i, :].float()

            # h, c: (batch_size, hidden_size)
            h_i, c_i = self.decoding_rnn(decoder_input, decoder_hidden)

            # next hidden
            decoder_hidden = (h_i, c_i)

            # Get a pointer distribution over the encoder outputs using attention
            # (batch_size, max_seq_len)
            log_pointer_score = self.attn(h_i, encoder_outputs, sub_mask)
            pointer_log_scores.append(log_pointer_score)

            # Get the indices of maximum pointer
            _, masked_argmax = masked_max(log_pointer_score, sub_mask, dim=1, keepdim=True)

            pointer_argmaxs.append(masked_argmax)
            index_tensor = masked_argmax.unsqueeze(-1).expand(batch_size, 1, self.hidden_size)

            # (batch_size, hidden_size)
            decoder_input = torch.gather(encoder_outputs, dim=1, index=index_tensor).squeeze(1)

        pointer_log_scores = torch.stack(pointer_log_scores, 1)
        pointer_argmaxs = torch.cat(pointer_argmaxs, 1)

        return pointer_log_scores, pointer_argmaxs, mask_tensor
