### Learner 001

* original tournament data are obtained from http://www.ffothello.org/informatique/la-base-wthor/
* encoded dataset used in this notebook can be downloaded at https://drive.google.com/open?id=1thIFevwYhD9Y9JIMvLPS6QF0TiIzNbKR

In [None]:
import sys
import time
import json
import numpy
import glob
import random
import copy
import dill
import logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)

import torch
import torch.nn
import torch.optim as optim

#### Define action - label relationship here

In [None]:
# Label -> move table.  Labels 0..59 enumerate the 8x8 squares in row-major
# order, leaving out the four centre squares (rows 3-4, columns 3-4);
# label 60 stands for 'PASS'.
label2move = dict(enumerate(
    [[r, c]
     for r in range(8)
     for c in range(8)
     if not (r in (3, 4) and c in (3, 4))]
    + ['PASS']
))
# Reverse lookup.  Moves become tuples so they are hashable; note that
# tuple('PASS') yields the key ('P', 'A', 'S', 'S').
move2label = {tuple(move): label for label, move in label2move.items()}

#### NETWORK ARCHITECTURE

In [None]:
class Net(torch.nn.Module):
    """Fully connected network: 64 inputs -> 1000 sigmoid units -> 62 outputs.

    The final softmax is taken over dim=1 and therefore spans all 62 output
    columns at once.
    """

    def __init__(self):
        super().__init__()
        # hidden layer and its activation
        self.fc1     = torch.nn.Linear(64, 1000)
        self.sigmoid = torch.nn.Sigmoid()
        # output layer and its normalisation
        self.fc2     = torch.nn.Linear(1000, 62)
        self.softmax = torch.nn.Softmax(dim=1)
    # end def

    def forward(self, x):
        """Return the softmax-normalised output for a batch ``x`` of width 64."""
        hidden = self.sigmoid(self.fc1(x))
        return self.softmax(self.fc2(hidden))
    # end def
# end class

#### Customized loss function

In [None]:
class AlphaZERO_Loss(torch.nn.Module):
    """Squared value error plus move cross-entropy, averaged over the batch.

    Both ``outputs`` and ``labels`` are split along dim 1 into the first 61
    columns (move probabilities) and the last column (value / winner).

    Args:
        eps: lower bound applied to the predicted move probabilities before
            taking the logarithm.  Without it a predicted probability of
            exactly 0 gives log(0) = -inf, and 0 * -inf turns the whole loss
            into NaN.
    """

    def __init__(self, eps=1e-12):
        super().__init__()
        self.eps = eps
    # end def

    def forward(self, outputs, labels):
        """Return the scalar loss for a batch.

        Args:
            outputs: tensor with 62 columns produced by the network.
            labels: tensor with 62 columns holding the targets.

        Returns:
            ``(sum((value - winner)^2) - sum(pi * log(p))) / batch_size``
        """
        # get the batch size
        batch_n = outputs.shape[0]

        move_probs, pred_vals = torch.split(outputs, (61, 1), 1)
        search_probs, winners = torch.split(labels , (61, 1), 1)

        # flatten so the cross-entropy term is a single dot product
        pi   = search_probs.contiguous().view(-1).float()
        # keep the log finite for probabilities that underflowed to zero
        safe_probs = torch.clamp(move_probs, min=self.eps)
        logp = torch.log(safe_probs).contiguous().view(-1).float()

        loss = torch.pow(pred_vals - winners, 2).sum() - pi.dot(logp)
        loss = loss / batch_n

        return loss
    # end def
# end class

In [None]:
# learning parameters
# step size passed to optim.SGD
LEARNING_RATE = 0.02
# momentum passed to optim.SGD
MOMENTUM      = 0.9
# number of samples per mini-batch (handed to epoch() by the training loop)
BATCH_SIZE    = 200
# NOTE(review): not referenced by any visible cell; the training loop
# iterates over a literal range(20000) instead
EPOCH_N       = 10

In [None]:
##################################################
######### Define network and optimizer ###########
##################################################
# These three module-level objects are used as globals by epoch().
net = Net()
# plain SGD with momentum over all parameters of net
optimizer = optim.SGD(net.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)
# custom loss defined above (squared value error + move cross-entropy)
criterion = AlphaZERO_Loss()

In [None]:
# Use the first CUDA device when CUDA is available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# bare expression: as the last line of a notebook cell it displays the chosen device
device

In [None]:
# upload net to device
# (the optimizer was built from net.parameters() before this call)
net = net.to(device)

In [None]:
def outcome2array(Y):
    """Encode ``(move, winner)`` pairs as rows of a ``(len(Y), 62)`` array.

    Each row is zero except for a 1 at the column given by
    ``move2label[tuple(move)]`` and the winner value in the last column.
    A winner value of -1 is stored as 0.

    Args:
        Y: sequence of ``(move, winner)`` pairs.

    Returns:
        numpy array of shape ``(len(Y), 62)``; an empty input yields shape
        ``(0, 62)`` so the column count is consistent for every input.

    Raises:
        KeyError: if a move is not present in ``move2label``.
    """
    output = numpy.zeros((len(Y), 62))
    # iterating a 2-D array yields row views, so writes land in ``output``
    for row, (_move, _winner) in zip(output, Y):
        row[move2label[tuple(_move)]] = 1
        row[-1] = 0 if _winner == -1 else _winner
    # end for
    return output
# end def

In [None]:
def read_data_from_file(filename):
    """Load one data file in which every line is a JSON ``[x, y]`` pair.

    Args:
        filename: path of the file to read.

    Returns:
        ``(X, Y)`` where ``X`` is ``numpy.array`` of all ``x`` entries and
        ``Y`` is the result of ``outcome2array`` applied to all ``y`` entries.
    """
    with open(filename) as handle:
        lines = handle.read().splitlines()
    # end with

    features = []
    targets = []
    for line in lines:
        # each line must decode to exactly two items
        feature, target = json.loads(line)
        features.append(feature)
        targets.append(target)
    # end for

    # convert to numpy arrays (features first, then the encoded targets)
    return numpy.array(features), outcome2array(targets)
# end def

In [None]:
def read_some_data(files, N=20):
    """Load ``N`` randomly chosen files and concatenate their samples.

    Files are drawn with ``random.choice`` one at a time, so the same file
    can be selected more than once.

    Args:
        files: sequence of file paths to draw from.
        N: number of draws.

    Returns:
        ``(X, Y)`` as two Python lists holding the rows of every loaded file,
        in the order the files were drawn.
    """
    # draw all file names first, then read them
    picked = []
    for _ in range(N):
        picked.append(random.choice(files))
    # end for

    X = []
    Y = []
    for path in picked:
        features, targets = read_data_from_file(path)
        X.extend(features)
        Y.extend(targets)
    # end for
    return X, Y
# end def

### Read data

In [None]:
# directory that holds the encoded training files
indir = 'data/labelled'
# every *.dat file directly inside indir; an empty list if none match
files = glob.glob(indir+'/*.dat')

In [None]:
def chunks(l, n):
    """Split ``l`` into consecutive slices of length ``n``.

    The last slice is shorter when ``len(l)`` is not a multiple of ``n``.
    Returns a list of slices (empty list for empty input).
    """
    return [l[start:start + n] for start in range(0, len(l), n)]
# end def

In [None]:
def epoch(files, BATCH_SIZE, n=5):
    """Train the global ``net`` on a freshly sampled data set.

    A random selection of files is loaded once via ``read_some_data`` and
    split into mini-batches; the mini-batches are then swept ``n`` times.
    Uses the module-level ``net``, ``optimizer``, ``criterion`` and
    ``device``.

    Args:
        files: sequence of data file paths to sample from.
        BATCH_SIZE: number of samples per mini-batch.
        n: number of passes over the loaded mini-batches.

    Returns:
        Mean mini-batch loss of the last pass.

    Raises:
        ValueError: if ``n`` is smaller than 1 or no samples were loaded
            (there would be no loss to report).
    """
    if n < 1:
        raise ValueError('n must be at least 1, got %r' % (n,))
    # end if

    # read data
    X, Y = read_some_data(files=files)
    Xs = chunks(X, BATCH_SIZE)
    Ys = chunks(Y, BATCH_SIZE)
    if not Xs:
        raise ValueError('no training samples were loaded from the selected files')
    # end if

    # convert each mini-batch to a float tensor once; all n passes reuse them
    batches = [
        (torch.from_numpy(numpy.array(xb)).float(),
         torch.from_numpy(numpy.array(yb)).float())
        for xb, yb in zip(Xs, Ys)
    ]

    for _iter in range(n):
        running_loss = 0
        for inputs, labels in batches:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()

            # clip BEFORE the optimizer step: clipping afterwards has no
            # effect because the gradients are zeroed on the next iteration
            torch.nn.utils.clip_grad_norm_(net.parameters(), 0.25)
            optimizer.step()

            running_loss += loss.item()
        # end for
        loss = running_loss / len(batches)
    # end for
    return loss
# end def

(note: please ensure the output directory `./data/models/` exists before running the training loop, along with any database file and table you rely on)

In [None]:
# directory that receives the serialized model snapshots written by the training loop
outdir = './data/models/'

In [None]:
# wall-clock reference used for the elapsed time printed by the training loop
stime = time.time()

In [None]:
# loss values sampled every 200 outer iterations (plotted in the last cell)
losses = []
# each outer iteration calls epoch(), which loads a new random selection of files
for i in range(20000):
    loss = epoch(files, BATCH_SIZE)
    ###################
    # LOSS MONITORING #
    ###################
    if i % 200 == 0:
        losses.append(loss)
        # elapsed time is measured from the module-level stime
        print('iteration: %d | loss: %4.3f | time: %4.1f' % (i, loss, time.time()-stime))
    # end if

    ####################
    # MODEL VERSIONING #
    ####################
    if i % 500 == 0:
        # zero-padded iteration number keeps snapshot names sortable
        name='oth_exp_pred-iter'+str(i).zfill(5)
        # save model
        # NOTE(review): open() does not create outdir; it must already exist
        outfile = outdir+'/'+name+'.dill'
        with open(outfile, 'wb') as fout:
            # the whole module object (a deep copy of net) is serialized with dill
            dill.dump(copy.deepcopy(net), fout)
        # end with
    # end if
# end for

In [None]:
import matplotlib.pyplot as plt

In [None]:
# one point per recorded loss, i.e. one for every 200 outer training iterations
plt.plot(losses)