In [7]:
import numpy as np
import torch
from torch.autograd import Variable
from torch import nn
import loader
from threes import *
import time

# Network dimensions: INPUT_SIZE feeds the first nn.Linear layer, HIDDEN_SIZE
# is the width of the single hidden layer.
INPUT_SIZE, HIDDEN_SIZE = 19, 256

# Multiplier applied to the best next-state score when train() builds targets.
GAMMA = 0.8

# Path handed to QLearningNet.save_parameters() at the end of train().
FILENAME = "saved_parameters"

# When True, train() calls .cuda() on the wrapped network.
CUDA = False

# Move indices 0..3 (not referenced by the code visible in this notebook).
MOVES = list(range(4))


def train(model, data_loaders, optimizer, num_epochs=500, log_every=100, verbose=True,
          batch_size=30):
    """Train ``model`` on batches from ``data_loaders`` using one-step Q targets.

    Each batch ``x`` is a 2-D array that is sliced as follows:

    * ``x[:, :19]``  -- fed to ``model.Q`` to obtain the scores being trained,
    * ``x[:, 19]``   -- column index (the move) whose score is regressed,
    * ``x[:, 20]``   -- value added to the discounted best next score,
    * ``x[:, 21:]``  -- next state, scored by ``model.Q`` and replayed in ``Threes``.

    The regression target is ``x[:, 20] + GAMMA * max_j Q(next)[j]`` where moves
    that ``Threes.canMove`` rejects are masked out with ``-inf``. For a next
    state with no possible moves, every score of *that row* is replaced by
    ``x[i, 20]``.

    Parameters
    ----------
    model : object exposing ``network``, ``Q``, ``loss`` and ``save_parameters``.
    data_loaders : object whose ``get(model, batch_size)`` returns a batch.
    optimizer : object exposing ``zero_grad()`` and ``step()``.
    num_epochs : number of batches to train on.
    log_every : print the loss every ``log_every`` iterations (if ``verbose``).
    verbose : enable progress output.
    batch_size : rows requested from the loader per iteration (default 30).

    A ``KeyboardInterrupt`` stops training early; in every case the parameters
    are saved to ``FILENAME`` before returning.
    """
    if CUDA:
        model.network.cuda()

    iter_ = 0
    epoch = 0
    if verbose:
        print(u'Training the model!')
        print(u'Interrupt at any time to get current model')
    try:
        while epoch < num_epochs:
            epoch += 1
            x = data_loaders.get(model, batch_size)

            future = x[:, 21:]
            future_scores = model.Q(future)
            for i, row in enumerate(future):
                game = Threes(save_game=False, data=row.tolist())
                # Illegal moves must never win the max below.
                for j, move in enumerate(MoveEnum):
                    if not game.canMove(move):
                        future_scores[i, j] = float('-inf')
                if not game.getPossibleMoves():
                    # Only row i is terminal; writing to rows i: would leak this
                    # value into the scores of every later sample in the batch.
                    future_scores[i, :] = x[i, 20]

            y = x[:, 20] + GAMMA * np.max(future_scores, axis=1)
            xx = x[:, :19]
            iter_ += 1

            optimizer.zero_grad()
            # Pair each row number with the move stored in column 19 so that a
            # single score per sample is selected from the network output.
            index = np.asarray(np.vstack((np.arange(len(x)), x[:, 19])), int)
            index = torch.LongTensor(index)
            out = model.Q(xx, as_variable=True)[index[0], index[1]]
            loss = model.loss(out, y)
            loss.backward()
            optimizer.step()

            if iter_ % log_every == 0 and verbose:
                # 0-dim losses (PyTorch >= 0.4) need .item(); older releases
                # only support indexing the 1-element tensor.
                loss_value = loss.item() if hasattr(loss, 'item') else loss.data[0]
                print(u"Minibatch {0: >6}  | loss {1: >5.2f} ".format(iter_, loss_value))

    except KeyboardInterrupt:
        # Deliberate: an interrupt just ends training; the model is still saved.
        pass
    model.save_parameters(FILENAME)


class QLearningNet(object):
    """Thin wrapper pairing a torch network with the criterion used to train it."""

    def __init__(self, network, criterion):
        """Store the network and the loss criterion for later use."""
        self.network = network
        self.criterion = criterion

    def Q(self, batch, as_variable=False):
        """Run ``batch`` through the network.

        ``batch`` is converted to a ``FloatTensor`` wrapped in a ``Variable``
        that does not require gradients. The network output is returned as-is
        when ``as_variable`` is true, otherwise as a numpy array taken from
        its ``.data``.
        """
        inputs = Variable(torch.FloatTensor(batch), requires_grad=False)
        scores = self.network.forward(inputs)
        return scores if as_variable else scores.data.numpy()

    def loss(self, out, y):
        """Apply the criterion to ``out`` and the float-tensor version of ``y``."""
        target = Variable(torch.FloatTensor(y), requires_grad=False)
        return self.criterion(out, target)

    def save_parameters(self, filename):
        """Write the network's ``state_dict`` to ``filename`` with ``torch.save``."""
        state = self.network.state_dict()
        torch.save(state, filename)

    def load_parameters(self, filename):
        """Load a ``state_dict`` from ``filename`` into the network."""
        state = torch.load(filename)
        self.network.load_state_dict(state)


# Single-hidden-layer network: INPUT_SIZE inputs -> HIDDEN_SIZE tanh units -> 4 outputs.
network = nn.Sequential(
    nn.Linear(INPUT_SIZE, HIDDEN_SIZE),
    nn.Tanh(),
    nn.Linear(HIDDEN_SIZE, 4),
)
criterion = nn.MSELoss()
q_learning_net = QLearningNet(network, criterion)

# Explicitly mark every parameter of the wrapped network as trainable.
for p in q_learning_net.network.parameters():
    p.requires_grad = True

optimizer = torch.optim.Adam(network.parameters(), lr=0.001)
data_loader = loader.Loader()

# Train for 100 batches; train() saves the parameters when it finishes.
train(q_learning_net, data_loader, optimizer, num_epochs=100)
print("Learning done")

Training the model!
Interrupt at any time to get current model
Minibatch    100  | loss  0.00 
Learning done


In [9]:
def printer(curr_game):
    """Print the board of ``curr_game`` as a grid, one board row per line.

    ``curr_game.stateInfo().board`` is iterated as rows of cell values. Each
    cell is right-aligned in a 4-character field and followed by one space;
    cells equal to 0 are shown as ``.``.

    Every row is assembled into a single string and emitted with one
    single-argument ``print(...)`` call, which behaves the same under the
    Python 2 print statement and the Python 3 print function.
    """
    board = curr_game.stateInfo().board
    for row in board:
        cells = [u"{:>4}".format(el if el != 0 else u".") for el in row]
        # The space after every cell (including the last one) reproduces the
        # layout produced by the former trailing-comma print statements.
        print(u"".join(cell + u" " for cell in cells))

if __name__ == u'__main__':
    # Let the trained network play one game, printing the board after each move.
    # NOTE(review): `random`, `getFilename` and `saveState` are not imported in
    # the visible cells -- presumably provided by `from threes import *`; confirm.
    seed = int(time.time())
    random.seed(seed)
    game = Threes(save_game=False)
    file = u''
    if game.save_game:
        filename = getFilename()
        file = open(filename, u'a+')
        # The seed is recorded as the first line written for this game.
        file.write(u"{0}\n".format(seed))
        file.flush()
    try:
        printer(game)
        moves_dict = {u"w": MoveEnum.Up,
                      u"a": MoveEnum.Left,
                      u"s": MoveEnum.Down,
                      u"d": MoveEnum.Right}
        # Play until none of the four directions is a legal move.
        while any(game.canMove(candidate) for candidate in moves_dict.values()):
            res = q_learning_net.Q(game.data())
            # Training maximises the predicted score, so play the legal move
            # with the HIGHEST score (the old sorted(...)[0] took the lowest).
            m = max(game.getPossibleMoves(), key=lambda x: res[x.value])
            if game.canMove(m):
                if game.save_game:
                    saveState(game, m, file)
                game.makeMove(m)
            else:
                print(u"THE MOVE IS NOT VALID!")
            # Blank separator line between consecutive boards.
            print(u"")
            printer(game)
    finally:
        # `file` is the empty string unless a save file was actually opened.
        if file:
            file.close()

   3    .    1    2 
   .    .    1    . 
   1    3    .    . 
   .    2    3    2 

   3    1    2    . 
   .    1    .    2 
   1    3    .    . 
   2    3    2    . 

   3    3    .    . 
   1    .    2    1 
   1    3    .    . 
   2    3    2    . 

   6    .    .    . 
   1    2    1    3 
   1    3    .    . 
   2    3    2    . 

   6    .    .    . 
   3    1    3    2 
   1    3    .    . 
   2    3    2    . 

   6    1    3    2 
   3    3    .    . 
   3    3    2    . 
   .    1    .    . 

   6    1    3    2 
   6    .    .    . 
   6    2    .    3 
   1    .    .    . 

   6    1    3    2 
   6    .    .    . 
   6    2    3    1 
   1    .    .    . 

  12    1    3    2 
   6    2    3    1 
   1    .    .    . 
   2    .    .    . 

   3    .    .    . 
  12    1    3    2 
   6    2    3    1 
   3    .    .    . 

   3    1    3    2 
  12    2    3    1 
   6    .    .    . 
   3    .    .    2 

   3    1    3    2 
  12    2    3    1 
   6    .    .    . 
  