In [1]:
import numpy as np
import torch
from torch.autograd import Variable
from torch import nn
import loader
from threes import *
from qLearningNet import *
import time

# presumably the integer codes of the four game moves; not referenced by any
# other cell visible in this notebook — TODO confirm it is still needed.
MOVES = [0, 1, 2, 3]
# When True, train() moves the model's network to the GPU via .cuda().
CUDA = False

# Path handed to model.save_parameters() at the end of train() and to
# QLearningNetAI when the trained agent is evaluated.
FILENAME = "saved_parameters"
# presumably the length of one encoded game state; train() treats the first
# 19 columns of every sample row as the state — TODO confirm against qLearningNet.
INPUT_SIZE = 19
# presumably the hidden-layer width of the Q-network; not referenced in the
# visible cells — TODO confirm against qLearningNet.
HIDDEN_SIZE = 256
# Discount factor applied to the best next-state Q-value when train() builds
# its regression targets.
GAMMA = 0.8

In [12]:
def train(model, data_loaders, optimizer, num_epochs=500, log_every=100, verbose=True,
          batch_size=30):
    """Train ``model`` with one-step Q-learning targets.

    Every iteration requests a batch of transition rows from
    ``data_loaders.get(model, batch_size)``, builds the target
    ``reward + GAMMA * max_a Q(next_state, a)`` with moves that are illegal in
    the next state masked out, and performs one optimizer step on
    ``model.loss`` between that target and the Q-value of the action taken.

    Each transition row is consumed with this column layout:
    ``[0, INPUT_SIZE)`` state, ``INPUT_SIZE`` action index,
    ``INPUT_SIZE + 1`` reward, ``[INPUT_SIZE + 2, ...)`` next state.

    Args:
        model: object exposing ``network``, ``Q(states, as_variable=False)``,
            ``loss(prediction, target)`` and ``save_parameters(path)``.
        data_loaders: object exposing ``get(model, n)`` that returns a 2-D
            numpy array of transition rows.
        optimizer: torch optimizer bound to the parameters of ``model.network``.
        num_epochs: number of minibatch updates to perform.
        log_every: print the loss every ``log_every`` updates.
        verbose: when False, nothing is printed.
        batch_size: number of transitions requested per update.

    The parameters are always written to ``FILENAME`` on exit, including when
    training is stopped early with a keyboard interrupt.
    """
    if CUDA:
        model.network.cuda()

    # Column indices of one transition row (see docstring).
    action_col = INPUT_SIZE
    reward_col = INPUT_SIZE + 1
    next_state_start = INPUT_SIZE + 2

    iter_ = 0
    if verbose:
        print(u'Training the model!')
        print(u'Interrupt at any time to get current model')
    try:
        while iter_ < num_epochs:
            iter_ += 1
            x = data_loaders.get(model, batch_size)
            # Use the real row count so a short batch does not break indexing.
            n_rows = x.shape[0]

            future = x[:, next_state_start:]
            future_scores = model.Q(future)
            for i, row in enumerate(future):
                game = Threes(save_game=False, data=row.tolist())
                # An illegal move must never be selected by the max below.
                for j, move in enumerate(MoveEnum):
                    if not game.canMove(move):
                        future_scores[i, j] = float('-inf')
                if not game.getPossibleMoves():
                    # Terminal next state: every entry is -inf at this point,
                    # so substitute the reward. Only row i is touched; writing
                    # to rows i: would also clobber the Q-values of the
                    # remaining, not yet processed transitions in the batch.
                    # NOTE(review): the target therefore becomes r + GAMMA * r
                    # for terminal transitions — confirm this is intended
                    # rather than a plain r.
                    future_scores[i, :] = x[i, reward_col]

            y = x[:, reward_col] + GAMMA * np.max(future_scores, axis=1)
            states = x[:, :INPUT_SIZE]

            optimizer.zero_grad()
            # Row 0: sample index, row 1: action taken; used to pick
            # Q(state, action) for every sample in the batch.
            picks = torch.LongTensor(
                np.asarray(np.vstack((np.arange(n_rows), x[:, action_col])), int))
            out = model.Q(states, as_variable=True)[picks[0], picks[1]]
            loss = model.loss(out, y)
            loss.backward()
            optimizer.step()

            if iter_ % log_every == 0 and verbose:
                # .item() exists on newer PyTorch; older releases need .data[0].
                loss_value = loss.item() if hasattr(loss, 'item') else loss.data[0]
                print(u"Minibatch {0: >6}  | loss {1: >15.12f} ".format(iter_, loss_value))

    except KeyboardInterrupt:
        # Deliberate: an interrupt ends training early but still saves below.
        pass
    model.save_parameters(FILENAME)


# Build a fresh Q-network wrapper (QLearningNet comes from the qLearningNet
# wildcard import).
q_learning_net = QLearningNet()
# Explicitly mark every parameter of the underlying network as trainable.
for p in q_learning_net.network.parameters():
    p.requires_grad = True
# Adam over all network parameters with a learning rate of 0.001.
optimizer = torch.optim.Adam(q_learning_net.network.parameters(), lr=0.001)
# Source of training batches; train() calls data_loader.get(model, n) on it.
data_loader = loader.Loader()
# Run 25000 minibatch updates; train() saves the parameters to FILENAME on exit.
train(q_learning_net, data_loader, optimizer, num_epochs=25000)
print("Learning done")

Training the model!
Interrupt at any time to get current model
Minibatch    100  | loss  0.000000620268 
Minibatch    200  | loss  0.000000083176 
Minibatch    300  | loss  0.000000202193 
Minibatch    400  | loss  0.000000003988 
Minibatch    500  | loss  0.000000158907 
Minibatch    600  | loss  0.000000088219 
Minibatch    700  | loss  0.001247189124 
Minibatch    800  | loss  0.000000936921 
Minibatch    900  | loss  0.000000074460 
Minibatch   1000  | loss  0.000000488030 
Minibatch   1100  | loss  0.000000766705 
Minibatch   1200  | loss  0.000000427949 
Minibatch   1300  | loss  0.000007102639 
Minibatch   1400  | loss  0.000000115028 
Minibatch   1500  | loss  0.000000889642 
Minibatch   1600  | loss  0.000001892714 
Minibatch   1700  | loss  0.000000497900 
Minibatch   1800  | loss  0.000001486336 
Minibatch   1900  | loss  0.000000167309 
Minibatch   2000  | loss  0.000001648781 
Minibatch   2100  | loss  0.000080795064 
Minibatch   2200  | loss  0.000000039518 
Minibatch   2

Minibatch  19500  | loss  0.000001366352 
Minibatch  19600  | loss  0.000000153711 
Minibatch  19700  | loss  0.000001644709 
Minibatch  19800  | loss  0.000001371313 
Minibatch  19900  | loss  0.000000290862 
Minibatch  20000  | loss  0.000000137002 
Minibatch  20100  | loss  0.000000247624 
Minibatch  20200  | loss  0.000001403168 
Minibatch  20300  | loss  0.000000379105 
Minibatch  20400  | loss  0.000002590389 
Minibatch  20500  | loss  0.000002901296 
Minibatch  20600  | loss  0.000003586148 
Minibatch  20700  | loss  0.000000154050 
Minibatch  20800  | loss  0.000001100864 
Minibatch  20900  | loss  0.000000313443 
Minibatch  21000  | loss  0.000000264703 
Minibatch  21100  | loss  0.000000470014 
Minibatch  21200  | loss  0.000002218024 
Minibatch  21300  | loss  0.000001323432 
Minibatch  21400  | loss  0.000003518844 
Minibatch  21500  | loss  0.000000329361 
Minibatch  21600  | loss  0.000000034390 
Minibatch  21700  | loss  0.000002082264 
Minibatch  21800  | loss  0.000000

In [15]:
# NOTE(review): wildcard import in a late cell; presumably this is what brings
# QLearningNetAI, AIModel, RandomAI and MiniMaxAI into scope — consider moving
# it to the import cell at the top.
from AIModels import *
# Evaluate the agent that loads its parameters from FILENAME, the file written
# by train().
game = Threes(save_game=False)
ai = QLearningNetAI(game, FILENAME)
# presumably the second argument is the number of games to play — TODO confirm.
scores, move_count, highs = AIModel.test_ai(ai, 100, verbose=False)
# Per the recorded output, move_count maps each move name to a count.
print move_count
# Mean of the returned scores and mean of the returned highs.
print np.mean(scores), np.mean(highs)

{'Down': 489, 'Right': 1090, 'Up': 873, 'Left': 876}
181.53 23.46


In [10]:
# Baseline: evaluate RandomAI with the same test_ai call used for the
# Q-learning agent, so the printed numbers are directly comparable.
game = Threes(save_game=False)
ai = RandomAI(game)
# presumably the second argument is the number of games to play — TODO confirm.
scores, move_count, highs = AIModel.test_ai(ai, 100, verbose=False)
# Per the recorded output, move_count maps each move name to a count.
print move_count
# Mean of the returned scores and mean of the returned highs.
print np.mean(scores), np.mean(highs)

{'Down': 1107, 'Right': 1090, 'Up': 1034, 'Left': 1119}
298.26 35.22


In [11]:
# Reference: evaluate MiniMaxAI. Note the second test_ai argument is 10 here
# versus 100 for the other two agents, so the averages come from a smaller run.
game = Threes(save_game=False)
ai = MiniMaxAI(game)
# presumably the second argument is the number of games to play — TODO confirm.
scores, move_count, highs = AIModel.test_ai(ai, 10, verbose=False)
# Per the recorded output, move_count maps each move name to a count.
print move_count
# Mean of the returned scores and mean of the returned highs.
print np.mean(scores), np.mean(highs)

{'Down': 775, 'Right': 741, 'Up': 761, 'Left': 779}
12808.5 412.8
