In [7]:
import numpy as np
import torch
from torch.autograd import Variable
from torch import nn
import loader
from threes import *
from qLearningNet import *
import time
from AIModels import *

MOVES = [0, 1, 2, 3]
CUDA = False

FILENAME = "saved_parameters"
INPUT_SIZE = 19
HIDDEN_SIZE = 256
GAMMA = 0.8

def testNet(filename=None, net=None):
    game = Threes(save_game=False)
    if filename is not None:
        ai = QLearningNetAI(game, filename=filename)
    elif net is not None:
        ai = QLearningNetAI(game, net=net)
    scores, move_count, highs = AIModel.test_ai(ai, 1000, verbose=False)
    return move_count, np.mean(scores), np.mean(highs)


# parametry sieci np. learning rate
# lepszy zbior uczacy - jak w DeepMindzie
# nagrody - musza byc wieksze - inaczej sami sobie redukujemy learning rate
# (gestsze nagrody - np. logarytm z rewardow)
# uczyc jednak dobrymi przebiegami
# http://pytorch.org/docs/master/nn.html#embedding 

In [4]:
def train(model, data_loaders, optimizer, num_epochs=500, log_every=100, verbose=True):
    if CUDA:
        model.network.cuda()

    data = np.zeros((1000,40))
    data_ptr = 0
    
    epoch = 0
    if verbose:
        print u'Training the model!'
        print u'Interrupt at any time to get current model'
    try:
        while epoch < num_epochs:
            epoch += 1
            x = data_loaders.get(model, 30)
            #data[data_ptr:data_ptr + 30] = x
            #data_ptr += 30
            #data_ptr %= 1000 - 30
            
            #random_data_ptr = np.random.randint(0,data_ptr)
            #x = data[random_data_ptr : random_data_ptr + 30]
            
            future = x[:, 21:]
            future_scores = model.Q(future)
            for i, row in enumerate(future):
                #print(row)
                game = Threes(save_game=False, data=row.tolist())
                for j, move in enumerate(MoveEnum):
                    if not game.canMove(move):
                        future_scores[i, j] = float('-inf')
                if not game.getPossibleMoves():
                    future_scores[i:,:] = np.full((1,4), x[i, 20])
                
            y = x[:, 20] + GAMMA * np.max(future_scores, axis=1)
            xx = x[:, :19]

            optimizer.zero_grad()
            i = np.asarray(np.vstack((np.arange(0,30),x[:,19])),int)
            
            i = torch.LongTensor(i)
            out = model.Q(xx, as_variable=True)[i[0],i[1]]
            loss = model.loss(out, y)
            loss.backward()
            optimizer.step()

            if epoch % log_every == 0 and verbose:
                print u"Minibatch {0: >6}  | loss {1: >15.12f} ".format(epoch, loss.data[0])
            if epoch % (log_every * 50) == 0:
                result = testNet(net=model)
                filename = FILENAME +'{}_{}'.format(epoch, int(result[1]))
                model.save_parameters(filename)
                

    except KeyboardInterrupt:
        pass
    result = testNet(net=model)
    filename = FILENAME +'{}_{}'.format(epoch, int(result[1]))
    model.save_parameters(filename)


q_learning_net = QLearningNet()
for p in q_learning_net.network.parameters():
    p.requires_grad = True
optimizer = torch.optim.Adam(q_learning_net.network.parameters(), lr=5.000)
data_loader = loader.Loader()
train(q_learning_net, data_loader, optimizer, num_epochs=10000)
print("Learning done")

Training the model!
Interrupt at any time to get current model
Minibatch    100  | loss 2248.635986328125 
Minibatch    200  | loss  0.000817402033 
Minibatch    300  | loss  0.008512672037 
Minibatch    400  | loss  0.000948334928 
Minibatch    500  | loss  0.001434783218 
Minibatch    600  | loss  6.698752880096 
Minibatch    700  | loss 300.259094238281 
Minibatch    800  | loss 32369.904296875000 
Minibatch    900  | loss 70.465797424316 
Minibatch   1000  | loss 75.683502197266 
Minibatch   1100  | loss 84.878440856934 
Minibatch   1200  | loss 60.075538635254 
Minibatch   1300  | loss 780.863037109375 
Minibatch   1400  | loss 2311.541259765625 
Minibatch   1500  | loss 994725.125000000000 
Minibatch   1600  | loss 1713.556274414062 
Minibatch   1700  | loss 414.998748779297 
Minibatch   1800  | loss 10317.613281250000 
Minibatch   1900  | loss 76677.664062500000 
Minibatch   2000  | loss 32318.750000000000 
Minibatch   2100  | loss 1013.955627441406 
Minibatch   2200  | loss 113

In [6]:
filename = FILENAME + '6000_447'
print testNet(filename = filename)

Loading net parameters
({'Down': 546, 'Right': 19343, 'Up': 6486, 'Left': 159}, 398.58600000000001, 38.676000000000002)


In [5]:
game = Threes(save_game=False)
ai = RandomAI(game)
scores, move_count, highs = AIModel.test_ai(ai, 1000, verbose=False)
print move_count
print np.mean(scores), np.mean(highs)

{'Down': 10711, 'Right': 10507, 'Up': 10646, 'Left': 10809}
288.54 33.432


In [11]:
game = Threes(save_game=False)
ai = MiniMaxAI(game)
scores, move_count, highs = AIModel.test_ai(ai, 10, verbose=False)
print move_count
print np.mean(scores), np.mean(highs)

{'Down': 775, 'Right': 741, 'Up': 761, 'Left': 779}
12808.5 412.8
