In [None]:
import numpy as np
import torch
from torch.autograd import Variable
from torch import nn
import loader

MOVES = [0, 1, 2, 3]
CUDA = False

FILENAME = "saved_parameters"
INPUT_SIZE = 20
HIDDEN_SIZE = 256


def train(model, data_loaders, optimizer, num_epochs=100, log_every=100, verbose=True):
    """Run the Q-learning update loop on ``model`` and save its parameters.

    Every epoch draws one batch with ``data_loaders.get(model)`` and performs
    a single optimizer step on it. Each batch row is split by column:

    * columns 0-19 are the input passed to ``model.Q`` for the prediction,
    * column 20 is added to the best future score to form the target
      (presumably the immediate reward -- confirm against the loader),
    * columns 21 onward are the "future" features; a move id from ``MOVES``
      is appended to them to score every possible next move.

    The loop can be stopped early with Ctrl-C (``KeyboardInterrupt``); the
    parameters are written to ``FILENAME`` in that case as well.

    Args:
        model: object exposing ``Q(batch, as_variable=False)``,
            ``loss(out, y)``, ``save_parameters(filename)`` and, when
            ``CUDA`` is set, a ``network`` attribute with ``.cuda()``.
        data_loaders: object whose ``get(model)`` returns a 2-D batch.
        optimizer: optimizer providing ``zero_grad()`` and ``step()``.
        num_epochs: number of batches / optimizer steps to run.
        log_every: print the loss every this many steps.
        verbose: when false, nothing is printed.
    """
    if CUDA:
        model.network.cuda()

    iter_ = 0
    epoch = 0
    if verbose:
        # Parenthesised single-argument prints behave the same on Python 2
        # and Python 3; the bare print statement is a SyntaxError on 3.
        print(u'Training the model!')
        print(u'Interrupt at any time to get current model')
    try:
        while epoch < num_epochs:
            epoch += 1
            x = data_loaders.get(model)
            future = x[:, 21:]
            future_scores = np.zeros((x.shape[0], len(MOVES)))

            # NOTE(review): the constructed objects are discarded and `Threes`
            # is not defined in the visible part of this file. The calls are
            # kept in case construction has side effects -- confirm and drop.
            for row in future:
                Threes(row)

            # Score each candidate move by appending its id as an extra column.
            for i, move in enumerate(MOVES):
                move_column = np.full((x.shape[0], 1), move)
                future_scores[:, i] = model.Q(np.hstack((future, move_column))).ravel()
            y = x[:, 20] + np.max(future_scores, axis=1)
            x = x[:, :20]
            # x and y are deliberately left as host arrays: they are handed to
            # model.Q / model.loss, which receive arrays everywhere in this
            # function, so moving data to the GPU has to happen where the
            # tensors are built. Calling `.cuda()` on the arrays here (as the
            # code used to when CUDA was set) can only raise.
            iter_ += 1

            optimizer.zero_grad()
            out = model.Q(x, as_variable=True)
            loss = model.loss(out, y)
            loss.backward()
            optimizer.step()

            if iter_ % log_every == 0 and verbose:
                # PyTorch >= 0.4 returns a 0-dim loss that cannot be indexed;
                # older releases have no `.item()`.
                loss_value = loss.item() if hasattr(loss, "item") else loss.data[0]
                print(u"Minibatch {0: >6}  | loss {1: >5.2f} ".format(iter_, loss_value))

    except KeyboardInterrupt:
        # Manual interruption is the documented way to stop early.
        pass
    model.save_parameters(FILENAME)


class QLearningNet(object):
    """Thin wrapper pairing a torch network with the criterion used to train it.

    Attributes are set directly from the constructor arguments:
    ``network`` is called on input batches and ``criterion`` is called as
    ``criterion(out, target)``.
    """

    def __init__(self, network, criterion):
        self.network = network
        self.criterion = criterion

    def _as_variable(self, array):
        """Convert an array-like to a float Variable on the network's device."""
        tensor = torch.FloatTensor(array)
        first_param = next(iter(self.network.parameters()), None)
        if first_param is not None and first_param.is_cuda:
            tensor = tensor.cuda()
        return Variable(tensor, requires_grad=False)

    def Q(self, batch, as_variable=False):
        """Evaluate the network on ``batch``.

        Args:
            batch: array-like accepted by ``torch.FloatTensor``.
            as_variable: when true, return the network output itself so that
                gradients can flow through it; otherwise return a NumPy copy
                of the output values.
        """
        # Call the module rather than `.forward` so registered hooks run.
        out = self.network(self._as_variable(batch))
        if as_variable:
            return out
        # `.cpu()` is a no-op for CPU tensors and required before `.numpy()`
        # when the network lives on the GPU.
        return out.data.cpu().numpy()

    def loss(self, out, y):
        """Return ``criterion(out, y)`` with ``y`` converted to a float tensor.

        ``y`` is reshaped to the shape of ``out`` when both hold the same
        number of elements. Without this, a flat ``(N,)`` target against an
        ``(N, 1)`` prediction is broadcast to ``(N, N)`` by current PyTorch
        and the criterion silently averages over all N*N pairs.
        """
        target = torch.FloatTensor(y)
        if target.numel() == out.numel():
            target = target.view_as(out)
        if out.is_cuda:
            target = target.cuda()
        return self.criterion(out, Variable(target, requires_grad=False))

    def save_parameters(self, filename):
        """Write the network's ``state_dict`` to ``filename``."""
        torch.save(self.network.state_dict(), filename)

    def load_parameters(self, filename):
        """Load a ``state_dict`` from ``filename`` into the network."""
        self.network.load_state_dict(torch.load(filename))


# Q-network: INPUT_SIZE features -> one hidden ReLU layer -> a single score.
network = nn.Sequential(
    nn.Linear(INPUT_SIZE, HIDDEN_SIZE),
    nn.ReLU(),
    nn.Linear(HIDDEN_SIZE, 1)
)
criterion = nn.MSELoss()
q_learning_net = QLearningNet(network, criterion)

# Explicitly flag every parameter as trainable before building the optimizer.
for p in q_learning_net.network.parameters():
    p.requires_grad = True

optimizer = torch.optim.SGD(
    network.parameters(),
    lr=0.001,
    momentum=0.5
)
data_loader = loader.Loader()

# Runs training with the defaults of `train` and saves the parameters at the end.
train(q_learning_net, data_loader, optimizer)

Training the model!
Interrupt at any time to get current model
[ 0.25469935]
[ 0.2383244]
[ 0.25374031]
[ 0.29346713]
[ 0.85541368]
[ 0.8318876]
[ 0.82308477]
[ 0.82666147]
[ 0.74333906]
[ 0.75036806]
[ 0.76167494]
[ 0.76987523]
[ 0.67239714]
[ 0.69040358]
[ 0.73271298]
[ 0.78127348]
[ 0.88573045]
[ 0.84701145]
[ 0.8091864]
[ 0.76869214]
[ 1.21165705]
[ 1.17421412]
[ 1.14674211]
[ 1.11202514]
[ 0.86782175]
[ 0.82854152]
[ 0.80171335]
[ 0.77909577]
[ 0.80151427]
[ 0.79287308]
[ 0.7682603]
[ 0.73865306]
[ 1.03260791]
[ 0.9940902]
[ 0.96241122]
[ 0.51683712]
[ 0.56568366]
[ 0.61954719]
[ 0.67251563]
[ 0.78775162]
[ 0.81578511]
[ 0.8317579]
[ 0.83492863]
[ 0.80779958]
[ 0.84019804]
[ 0.86274797]
[ 0.87806702]
[ 0.78111053]
[ 0.81006688]
[ 0.83744353]
[ 0.65301186]
[ 0.67378885]
[ 0.68718117]
[ 0.70519942]
[ 1.26433849]
[ 1.29321575]
[ 1.33257711]
[ 1.3714931]
[ 0.83229059]
[ 0.82551575]
[ 0.82638586]
[ 0.83618659]
[ 1.24594605]
[ 1.24233329]
[ 1.23435485]
[ 0.95135581]
[ 0.93713713]
[ 0.93

[ 3663.21826172]
[ 3686.27978516]
[ 3709.34545898]
[ 3699.44165039]
[ 3722.49536133]
[ 3768.61279297]
[ 3749.24707031]
[ 3772.28979492]
[ 3795.33959961]
[ 3754.05126953]
[ 3777.08496094]
[ 3800.12548828]
[ 3840.45507812]
[ 3863.49804688]
[ 3886.53662109]
[ 3909.57763672]
[ 14822.92578125]
[ 14883.39746094]
[ 14943.89257812]
[ 15004.38867188]
[ 15006.97363281]
[ 15067.46582031]
[ 15127.97265625]
[ 15188.48242188]
[ 15061.67382812]
[ 15122.14648438]
[ 15182.6484375]
[ 15243.15429688]
[ 15294.3671875]
[ 15354.88378906]
[ 15415.41699219]
[ 16340.09179688]
[ 16400.59765625]
[ 16521.62304688]
[ 1880.58276367]
[ 1941.00732422]
[ 2001.50500488]
[ 2062.05932617]
[ 2468.83642578]
[ 2529.27734375]
[ 2589.78173828]
[ 2650.34033203]
[ 3336.87866211]
[ 3397.23413086]
[ 3457.65576172]
[ 3518.10375977]
[ 3751.62548828]
[ 3811.96289062]
[ 3872.36328125]
[ 3932.82568359]
[ 3999.18725586]
[ 4059.5300293]
[ 4119.88623047]
[ 4180.2578125]
[ 4314.69238281]
[ 4375.08496094]
[ 4435.49023438]
[ 4471.08447266]


AttributeError: 'NoneType' object has no attribute 'value'