In [None]:
# Numerical arrays and plotting.
import numpy as np
import matplotlib.pyplot as plt

# Seed NumPy's global random number generator so repeated runs draw the same numbers.
np.random.seed(1)

# PyTorch: tensors, optimizers, neural-network layers and index sampling.
import torch
from torch import optim
from torch import nn
from torch.utils.data import RandomSampler

# Seed PyTorch's global random number generator as well.
torch.manual_seed(1)

# Used to give the training loop a wall-clock deadline.
import time

# Project-local module; GameData plays the actual game with a supplied move-selection callable.
# NOTE(review): this import runs after both seeds are set — keep that order if helper
# draws random numbers at import time (cannot be confirmed from this notebook).
from helper import GameData

In [None]:
# Layer widths of the network: 16 inputs (the flattened board layout fed in by
# the training loop), two hidden layers, and 4 outputs.
inputSize = 16
outputSize = 4
neuronCountJ = 100
neuronCountK = 150

# Fully connected stack, listed in forward order. The layers stay positional
# (unnamed) so the state_dict keys remain '0.weight', '2.weight', '4.weight', ...
_layers = [
    nn.Linear(inputSize, neuronCountJ),
    nn.ReLU(),
    nn.Linear(neuronCountJ, neuronCountK),
    nn.ReLU(),
    nn.Linear(neuronCountK, outputSize),
    # Softmax across the output dimension: each row of the result sums to 1.
    nn.Softmax(dim=1),
]
model = nn.Sequential(*_layers)

#model.load_state_dict(torch.load('C:\\Users\\mpaige\\Documents\\2048\\model.pt'))

In [None]:
# Stochastic gradient descent over every trainable parameter of `model`,
# with a fixed learning rate.
opt = optim.SGD(
    params=model.parameters(),
    lr=0.01,
)

In [None]:
# Sum of the first parameter tensor (the first Linear layer's weights): a cheap
# fingerprint of the model state, and the same quantity the training loop prints.
next(model.parameters()).sum()

In [None]:
# Training criterion: mean absolute error between its two arguments.
loss = nn.L1Loss(reduction='mean')

# History of the mean final score of each batch of games; the training loop
# appends one entry per iteration.
scores = list()

In [None]:
# Training-run settings, gathered here so they can be tuned in one place.
TRAIN_SECONDS = 60*60*4   # wall-clock budget for the whole run (4 hours)
GAMES_PER_BATCH = 50      # games played with the current model per iteration
MOVES_PER_UPDATE = 1000   # moves drawn (with replacement) for each gradient step
LOG_EVERY = 500           # print diagnostics every this many iterations


def choose_move(layout):
    """Return the model's output for a single board layout.

    The layout is divided by its own sum (the same normalisation applied to the
    training inputs below), reshaped into a batch of one, and passed through
    `model`. Gradient tracking is switched off because these forward passes
    only drive game play and are never back-propagated through.

    Returns:
        A flat NumPy array containing the model's output for `layout`.
    """
    board = torch.tensor(layout/layout.sum(), dtype=torch.float).reshape(1, -1)
    with torch.no_grad():
        return model(board).numpy().flatten()


end_time = time.time() + TRAIN_SECONDS
print('Will end at:', time.ctime(end_time))

while time.time() < end_time:
    # Play a batch of games using the current model to pick moves.
    data = GameData(GAMES_PER_BATCH, method=choose_move)

    scores.append(data.final_scores.mean())

    # Per-game penalty: 0 for the best game of the batch, larger the further a
    # game's final score falls below that best score.
    # (alternative left by the author:
    #  (data.final_scores-data.final_scores.min())/data.final_scores.std())
    weighted_scores = data.final_scores.max() - data.final_scores
    mean_penalty = weighted_scores.mean()
    if mean_penalty == 0:
        # Every game in the batch finished with the same score, so all penalties
        # are zero and the normalisation below would be 0/0 = NaN. That NaN would
        # flow through the loss into opt.step() and overwrite every weight with
        # NaN for the rest of the run. The batch carries no signal: skip it.
        continue
    weighted_scores = weighted_scores/mean_penalty

    # Give every move the penalty of the game it belongs to, as a column vector
    # so it broadcasts across the model's outputs.
    game_penalties = torch.tensor(
        np.concatenate([
            np.repeat(penalty, move_count)
            for penalty, move_count in zip(weighted_scores, data.num_moves)
        ]),
        dtype=torch.float,
    )[:, None]
    # (alternative left by the author, weighting later moves of a game more:)
    #move_weights = torch.tensor(np.concatenate([np.array([1-np.exp(-3*x) for x in np.linspace(0, 1, num=data.num_moves[i])]) for i in range(len(data.num_moves))]), dtype=torch.float)[:, None]

    # Stack the layouts of all games and normalise each row to sum to 1.
    # assumes data.layouts holds one 2-D array (moves x cells) per game — TODO confirm
    x = torch.tensor(np.concatenate(data.layouts), dtype=torch.float)
    x = x/torch.sum(x, dim=1)[:, None]

    # presumably one row per move, aligned with x and shaped like the model output — verify in helper
    y_taken = torch.tensor(np.concatenate(data.moves), dtype=torch.float)

    # Train on a random subset of the moves, drawn with replacement.
    sample_indexes = list(RandomSampler(x, replacement=True, num_samples=MOVES_PER_UPDATE))

    x = x[sample_indexes]
    game_penalties = game_penalties[sample_indexes]
    y_taken = y_taken[sample_indexes]

    # Forward pass, penalty-weighted L1 loss, and one optimizer step.
    y_hat = model(x)

    # NOTE(review): the error term is scaled by the penalty, so minimising it
    # pulls y_hat toward y_taken hardest for the lowest-scoring games — confirm
    # this is the intended learning signal.
    output = loss(game_penalties*y_hat, game_penalties*y_taken)
    output.backward()

    opt.step()
    opt.zero_grad()

    if len(scores) % LOG_EVERY == 0:
        # Weight checksum and output spread, to spot stalled or collapsed training.
        print(list(model.parameters())[0].sum(), y_hat.std())

In [None]:
def moving_average(a, n=3):
    """Return the simple moving average of `a` over a sliding window of `n` samples.

    Built from a running total: the sum of a window equals the cumulative sum at
    its last element minus the cumulative sum just before its first element.
    The result has ``len(a) - n + 1`` entries, and is empty when `a` holds fewer
    than `n` values.
    """
    totals = np.cumsum(a, dtype=float)
    window_sums = totals.copy()
    # From index n onward, remove the part of the running total that precedes the window.
    window_sums[n:] = totals[n:] - totals[:-n]
    # The first complete window ends at index n - 1.
    return window_sums[n - 1:] / n

# Plot the smoothed training curve: a moving average over the per-iteration
# mean final scores collected in `scores`.
SMOOTHING_WINDOW = 200
# Clamp the window to the amount of data available so a short (or interrupted)
# run still draws a curve instead of an empty figure.
smoothing_window = max(1, min(SMOOTHING_WINDOW, len(scores)))

fig, ax = plt.subplots()
ax.plot(moving_average(np.array(scores), smoothing_window))
ax.set_xlabel('Training iteration (start of averaging window)')
ax.set_ylabel('Mean final score per batch of games')
ax.set_title('Moving average of batch mean score (window = %d iterations)' % smoothing_window);

In [None]:
# Pearson correlation matrix between the batch mean scores and their iteration
# index; the off-diagonal entry is positive when scores trend upward over the run.
np.corrcoef(scores, np.arange(len(scores)))

In [None]:
#torch.save(model.state_dict(), 'C:\\Users\\mpaige\\Documents\\2048\\model2.pt')