In [1]:
import matplotlib.pyplot as plt
from tqdm import tqdm, trange
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from bokePolicy import PolicyNet, NinebyNineGames
%matplotlib inline

In [2]:
SCALE = 100
dir = r"/home/jupyter/BokeGo/data/boards.csv"
data = NinebyNineGames(dir, scale = SCALE)
dataloader = DataLoader(data, batch_size = 128, shuffle = True)

pi = PolicyNet(scale = SCALE)
err = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(pi.parameters(), lr = 0.01)

In [3]:
#GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
pi.to(device)

epochs = 1

for epoch in range(epochs):
    running_loss = 0.0
    for i, data in tqdm(enumerate(dataloader,0)):
        inputs, moves = data
        inputs, moves = inputs.to(device), moves.to(device)
        
        optimizer.zero_grad()
        outputs = pi(inputs)
        
        #backprop
        loss = err(outputs, moves)
        loss.backward()
        optimizer.step()
    
        running_loss += loss.item()
        if i % 1000 == 999:
            print(f"Epoch: {epoch}, Loss: {running_loss:.3f}")
            running_loss = 0.0
            
torch.save(pi.state_dict(), r"/home/jupyter/BokeGo/policy_weights_1.pt")


1000it [04:06,  4.11it/s]

Epoch: 0, Loss: 4348.894


2000it [08:09,  4.10it/s]

Epoch: 0, Loss: 4312.082


3000it [12:07,  4.25it/s]

Epoch: 0, Loss: 4311.326


4000it [16:03,  4.25it/s]

Epoch: 0, Loss: 4312.000


5000it [20:01,  4.16it/s]

Epoch: 0, Loss: 4311.379


6000it [23:57,  4.25it/s]

Epoch: 0, Loss: 4310.800


7000it [27:54,  4.31it/s]

Epoch: 0, Loss: 4311.009


8000it [31:49,  4.43it/s]

Epoch: 0, Loss: 4312.399


9000it [35:44,  4.33it/s]

Epoch: 0, Loss: 4309.711


10000it [39:40,  4.23it/s]

Epoch: 0, Loss: 4310.807


11000it [43:38,  4.21it/s]

Epoch: 0, Loss: 4312.196


12000it [47:36,  4.25it/s]

Epoch: 0, Loss: 4311.156


13000it [51:32,  4.34it/s]

Epoch: 0, Loss: 4312.435


14000it [55:29,  4.32it/s]

Epoch: 0, Loss: 4309.105


15000it [59:24,  4.24it/s]

Epoch: 0, Loss: 4313.245


16000it [1:03:16,  4.36it/s]

Epoch: 0, Loss: 4309.904


17000it [1:07:09,  4.26it/s]

Epoch: 0, Loss: 4311.098


18000it [1:11:03,  4.22it/s]

Epoch: 0, Loss: 4310.230


19000it [1:15:00,  4.39it/s]

Epoch: 0, Loss: 4313.133


20000it [1:18:57,  4.16it/s]

Epoch: 0, Loss: 4310.384


21000it [1:22:55,  4.19it/s]

Epoch: 0, Loss: 4311.074


22000it [1:26:54,  4.22it/s]

Epoch: 0, Loss: 4311.196


23000it [1:30:50,  4.23it/s]

Epoch: 0, Loss: 4309.800


23185it [1:31:34,  4.22it/s]


In [3]:
conv = nn.Conv2d(10,15,3, padding =1, bias = False)
t = torch.randn(1,6, 9,9)
pi(t)

tensor([[ 0.0540,  0.0022,  0.0101,  0.0671,  0.0038, -0.1001, -0.0221, -0.0253,
          0.0139,  0.0251, -0.0352,  0.0761, -0.0360, -0.0181,  0.1021, -0.0390,
         -0.0032, -0.0578, -0.0845,  0.0258, -0.0808,  0.0038, -0.0513, -0.0980,
         -0.0222, -0.0314, -0.0395,  0.0305, -0.0391, -0.0173,  0.0299, -0.0178,
         -0.0159,  0.0593,  0.0143,  0.0449,  0.0242,  0.0066, -0.0280,  0.0090,
          0.0499,  0.0532,  0.0146,  0.0679, -0.0637,  0.0199, -0.0252, -0.0090,
         -0.0320, -0.1115, -0.0441,  0.0872,  0.0932, -0.0551,  0.0151,  0.0409,
          0.0096,  0.0224,  0.0851, -0.0289,  0.0346,  0.0006,  0.1079,  0.0925,
          0.0058, -0.0157,  0.0438,  0.0029, -0.0196,  0.0646, -0.0003, -0.0274,
         -0.0293, -0.0475,  0.0978, -0.0497, -0.0495,  0.0822, -0.0767, -0.0456,
         -0.0470]], grad_fn=<AddmmBackward>)