In [1]:
import numpy as np 
import torch 
import torch.nn as nn 
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

In [2]:
class DBN(nn.Module):
    """Fully connected 773 -> 100 -> 100 -> 100 network.

    Every linear layer, including the last one, is followed by a ReLU, so
    all output values are non-negative.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=773, out_features=100)
        self.fc2 = nn.Linear(in_features=100, out_features=100)
        self.fc3 = nn.Linear(in_features=100, out_features=100)

    def forward(self, x):
        """Run `x` through fc1, fc2 and fc3 in order, applying ReLU after each."""
        for layer in (self.fc1, self.fc2, self.fc3):
            x = torch.relu(layer(x))
        return x

In [3]:
# Create an instance of the DBN module defined above.
# NOTE(review): `dbn` is not referenced again in the visible cells; presumably
# it is meant to receive the pretrained W/B tensors later — confirm.
dbn = DBN()

In [4]:
%%capture

from extractor import get_dataset

# PGN file to read and the number of games handed to the extractor.
path = 'dataset/games.pgn'
num_games = 5000

# Fetch inputs and targets, then cast both to torch.FloatTensor.
X, Y = get_dataset(path, num_games)
X, Y = X.type(torch.FloatTensor), Y.type(torch.FloatTensor)

# Ordered (unshuffled) 80/20 split: the first 80% of the rows form the
# training set and the remaining rows form the test set.
len_data = X.shape[0]
split = int(len_data * 0.8)
X_train, X_test = X[:split, :], X[split:, :]
Y_train, Y_test = Y[:split], Y[split:]

In [5]:
batch_size = 64

# Unsupervised pretraining set: every row of X paired with a dummy all-zero
# target, so that each batch unpacks as `(x, _)` in the pretraining loop.
# NOTE(review): this uses all of X, i.e. it also contains the rows held out
# as X_test — confirm that pretraining on the test inputs is intended.
dbn_dataset = TensorDataset(X, torch.zeros((X.shape[0])))
dbn_dataloader = DataLoader(dbn_dataset, batch_size=batch_size, shuffle=True)

# Supervised train/test sets built from the split tensors.
train_dataset = TensorDataset(X_train, Y_train)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = TensorDataset(X_test, Y_test)
# The test loader is not shuffled: a fixed order keeps evaluation repeatable.
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Backward-compatible aliases for the original, misspelled variable names.
train_datalaoder = train_dataloader
test_datalaoder = test_dataloader

In [8]:
# Greedy layer-wise pretraining.
#
# Each consecutive pair of sizes in `layers` is trained as a tied-weight
# autoencoder: encode with relu(x @ w + b), decode with relu(h @ w.T + c).
# Layers that were already trained are kept frozen and are only used to
# produce the input of the layer currently being trained. The results are
# collected in W (weights), B (hidden biases) and C (reconstruction biases).

layers = [773,100,100,100]
W, B, C = [], [], []

lr = 0.1
# NOTE(review): nn.CrossEntropyLoss treats `probs` as unnormalised logits and a
# floating-point target of the same shape as class probabilities. `probs` is a
# ReLU output and `x` is not normalised, so this is not a conventional
# reconstruction loss (nn.MSELoss would be the usual choice) — TODO confirm
# intent. The objective is left unchanged here.
criterion = nn.CrossEntropyLoss()
# Print the running loss every `log_every` batches. The max() guard avoids a
# modulo-by-zero when num_games < 10.
log_every = max(1, num_games // 10)

for i in range(len(layers)-1):
    w = torch.randn((layers[i],layers[i+1]),dtype=torch.float)
    # The reconstruction bias starts from the previous layer's hidden bias, but
    # on a copy: training it must not modify the frozen B[i-1] in place.
    c = torch.randn((layers[i]),dtype=torch.float) if i==0 else B[i-1].clone()
    b = torch.randn((layers[i+1]),dtype=torch.float)

    parameters = [w, c, b]
    for p in parameters:
        p.requires_grad_(True)
    optimizer = optim.SGD(parameters,lr=lr)
    epochs = 8 if i == 0 else 4
    # Mean batch loss of each epoch for the layer currently being trained.
    losses = []

    for epoch in range(epochs):
        running_loss = 0.0   # accumulated since the last progress print
        epoch_loss = 0.0     # accumulated over the whole epoch
        num_batches = 0
        print(f"Epoch {epoch+1}\n-------------------------------")
        for batch, (x, _) in enumerate(dbn_dataloader):
            # Push the batch through the already-trained layers. No gradients
            # are needed here because those layers are frozen.
            with torch.no_grad():
                for j in range(len(W)):
                    x = torch.relu(x @ W[j] + B[j])

            # Encode with (w, b) and decode with the transposed weight and c.
            probs = torch.relu((torch.relu(x @ w + b) @ w.T) + c)
            loss = criterion(probs,x)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            running_loss += batch_loss
            epoch_loss += batch_loss
            num_batches += 1
            if batch % log_every == log_every - 1:
                print(f'[{epoch + 1}, {batch + 1:5d}] loss: {running_loss / log_every:.3f}')
                running_loss = 0.0
        # max() keeps this well-defined if the dataloader yields no batches.
        losses.append(epoch_loss / max(1, num_batches))

    print(f'finished training layer {i+1}')
    # Store detached copies so later layers cannot alter the trained values.
    W.append(w.clone().detach())
    B.append(b.clone().detach())
    C.append(c.clone().detach())



            

Epoch 1
-------------------------------
[1,   500] loss: 147.163
[1,  1000] loss: 128.240
[1,  1500] loss: 126.818
[1,  2000] loss: 125.984
[1,  2500] loss: 125.103
[1,  3000] loss: 125.597
[1,  3500] loss: 124.717
[1,  4000] loss: 123.507
[1,  4500] loss: 122.823
[1,  5000] loss: 122.342
[1,  5500] loss: 121.864
Epoch 2
-------------------------------
[2,   500] loss: 120.832
[2,  1000] loss: 121.319
[2,  1500] loss: 119.341
[2,  2000] loss: 119.132
[2,  2500] loss: 117.348
[2,  3000] loss: 114.828
[2,  3500] loss: 112.589
[2,  4000] loss: 111.336
[2,  4500] loss: 110.533
[2,  5000] loss: 109.831
[2,  5500] loss: 109.059
Epoch 3
-------------------------------
[3,   500] loss: 106.252
[3,  1000] loss: 105.160
[3,  1500] loss: 104.471
[3,  2000] loss: 103.167
[3,  2500] loss: 102.346
[3,  3000] loss: 101.144
[3,  3500] loss: 100.525
[3,  4000] loss: 100.206
[3,  4500] loss: 98.488
[3,  5000] loss: 98.456
[3,  5500] loss: 97.619
Epoch 4
-------------------------------
[4,   500] loss: 9

In [7]:
# Largest entry of W[2], the weight matrix stored for the third pretrained layer.
# NOTE(review): this cell's execution count (In [7]) is lower than that of the
# training cell (In [8]), so the displayed value may come from an earlier run;
# re-run this cell after training.
torch.max(W[2])

tensor(105.9501)