In [1]:
import numpy as np 
import torch 
import torch.nn as nn 
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

In [2]:
class DBN(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(773,100)
        self.fc2 = nn.Linear(100,100)
        self.fc3 = nn.Linear(100,100)
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        return x

In [3]:
dbn = DBN()

In [4]:
%%capture

from extractor import get_dataset
path = 'dataset/games.pgn'
num_games = 50

X, Y = get_dataset(path, num_games)
X = X.type(torch.FloatTensor)
Y = Y.type(torch.FloatTensor)
len_data = X.shape[0]
split = int(len_data*0.8)
X_train = X[:split,:]
X_test = X[split:,:]
Y_train = Y[:split]
Y_test = Y[split:]

In [5]:
dbn_dataset = TensorDataset(X, torch.zeros((X.shape[0])))
dbn_dataloader = DataLoader(dbn_dataset, batch_size=64, shuffle=True)
train_dataset = TensorDataset(X_train, Y_train)
train_datalaoder = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataset = TensorDataset(X_test, Y_test)
test_datalaoder = DataLoader(test_dataset, batch_size=64, shuffle=True)

In [6]:
#greedy training 

layers = [773,100,100,100]
W, B, C = [], [], []

for i in range(len(layers)-1):
    w = torch.randn((layers[i],layers[i+1]),dtype=torch.float)
    c = torch.randn((layers[i]),dtype=torch.float) if i==0 else B[i-1]
    b = torch.randn((layers[i+1]),dtype=torch.float)
    
    parameters = [w, c, b]
    for p in parameters:
        p.requires_grad = True

    optimizer = optim.SGD(parameters,lr=0.1)
    criterion = nn.CrossEntropyLoss()
    epochs = 8
    losses = []

    for epoch in range(epochs):
        running_loss = 0.0
        print(f"Epoch {epoch+1}\n-------------------------------")
        for batch, (x, _) in enumerate(dbn_dataloader):
            if i > 0:
                for j in range(len(W)):
                    x = torch.relu(x @ W[j] + B[j])

            probs = torch.relu((torch.relu(x @ w + b) @ w.T) + c)
            loss = criterion(probs,x)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if batch % 6 == 5:
                print(f'[{epoch + 1}, {batch + 1:5d}] loss: {running_loss / 200:.3f}')
                running_loss = 0.0
        losses.append(running_loss)
        
    print(f'finished training layer {i+1}')
    W.append(w.clone().detach())
    B.append(b.clone().detach())
    C.append(c.clone().detach())
    
    for p in parameters: del p



            

Epoch 1
-------------------------------
[1,     6] loss: 29.954
[1,    12] loss: 8.278
[1,    18] loss: 6.172
[1,    24] loss: 5.457
[1,    30] loss: 5.015
[1,    36] loss: 4.870
[1,    42] loss: 4.568
[1,    48] loss: 4.516
[1,    54] loss: 4.437
[1,    60] loss: 4.342
[1,    66] loss: 4.118
Epoch 2
-------------------------------
[2,     6] loss: 4.169
[2,    12] loss: 4.130
[2,    18] loss: 4.059
[2,    24] loss: 4.137
[2,    30] loss: 4.051
[2,    36] loss: 4.049
[2,    42] loss: 4.117
[2,    48] loss: 3.870
[2,    54] loss: 4.021
[2,    60] loss: 3.818
[2,    66] loss: 3.980
Epoch 3
-------------------------------
[3,     6] loss: 4.055
[3,    12] loss: 3.802
[3,    18] loss: 3.978
[3,    24] loss: 4.002
[3,    30] loss: 3.866
[3,    36] loss: 3.829
[3,    42] loss: 3.869
[3,    48] loss: 3.840
[3,    54] loss: 3.765
[3,    60] loss: 3.811
[3,    66] loss: 3.775
Epoch 4
-------------------------------
[4,     6] loss: 3.853
[4,    12] loss: 3.958
[4,    18] loss: 3.690
[4,    24] 