In [1]:
import numpy as np 
import torch 
import torch.nn as nn 
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

In [2]:
class DBN(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(773,100)
        self.fc2 = nn.Linear(100,100)
        self.fc3 = nn.Linear(100,100)
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        return x

In [3]:
dbn = DBN()

In [4]:
%%capture

from extractor import get_dataset
path = 'dataset/games.pgn'
num_games = 50

X, Y = get_dataset(path, num_games)
X = X.type(torch.FloatTensor)
Y = Y.type(torch.FloatTensor)
len_data = X.shape[0]
split = int(len_data*0.8)
X_train = X[:split,:]
X_test = X[split:,:]
Y_train = Y[:split]
Y_test = Y[split:]

In [5]:
dbn_dataset = TensorDataset(X, torch.zeros((X.shape[0])))
dbn_dataloader = DataLoader(dbn_dataset, batch_size=64, shuffle=True)
train_dataset = TensorDataset(X_train, Y_train)
train_datalaoder = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataset = TensorDataset(X_test, Y_test)
test_datalaoder = DataLoader(test_dataset, batch_size=64, shuffle=True)

In [6]:
#greedy training 

layers = [773,100,100,100]
W, B, C = [], [], []

for i in range(len(layers)-1):
    w = torch.randn((layers[i],layers[i+1]),dtype=torch.float)
    c = torch.randn((layers[i]),dtype=torch.float)
    b = torch.randn((layers[i+1]),dtype=torch.float)
    
    parameters = [w, c, b]
    for p in parameters:
        p.requires_grad = True

    optimizer = optim.SGD(parameters,lr=0.1)
    criterion = nn.CrossEntropyLoss()
    epochs = 8
    losses = []

    for epoch in range(epochs):
        running_loss = 0.0
        print(f"Epoch {epoch+1}\n-------------------------------")
        for batch, (x, _) in enumerate(dbn_dataloader):
            if i > 0:
                for j in range(len(W)):
                    x = torch.relu(x @ W[j] + B[j])

            probs = torch.relu((torch.relu(x @ w + b) @ w.T) + c)
            loss = criterion(probs,x)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if batch % 6 == 5:
                print(f'[{epoch + 1}, {batch + 1:5d}] loss: {running_loss / 200:.3f}')
                running_loss = 0.0
        losses.append(running_loss)
        
    print(f'finished training layer {i+1}')
    W.append(w.clone().detach())
    B.append(b.clone().detach())
    C.append(c.clone().detach())
    
    for p in parameters: del p



            

Epoch 1
-------------------------------
[1,     6] loss: 26.407
[1,    12] loss: 8.206
[1,    18] loss: 6.154
[1,    24] loss: 5.337
[1,    30] loss: 5.045
[1,    36] loss: 4.740
[1,    42] loss: 4.526
[1,    48] loss: 4.358
[1,    54] loss: 4.418
[1,    60] loss: 4.317
[1,    66] loss: 4.345
Epoch 2
-------------------------------
[2,     6] loss: 4.214
[2,    12] loss: 4.109
[2,    18] loss: 3.979
[2,    24] loss: 4.141
[2,    30] loss: 4.196
[2,    36] loss: 4.034
[2,    42] loss: 4.018
[2,    48] loss: 4.087
[2,    54] loss: 3.964
[2,    60] loss: 4.030
[2,    66] loss: 4.055
Epoch 3
-------------------------------
[3,     6] loss: 4.081
[3,    12] loss: 4.074
[3,    18] loss: 4.021
[3,    24] loss: 3.924
[3,    30] loss: 3.899
[3,    36] loss: 3.780
[3,    42] loss: 3.927
[3,    48] loss: 3.939
[3,    54] loss: 3.937
[3,    60] loss: 3.872
[3,    66] loss: 3.982
Epoch 4
-------------------------------
[4,     6] loss: 4.047
[4,    12] loss: 3.788
[4,    18] loss: 3.927
[4,    24] 

In [7]:
x = X[:64,:]
w = torch.randn((773,100),dtype=torch.float)
c = torch.randn((773),dtype=torch.float)
b = torch.randn((100),dtype=torch.float)
torch.sigmoid((torch.sigmoid(x @ w + b) @ w.T) + c).shape, x.shape

(torch.Size([64, 773]), torch.Size([64, 773]))