### Model Architecture

In [353]:
import torch
import torch.nn as nn
import pandas as pd
from torch.utils.data import DataLoader, random_split
from conversion import CSVToTensor

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.input_size = 9
        self.output_size = 9
        self.hidden_size = 1*27
        self.middle_size = 2*27

        self.x0 = None
        self.x1 = None
        self.x2 = None
        self.x3 = None

        self.W1 = nn.Parameter(torch.randn(self.input_size, self.hidden_size))
        self.b1 = nn.Parameter(torch.randn(self.hidden_size))
        # self.W2 = nn.Parameter(torch.randn(self.hidden_size, self.hidden_size))
        # self.b2 = nn.Parameter(torch.randn(self.hidden_size))
        self.W3 = nn.Parameter(torch.randn(self.hidden_size, self.middle_size))
        self.b3 = nn.Parameter(torch.randn(self.middle_size))
        self.W4 = nn.Parameter(torch.randn(self.middle_size, self.hidden_size))
        self.b4 = nn.Parameter(torch.randn(self.hidden_size))
        self.W5 = nn.Parameter(torch.randn(self.hidden_size, self.output_size))
        self.b5 = nn.Parameter(torch.randn(self.output_size))
        self.relu = nn.ReLU()
        # self.dropout = nn.Dropout(0.3)
        self.crossloss = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.parameters(), lr=0.001)
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.train_data = None
        self.val_data = None
    
    def forward(self, x):
        self.x0 = x
        self.x1 = x @ self.W1 + self.b1
        self.x2 = self.relu(self.x1)
        # self.x3 = self.x2 @ self.W2 + self.b2
        # self.x4 = self.relu(self.x3)
        self.x5 = self.x2 @ self.W3 + self.b3
        self.x6 = self.relu(self.x5)
        self.x7 = self.x6 @ self.W4 + self.b4
        self.x8 = self.relu(self.x7)
        self.x9 = self.x8 @ self.W5 + self.b5
        # x = self.dropout(x)
        
        return self.x9

    def load_data(self, file_path):
        dataloader = CSVToTensor(file_path)
        dataloader.create_all_tensor()
        dataset = dataloader.create_a_dataset()

        train_size = int(0.8 * len(dataset))
        val_size = len(dataset) - train_size
        self.train_data, self.val_data = random_split(dataset, [train_size, val_size])
        self.train_data = DataLoader(self.train_data, batch_size=16, shuffle=True)
        self.val_data = DataLoader(self.val_data, batch_size=16, shuffle=True)

    def train_model(self, epochs):
        self.to(self.device)
        self.train()
        for epoch in range(epochs):
            epoch_loss = 0
            for src, trg in self.train_data:
                src = src.to(self.device)
                trg = trg.to(self.device)
                self.optimizer.zero_grad()
                output = self.forward(src)
                loss = self.crossloss(output, trg.argmax(dim=1))
                loss.backward()
                self.optimizer.step()
                epoch_loss += loss.item()
            avg_loss = epoch_loss / len(self.train_data)
            print(f"Epoch: {epoch}, Loss: {avg_loss:.4f}")
    

In [2]:
# if __name__ == '__main__':
#     model = Model()
#     # choose the dataset file path
#     model.load_data('./Datasets/tic_tac_toe_500_games.csv')
#     # choose the number of epochs
#     with torch.no_grad():
#         model.W1.copy_(torch.zeros(model.input_size, model.hidden_size))
#         model.b1.copy_(torch.ones(model.hidden_size))
#         model.W2.copy_(torch.zeros(model.hidden_size, model.output_size))
#         model.b2.copy_(torch.ones(model.output_size))
#     model.train_model(11)

In [354]:
M = Model()

### Neural Network Weight Calculation by Human Reflection

The following three cells demonstrate how a neural network's weights can be manually calculated to understand how updates affect the network. This process is aimed at providing educational insight into debugging neural networks.

We consider 6 possible input scenarios and manually calculate the appropriate weights to ensure correctness for each scenario without causing errors in others.

(Note: This is purely for educational purposes to understand debugging and does not alter the models themselves.)

---

In [4]:
mytensor1 = torch.tensor([[1,0,1],[0,0,0],[0,0,0]], dtype=torch.float32)
mytensor1 = mytensor1.reshape(1,9)
outtensor1 = torch.tensor([[0,2,0,0,0,0,0,0,0]], dtype=torch.float32)

print(mytensor1.view(3,3))
print("-----------------")

mytensor2 = torch.tensor([[1,1,0],[0,0,0],[0,0,0]], dtype=torch.float32)
mytensor2 = mytensor2.reshape(1,9)
outtensor2 = torch.tensor([[0,0,2,0,0,0,0,0,0]], dtype=torch.float32)

print(mytensor2.view(3,3))
print("-----------------")

mytensor3 = torch.tensor([[0,1,1],[0,0,0],[0,0,0]], dtype=torch.float32)
mytensor3 = mytensor3.reshape(1,9)
outtensor3 = torch.tensor([[2,0,0,0,0,0,0,0,0]], dtype=torch.float32)

print(mytensor3.view(3,3))
print("-----------------")

mytensor4 = torch.tensor([[1,0,0],[0,0,0],[1,0,0]], dtype=torch.float32)
mytensor4 = mytensor4.reshape(1,9)
outtensor4 = torch.tensor([[0,0,0,2,0,0,0,0,0]], dtype=torch.float32)

print(mytensor4.view(3,3))
print("-----------------")

mytensor5 = torch.tensor([[1,0,0],[1,0,0],[0,0,0]], dtype=torch.float32)
mytensor5 = mytensor5.reshape(1,9)
outtensor5 = torch.tensor([[0,0,0,0,0,0,2,0,0]], dtype=torch.float32)

print(mytensor5.view(3,3))
print("-----------------")

mytensor6 = torch.tensor([[0,0,0],[1,0,0],[1,0,0]], dtype=torch.float32)
mytensor6 = mytensor6.reshape(1,9)
outtensor6 = torch.tensor([[2,0,0,0,0,0,0,0,0]], dtype=torch.float32)

print(mytensor6.view(3,3))
print("-----------------")

tensor([[1., 0., 1.],
        [0., 0., 0.],
        [0., 0., 0.]])
-----------------
tensor([[1., 1., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
-----------------
tensor([[0., 1., 1.],
        [0., 0., 0.],
        [0., 0., 0.]])
-----------------
tensor([[1., 0., 0.],
        [0., 0., 0.],
        [1., 0., 0.]])
-----------------
tensor([[1., 0., 0.],
        [1., 0., 0.],
        [0., 0., 0.]])
-----------------
tensor([[0., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.]])
-----------------


In [5]:
with torch.no_grad():
    M.W1.copy_(torch.zeros(M.input_size, M.hidden_size))
    M.b1.copy_(torch.zeros(M.hidden_size))
    M.W2.copy_(torch.zeros(M.hidden_size, M.output_size))
    M.b2.copy_(torch.zeros(M.output_size))

    M.W1[0, 1] = 2
    M.W1[0, 0] = -2
    M.W1[0, 3] = 2

    M.W1[1,1] = -2
    M.W1[1, 2] = 2
    M.W1[1, 3] = -2
        
    M.W1[2, 0] = 2
    M.W1[2, 2] = -2
    M.W1[2 , 3] = - 2
    
    M.W1[3, 6] = 2
    M.W1[3 ,3] = -2
    M.W1[3, 1] = -2
    
    M.W1[6, 1] = -2
    M.W1[6, 6] = -2
    M.W1[6, 0] = 2
    M.W2[0:M.output_size , 0:M.output_size] = torch.eye(M.output_size)
print(M.W1)
# print(M.b1)
# print(M.W2)

RuntimeError: The size of tensor a (27) must match the size of tensor b (9) at non-singleton dimension 1

In [None]:
output = M.forward(mytensor6)
x0 = M.x0
x1 = M.x1
x2 = M.x2
x3 = M.x3

print(x0)
print("----------------")
print(output)
print("----------------")
print("Excepted ")
print(outtensor2)
print("----------------")
print("Output argmax :")
output.argmax()
# loss = mytensor2 - outtensor2
# loss.sum()

tensor([[0., 0., 0., 1., 0., 0., 1., 0., 0.]])
----------------
tensor([[2., 0., 0., 0., 0., 0., 0., 0., 0.]], grad_fn=<AddBackward0>)
----------------
Excepted 
tensor([[0., 0., 2., 0., 0., 0., 0., 0., 0.]])
----------------
Output argmax :


tensor(0)

In [None]:
f1 = lambda in0, out0: out0-in0
# example of how work lambda function

### Neural Network Weight Calculation by Machine Reflection

The following three cells demonstrate how a neural network's weights can be calculated using machine reflection to understand how updates affect the network. This process is aimed at providing educational insight into debugging neural networks.

For each line of code, we'll reflect on the machine's operations to ensure correctness. We consider 6 possible input scenarios and calculate the appropriate weights to ensure they are correct for each scenario without causing errors in others.

The values are manually updated based on the results of the backpropagation provided by the model.

The dataset for training will be composed only of the 6 tensor that you seen above and will be in the ./Datasets/debug.csv

(Note: This is purely for educational purposes to understand debugging through machine reflection and does not alter the models themselves.)

---

In [397]:
#prepare the data for training,
# we don't split the data because we will evaluate manually
from torch.utils.data import Dataset

path = './Datasets/debug.csv'
loader = CSVToTensor(path)
loader.create_all_tensor()

class CustomDataset(Dataset):
    def __init__(self, input_tensor, output_tensor):
        self.input_tensor = input_tensor
        self.output_tensor = output_tensor

    def __len__(self):
        return len(self.input_tensor)

    def __getitem__(self, idx):
        input_data = self.input_tensor[idx]
        output_data = self.output_tensor[idx]
        return input_data, output_data

input_tensor = loader.game_tensor
output_tensor = loader.prediction_tensor

combined_dataset = CustomDataset(input_tensor, output_tensor)

combined_dataloader = DataLoader(combined_dataset, batch_size=6, shuffle=True)

In [398]:
for src, trg in combined_dataloader:
    print("src : " , src, "\ntrg : " , trg)
    print("-----------------")

src :  tensor([[1., 0., 0., 0., 0., 0., 1., 0., 0.],
        [1., 0., 1., 0., 0., 0., 0., 0., 0.],
        [0., 1., 1., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 1., 0., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 1., 0., 0.]]) 
trg :  tensor([[0., 0., 0., 2., 0., 0., 0., 0., 0.],
        [0., 2., 0., 0., 0., 0., 0., 0., 0.],
        [2., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 2., 0., 0.],
        [0., 0., 2., 0., 0., 0., 0., 0., 0.],
        [2., 0., 0., 0., 0., 0., 0., 0., 0.]])
-----------------


In [440]:
with torch.enable_grad():
    M.W1 = nn.Parameter(torch.randn(M.input_size, M.hidden_size))
    M.b1 = nn.Parameter(torch.randn(M.hidden_size))
    # M.W2 = nn.Parameter(torch.randn(M.hidden_size, M.hidden_size))
    # M.b2 = nn.Parameter(torch.randn(M.hidden_size))
    M.W3 = nn.Parameter(torch.randn(M.hidden_size, M.middle_size))
    M.b3 = nn.Parameter(torch.randn(M.middle_size))
    M.W4 = nn.Parameter(torch.randn(M.middle_size, M.hidden_size))
    M.b4 = nn.Parameter(torch.randn(M.hidden_size))
    M.W5 = nn.Parameter(torch.randn(M.hidden_size, M.output_size))
    M.b5 = nn.Parameter(torch.randn(M.output_size))

#### Train the model and debug him

In [443]:
for _ in range (240):
    for src, trg in combined_dataloader:
        M.optimizer.zero_grad()

        output = M.forward(src)

        # loss = torch.pow(output - trg, 2).sum().sqrt()
        # loss = torch.nn.functional.mse_loss(output, trg)
        loss = M.crossloss(output, trg.argmax(dim=1))

        loss.backward()

        M.optimizer.step()

In [444]:
print ("\nloss :\t", loss.item())
print("learning rate : ", M.optimizer.param_groups[0]['lr'])
# print ("source :\t", src)
print ("input :\t", M.x0)
print ("target :", trg)
print ("output :", output)
print ("\nW1 grad :\t", M.W1.grad)
print ('W1 grad max :\t', M.W1.grad.max().item())


loss :	 259.7158508300781
learning rate :  0.001
input :	 tensor([[1., 1., 0., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 1., 0., 0.],
        [1., 0., 0., 1., 0., 0., 0., 0., 0.],
        [1., 0., 1., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 1., 0., 0.],
        [0., 1., 1., 0., 0., 0., 0., 0., 0.]])
target : tensor([[0., 0., 2., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 2., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 2., 0., 0.],
        [0., 2., 0., 0., 0., 0., 0., 0., 0.],
        [2., 0., 0., 0., 0., 0., 0., 0., 0.],
        [2., 0., 0., 0., 0., 0., 0., 0., 0.]])
output : tensor([[ 403.5565,  -23.2329, -123.1361, -132.7495,  -60.0375, -245.1788,
         -241.0100,   48.6205, -165.5857],
        [ 320.4021,  -40.1247, -140.8801,   -2.1158,  -81.3735, -194.7916,
         -154.6671,   69.7379, -154.3593],
        [  92.2327,   30.5439, -145.9053,   28.3069,  -19.4615, -214.6596,
         -219.6817,    6.5259, -119.1038],
        [  81.9

In [276]:
#Make a backup of the model parameters
W1_backup = M.W1
W2_backup = M.W2
b1_backup = M.b1
b2_backup = M.b2
loss_backup = loss

In [None]:
print("loss backup : ", loss_backup)
print("----------------")
print(W1_backup)
print(b1_backup)
print("----------------")
print(W2_backup)
print(b2_backup)

loss backup :  tensor(9.0931, grad_fn=<NllLossBackward0>)
----------------
Parameter containing:
tensor([[-8.3823e-01,  1.6689e+00, -4.8720e-01,  6.5497e-01,  2.5154e+00,
          4.9060e-01, -8.4316e-01,  4.2954e-01, -2.7034e-01,  5.9296e-01,
          1.2493e-01,  8.7014e-01,  8.9828e-01, -5.9537e-01, -7.6154e-01,
         -5.7165e-01, -1.8796e+00,  2.3637e-01, -3.7898e-01,  2.3082e-01,
         -4.9364e-01,  4.1974e-01, -5.6861e-01,  2.2960e+00,  1.2559e-01,
         -3.1320e-01,  1.9170e-01,  8.8043e-01, -1.2137e+00,  2.8339e-01,
          1.2445e+00,  7.9507e-01,  1.9056e-01, -1.2515e-01, -3.3326e-02,
         -2.8241e-01, -1.3753e+00, -1.4740e+00, -5.4155e-01,  8.6255e-01,
          6.7343e-01,  9.6610e-01, -1.9353e-01,  5.9533e-01, -2.4593e-02,
          2.4699e-01,  1.0093e+00,  6.3805e-01, -3.5799e-01, -4.7814e-01,
          3.4427e-01,  1.3501e+00, -2.1410e+00,  1.6910e+00],
        [-6.6460e-01, -8.4688e-01,  4.0256e-01,  1.5847e+00,  3.1539e-01,
          2.3975e+00,  1.14

In [None]:
# Restore the model parameters
M.W1 = W1_backup
M.W2 = W2_backup
M.b1 = b1_backup
M.b2 = b2_backup

#### Choose an input "in the dataset" for debug visualization

In [30]:
i = 5

src_i = combined_dataloader.dataset.input_tensor[i]
trg_i = combined_dataloader.dataset.output_tensor[i]

print ("\ninput  :", src_i)
print ("target :", trg_i)

debug_output = M.forward(src_i)
print ("debug output :", debug_output)

# debug_loss = torch.pow(debug_output - trg_i, 2).sum().sqrt()
debug_loss = M.crossloss(debug_output, trg_i.argmax(dim=0))
print ("\ndebug loss :\t", debug_loss)

debug_loss.backward()
print ("W1 grad :\t", M.W1.grad)




input  : tensor([0., 0., 0., 1., 0., 0., 1., 0., 0.])
target : tensor([2., 0., 0., 0., 0., 0., 0., 0., 0.])
debug output : tensor([  6.1847,   3.5869,   5.3315,  -2.3206, -10.5088,  -5.2965,  -1.8887,
          6.5189,  -2.1786,   9.5136,   3.6561,  -9.4451,   2.7971,  -4.7631,
          2.4654,   2.2339,   7.2720,  -4.8319,  -5.9096,   2.8287,  -0.0551,
          1.6739,   5.6637,   1.4134,  -0.2863, -12.3256,  -3.6353],
       grad_fn=<AddBackward0>)

debug loss :	 tensor(3.5433, grad_fn=<NllLossBackward0>)
W1 grad :	 tensor([[ 0.0000e+00,  1.0799e+00,  4.3889e+00, -1.1989e+00, -1.0318e+00,
          0.0000e+00, -2.2258e+00,  2.8811e-01, -3.0002e+00,  1.0454e+00,
          2.6131e+00,  8.2442e+00, -5.2789e-01, -6.9018e-01,  1.2503e+00,
          1.3180e+00,  5.5312e+00, -1.7584e+00, -1.6998e+00, -5.8338e-01,
          1.0824e+01,  5.6999e+00, -1.7084e+00,  3.7661e+00, -5.7395e-01,
         -6.0848e-01,  1.9141e+00],
        [ 0.0000e+00,  1.0626e+00, -8.0438e-01, -8.4266e-01, -1.719

##### Example of how each function of learning can be simplified to undersand his meanning

In [21]:
with torch.no_grad():

    M.W1.grad = M.W1.grad * 0
    newoutput = M.forward(src)
    print(newoutput)
    newloss = torch.abs(newoutput - loader.prediction_tensor[i]).sum()
    print(newloss)
    newloss = torch.pow(newoutput - loader.prediction_tensor[i], 2).sum().sqrt()
    print(newloss)
    newloss.backward()
    M.W1 -= (M.W1.grad) * 0.0001
    

tensor([[ 10.0378,   0.2473,   7.1022, -13.1150,  -8.6392,  -3.1972,   0.4513,
           7.0299, -10.9851,   2.0947,  -4.4618, -12.2502,  -4.1681,   0.6682,
           3.6768,  -4.6908,  -3.0664,  -5.0941,  -7.5001,  -2.5662,  -3.8774,
          -2.3144,   8.4225,  -1.8473,   1.7161, -12.6346,  -1.0849]])


RuntimeError: The size of tensor a (27) must match the size of tensor b (9) at non-singleton dimension 1