In [2]:
import torch
import torch.nn as nn
import pandas as pd
from torch.utils.data import DataLoader, random_split
from conversion import CSVToTensor

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.input_size = 9
        self.output_size = 9
        self.hidden_size = 2*27

        self.x0 = None
        self.x1 = None
        self.x2 = None
        self.x3 = None

        self.W1 = nn.Parameter(torch.randn(self.input_size, self.hidden_size))
        self.b1 = nn.Parameter(torch.randn(self.hidden_size))

        self.W2 = nn.Parameter(torch.randn(self.hidden_size, self.output_size))
        self.b2 = nn.Parameter(torch.randn(self.output_size))
        self.relu = nn.ReLU()
        # self.dropout = nn.Dropout(0.3)
        self.crossloss = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.parameters(), lr=0.001)
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.train_data = None
        self.val_data = None
    
    def forward(self, x):
        self.x0 = x
        self.x1 = x @ self.W1 + self.b1
        self.x2 = self.relu(self.x1)
        self.x3 = self.x2 @ self.W2 + self.b2
        # x = self.dropout(x)
        
        return self.x3

    def load_data(self, file_path):
        dataloader = CSVToTensor(file_path)
        dataloader.create_all_tensor()
        dataset = dataloader.create_a_dataset()

        train_size = int(0.8 * len(dataset))
        val_size = len(dataset) - train_size
        self.train_data, self.val_data = random_split(dataset, [train_size, val_size])
        self.train_data = DataLoader(self.train_data, batch_size=16, shuffle=True)
        self.val_data = DataLoader(self.val_data, batch_size=16, shuffle=True)

    def train_model(self, epochs):
        self.to(self.device)
        self.train()
        for epoch in range(epochs):
            epoch_loss = 0
            for src, trg in self.train_data:
                src = src.to(self.device)
                trg = trg.to(self.device)
                self.optimizer.zero_grad()
                output = self.forward(src)
                loss = self.crossloss(output, trg.argmax(dim=1))
                loss.backward()
                self.optimizer.step()
                epoch_loss += loss.item()
            avg_loss = epoch_loss / len(self.train_data)
            print(f"Epoch: {epoch}, Loss: {avg_loss:.4f}")
    

In [3]:
# if __name__ == '__main__':
#     model = Model()
#     # choose the dataset file path
#     model.load_data('./Datasets/tic_tac_toe_500_games.csv')
#     # choose the number of epochs
#     with torch.no_grad():
#         model.W1.copy_(torch.zeros(model.input_size, model.hidden_size))
#         model.b1.copy_(torch.ones(model.hidden_size))
#         model.W2.copy_(torch.zeros(model.hidden_size, model.output_size))
#         model.b2.copy_(torch.ones(model.output_size))
#     model.train_model(11)

In [4]:
M = Model()

### Neural Network Weight Calculation by Human Reflection

The following three cells demonstrate how a neural network's weights can be manually calculated to understand how updates affect the network. This process is aimed at providing educational insight into debugging neural networks.

We consider 6 possible input scenarios and manually calculate the appropriate weights to ensure correctness for each scenario without causing errors in others.

(Note: This is purely for educational purposes to understand debugging and does not alter the models themselves.)

---

In [5]:
mytensor1 = torch.tensor([[1,0,1],[0,0,0],[0,0,0]], dtype=torch.float32)
mytensor1 = mytensor1.reshape(1,9)
outtensor1 = torch.tensor([[0,2,0,0,0,0,0,0,0]], dtype=torch.float32)

print(mytensor1.view(3,3))
print("-----------------")

mytensor2 = torch.tensor([[1,1,0],[0,0,0],[0,0,0]], dtype=torch.float32)
mytensor2 = mytensor2.reshape(1,9)
outtensor2 = torch.tensor([[0,0,2,0,0,0,0,0,0]], dtype=torch.float32)

print(mytensor2.view(3,3))
print("-----------------")

mytensor3 = torch.tensor([[0,1,1],[0,0,0],[0,0,0]], dtype=torch.float32)
mytensor3 = mytensor3.reshape(1,9)
outtensor3 = torch.tensor([[2,0,0,0,0,0,0,0,0]], dtype=torch.float32)

print(mytensor3.view(3,3))
print("-----------------")

mytensor4 = torch.tensor([[1,0,0],[0,0,0],[1,0,0]], dtype=torch.float32)
mytensor4 = mytensor4.reshape(1,9)
outtensor4 = torch.tensor([[0,0,0,2,0,0,0,0,0]], dtype=torch.float32)

print(mytensor4.view(3,3))
print("-----------------")

mytensor5 = torch.tensor([[1,0,0],[1,0,0],[0,0,0]], dtype=torch.float32)
mytensor5 = mytensor5.reshape(1,9)
outtensor5 = torch.tensor([[0,0,0,0,0,0,2,0,0]], dtype=torch.float32)

print(mytensor5.view(3,3))
print("-----------------")

mytensor6 = torch.tensor([[0,0,0],[1,0,0],[1,0,0]], dtype=torch.float32)
mytensor6 = mytensor6.reshape(1,9)
outtensor6 = torch.tensor([[2,0,0,0,0,0,0,0,0]], dtype=torch.float32)

print(mytensor6.view(3,3))
print("-----------------")

tensor([[1., 0., 1.],
        [0., 0., 0.],
        [0., 0., 0.]])
-----------------
tensor([[1., 1., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
-----------------
tensor([[0., 1., 1.],
        [0., 0., 0.],
        [0., 0., 0.]])
-----------------
tensor([[1., 0., 0.],
        [0., 0., 0.],
        [1., 0., 0.]])
-----------------
tensor([[1., 0., 0.],
        [1., 0., 0.],
        [0., 0., 0.]])
-----------------
tensor([[0., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.]])
-----------------


In [6]:
with torch.no_grad():
    M.W1.copy_(torch.zeros(M.input_size, M.hidden_size))
    M.b1.copy_(torch.zeros(M.hidden_size))
    M.W2.copy_(torch.zeros(M.hidden_size, M.output_size))
    M.b2.copy_(torch.zeros(M.output_size))

    M.W1[0, 1] = 2
    M.W1[0, 0] = -2
    M.W1[0, 3] = 2

    M.W1[1,1] = -2
    M.W1[1, 2] = 2
    M.W1[1, 3] = -2
        
    M.W1[2, 0] = 2
    M.W1[2, 2] = -2
    M.W1[2 , 3] = - 2
    
    M.W1[3, 6] = 2
    M.W1[3 ,3] = -2
    M.W1[3, 1] = -2
    
    M.W1[6, 1] = -2
    M.W1[6, 6] = -2
    M.W1[6, 0] = 2
    M.W2[0:M.output_size , 0:M.output_size] = torch.eye(M.output_size)
print(M.W1)
# print(M.b1)
# print(M.W2)

Parameter containing:
tensor([[-2.,  2.,  0.,  2.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0., -2.,  2., -2.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 2.,  0., -2., -2.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0., -2.,  0., -2.,  0.,  0.,  2.,  0.,  0.,  0.

In [7]:
output = M.forward(mytensor6)
x0 = M.x0
x1 = M.x1
x2 = M.x2
x3 = M.x3

print(x0)
print("----------------")
print(output)
print("----------------")
print("Excepted ")
print(outtensor2)
print("----------------")
print("Output argmax :")
output.argmax()
# loss = mytensor2 - outtensor2
# loss.sum()

tensor([[0., 0., 0., 1., 0., 0., 1., 0., 0.]])
----------------
tensor([[2., 0., 0., 0., 0., 0., 0., 0., 0.]], grad_fn=<AddBackward0>)
----------------
Excepted 
tensor([[0., 0., 2., 0., 0., 0., 0., 0., 0.]])
----------------
Output argmax :


tensor(0)

In [8]:
f1 = lambda in0, out0: out0-in0
# example of how work lambda function

### Neural Network Weight Calculation by Machine Reflection

The following three cells demonstrate how a neural network's weights can be calculated using machine reflection to understand how updates affect the network. This process is aimed at providing educational insight into debugging neural networks.

For each line of code, we'll reflect on the machine's operations to ensure correctness. We consider 6 possible input scenarios and calculate the appropriate weights to ensure they are correct for each scenario without causing errors in others.

The values are manually updated based on the results of the backpropagation provided by the model.

The dataset for training will be composed only of the 6 tensor that you seen above and will be in the ./Datasets/debug.csv

(Note: This is purely for educational purposes to understand debugging through machine reflection and does not alter the models themselves.)

---

In [279]:
with torch.enable_grad():
    M.W1 = nn.Parameter(torch.randn(M.input_size, M.hidden_size))
    M.b1 = nn.Parameter(torch.randn(M.hidden_size))
    M.W2 = nn.Parameter(torch.randn(M.hidden_size, M.output_size))
    M.b2 = nn.Parameter(torch.randn(M.output_size))

In [278]:
W1_backup = M.W1
W2_backup = M.W2
b1_backup = M.b1
b2_backup = M.b2

print(W1_backup)
print(b1_backup)
print("----------------")
print(W2_backup)
print(b2_backup)

Parameter containing:
tensor([[ 0.7607,  1.1830, -2.1755,  0.4035,  0.5289,  0.4068, -0.3963, -0.0067,
          0.2504, -0.4641,  0.8439, -0.2991, -1.2224,  0.1305, -0.7202,  0.7318,
         -1.2005, -0.9026, -0.3240, -0.3731,  1.8200,  2.4043, -0.3637, -2.6069,
         -0.4025, -0.6294,  0.0035,  0.8499, -0.3010,  0.7200,  0.0547, -2.3964,
         -0.8217, -0.2373, -1.8279, -1.0222,  0.3012, -1.9624,  0.6025,  0.5132,
         -1.7385,  0.4129, -1.8338,  1.1736,  1.1272, -1.1030, -1.5456, -0.5622,
          0.8727,  0.2063,  1.4553, -0.7751, -0.0583,  0.4135],
        [-0.7431, -0.3773, -1.9917,  0.3992,  0.0781,  1.3508,  0.7905, -1.1979,
          0.0972,  0.8137, -1.5300, -2.3547,  2.3226,  0.3028, -1.1419,  1.4530,
          0.1525, -0.8001, -0.6464, -1.0195, -1.0396, -1.4570,  0.9839,  0.7677,
         -0.2743,  0.6185,  0.8377, -0.0437, -0.2232,  0.0956,  0.9923,  0.7170,
          0.4228,  0.7770, -1.0367, -0.2835, -1.2049, -0.7741, -1.4930, -0.6746,
         -0.1737,  0.09

In [277]:
M.W1 = W1_backup
M.W2 = W2_backup
M.b1 = b1_backup
M.b2 = b2_backup

In [291]:
#prepare the data for training,
# we don't split the data because we will evaluate manually
from torch.utils.data import Dataset

path = './Datasets/debug.csv'
loader = CSVToTensor(path)
loader.create_all_tensor()

class CustomDataset(Dataset):
    def __init__(self, input_tensor, output_tensor):
        self.input_tensor = input_tensor
        self.output_tensor = output_tensor

    def __len__(self):
        return len(self.input_tensor)

    def __getitem__(self, idx):
        input_data = self.input_tensor[idx]
        output_data = self.output_tensor[idx]
        return input_data, output_data

input_tensor = loader.game_tensor
output_tensor = loader.prediction_tensor

combined_dataset = CustomDataset(input_tensor, output_tensor)

combined_dataloader = DataLoader(combined_dataset, batch_size=1, shuffle=True)


In [282]:
for src, trg in combined_dataloader:
    print("src : " , src, "trg : " , trg)
    print("-----------------")

src :  tensor([[1., 1., 0., 0., 0., 0., 0., 0., 0.]]) trg :  tensor([[0., 0., 2., 0., 0., 0., 0., 0., 0.]])
-----------------
src :  tensor([[0., 1., 1., 0., 0., 0., 0., 0., 0.]]) trg :  tensor([[2., 0., 0., 0., 0., 0., 0., 0., 0.]])
-----------------
src :  tensor([[0., 0., 0., 1., 0., 0., 1., 0., 0.]]) trg :  tensor([[2., 0., 0., 0., 0., 0., 0., 0., 0.]])
-----------------
src :  tensor([[1., 0., 1., 0., 0., 0., 0., 0., 0.]]) trg :  tensor([[0., 2., 0., 0., 0., 0., 0., 0., 0.]])
-----------------
src :  tensor([[1., 0., 0., 0., 0., 0., 1., 0., 0.]]) trg :  tensor([[0., 0., 0., 2., 0., 0., 0., 0., 0.]])
-----------------
src :  tensor([[1., 0., 0., 1., 0., 0., 0., 0., 0.]]) trg :  tensor([[0., 0., 0., 0., 0., 0., 2., 0., 0.]])
-----------------


In [303]:
for _ in range (36):
    for src, trg in combined_dataloader:
        M.optimizer.zero_grad()

        output = M.forward(src)

        # loss = torch.pow(output - trg, 2).sum().sqrt()
        loss = M.crossloss(output, trg.argmax(dim=1))

        loss.backward()

        M.optimizer.step()

In [304]:
print ("\nloss :\t", loss.item())
print ("input :\t", M.x0)
print ("output :", output)
print ("target :", trg)
print ("\nW1 grad :\t", M.W1.grad)
print ('W1 grad max :\t', M.W1.grad.max())


loss :	 6.234299659729004
input :	 tensor([[1., 0., 0., 0., 0., 0., 1., 0., 0.]])
output : tensor([[  4.9632,  -8.2869,   1.2158,  -1.2174, -10.2340,  -0.3738, -13.9424,
          -8.7958,   1.2641]], grad_fn=<AddBackward0>)
target : tensor([[0., 0., 0., 2., 0., 0., 0., 0., 0.]])

W1 grad :	 tensor([[ 1.6862e+03,  0.0000e+00,  2.4608e+03, -4.5621e+02, -1.2904e+03,
         -1.3063e+03,  6.5331e+02, -1.8821e+03,  5.8170e+02,  0.0000e+00,
          8.5032e+02,  0.0000e+00, -2.8109e+03,  1.8126e+03,  0.0000e+00,
          0.0000e+00,  0.0000e+00, -1.0589e+03,  0.0000e+00,  3.4909e+02,
         -7.5309e+02, -2.7704e+02,  5.7016e+02,  2.6361e+01,  5.0517e+02,
         -6.7303e+01,  0.0000e+00,  9.5539e+01,  4.0936e+02,  0.0000e+00,
         -4.2361e+02, -3.1539e+02,  1.3330e+03,  2.4388e+03,  0.0000e+00,
          0.0000e+00, -5.5776e+02, -1.6763e+03,  2.9357e+03,  1.6128e+02,
          0.0000e+00,  3.7424e+02,  1.4474e+02,  1.4145e+02, -6.9596e+02,
          3.1385e+02,  3.4012e+02,  0.00

In [275]:
i = 3

src_i = combined_dataloader.dataset.input_tensor[i]
trg_i = combined_dataloader.dataset.output_tensor[i]

print ("\ninput  :", src_i)
print ("target :", trg_i)

debug_output = M.forward(src_i)
print ("debug output :", debug_output)

# debug_loss = torch.pow(debug_output - trg_i, 2).sum().sqrt()
debug_loss = M.crossloss(debug_output, trg_i.argmax(dim=0))
print ("debug loss :\t", debug_loss)

debug_loss.backward()
print ("W1 grad :\t", M.W1.grad)




input  : tensor([1., 0., 0., 0., 0., 0., 1., 0., 0.])
target : tensor([0., 0., 0., 2., 0., 0., 0., 0., 0.])
debug output : tensor([ 13.8007,  14.4762,  -3.2627,   0.8664,  -0.0930,  14.1155,   7.1295,
        -13.2803,   5.6219], grad_fn=<AddBackward0>)
debug loss :	 tensor(14.4014, grad_fn=<NllLossBackward0>)
W1 grad :	 tensor([[-6.0175e+02, -3.3428e+02,  0.0000e+00,  1.4126e+03,  4.9800e+02,
          3.0197e+03, -7.3280e+02, -1.6629e+01,  6.2729e+02,  7.4260e+02,
          7.0546e+02,  6.0493e+02,  2.5775e+02, -1.1757e+03,  0.0000e+00,
          1.2607e+03,  0.0000e+00,  0.0000e+00, -1.0675e+03,  1.1895e+02,
         -1.3616e+03,  4.5454e+02,  3.2306e+02,  0.0000e+00,  0.0000e+00,
         -7.1341e+01,  0.0000e+00,  1.4906e+01,  0.0000e+00,  7.8248e+02,
          1.4336e+03,  0.0000e+00, -2.0557e+02,  8.9485e-01,  0.0000e+00,
         -6.2536e+02,  2.3915e+01,  0.0000e+00,  6.1461e+02,  2.4828e+01,
          0.0000e+00, -2.1159e+02,  0.0000e+00,  4.8831e+02,  1.8171e+02,
          

TEST FOR UNDERSTAND HOW TO DEBUG 

In [33]:
with torch.enable_grad():
    i = 0
    
    src = inputdata.dataset[0]

    output = M.forward(src)
    
    print("output :", output)

    print("prediction :", loader.prediction_tensor[i])

    loss = (output - loader.prediction_tensor[i]).sum()
    print(loss)
    loss.backward()
    print(M.W1.grad)



output : tensor([ 1.5546,  3.0224, -6.8387, -7.0046, -1.3878,  2.8336,  0.3625,  1.7013,
        -6.9491], grad_fn=<AddBackward0>)
prediction : tensor([0., 2., 0., 0., 0., 0., 0., 0., 0.])
tensor(-14.7057, grad_fn=<SumBackward0>)
tensor([[ 10.6867,  -1.9877,   1.1917,  -0.5787, -22.9481,   0.0000, -15.2521,
           0.5593,   0.0000,   0.0000,   0.0000,  26.1689, -19.8233,   0.0000,
           0.2757,   0.0000,   7.1038,   6.2546,  -2.3206, -12.3942,   1.0800,
         -18.5428,   0.7590,  -0.8824,   0.0000,   0.0000,   2.3690,  13.4138,
         -19.3113,  -0.0856,  -0.7895,  21.0843,  -6.5768, -32.5610,  16.7636,
          -2.5015,   2.3915,  -9.3138,  -8.5426,  11.7095,   0.0000,  -3.1289,
           1.1421, -20.1334,  -0.4368,  -3.1831,   5.8103,  -7.9939,  30.7856,
           1.6191,  -7.0927,   3.2435,   0.0000,   4.6845],
        [  3.5622,  -0.4969,   0.4767,  -0.2894, -11.4740,   0.0000,  -7.6260,
           0.0000,  -0.1904,   0.0000,   0.0000,  10.4675,  -7.9293,  -2.2635,

In [310]:
with torch.no_grad():

    M.W1.grad = M.W1.grad * 0
    newoutput = M.forward(src)
    print(newoutput)
    newloss = torch.abs(newoutput - loader.prediction_tensor[i]).sum()
    print(newloss)
    newloss = torch.pow(newoutput - loader.prediction_tensor[i], 2).sum().sqrt()
    print(newloss)
    # newloss.backward()
    M.W1 -= (M.W1.grad) * 0.0001
    

tensor([[  2.7487,  -8.7213,   1.5931,   1.6779,  -9.6748,  -4.3287, -11.8560,
          -6.7841,   1.7666]])
tensor(47.7955)
tensor(19.7055)


In [20]:
print(loader.prediction_tensor[i])

tensor([0., 2., 0., 0., 0., 0., 0., 0., 0.])


Remarque :
- Si l'on commence un entrainement et que le modèle ne devient pas assez précis c'est fini.
- Ma fonction de loss n'était pas assez précise ainsi les gradient devenait de plus en plus grand et donc l'ajustement divergeait
- Lorsque que j'ai changé ma fonction de loss j'ai re-entrainé mais sans amélioration (étant donné que les tenseurs étaient déja bien trop grand c'était impossible à rattraper)
- lorsque j'ai reset les matrice est ré-entrainée j'ai obtenu une loss + que correcte (environ 0.0003)
- Sur un dataset petit (donc surentrainement pour les valeurs actuelle) si le batch_size fait la taille du dataset l'entrainement n'est pas possible cela diverge trop
- si il est de moitié l'entrainement à du mal tout de même, il faut donc pour les gros dataset, trouver le rapport (efficience / rapidité)
- lorsque l'on train donc sur un batch_size de 1 sans que la loss diverge (aléatoire j'ai bien l'impression)
- update : la loss qui était petit à la fin de mon entrainement correspondait à un input unique ainsi les autres test n'étaient pas si concluant.....
problématique :
- il faut le faire tourner un nombre N* la taille du datasets ?
- faut'il améliorer le batch_size afin d'avoir un cohérence d'erreur pour tous ?
-