In [2]:
import torch
import torch.nn as nn
import pandas as pd
from torch.utils.data import DataLoader, random_split
from conversion import CSVToTensor

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.input_size = 9
        self.output_size = 9
        self.hidden_size = 2*27

        self.x0 = None
        self.x1 = None
        self.x2 = None
        self.x3 = None

        self.W1 = nn.Parameter(torch.randn(self.input_size, self.hidden_size))
        self.b1 = nn.Parameter(torch.randn(self.hidden_size))

        self.W2 = nn.Parameter(torch.randn(self.hidden_size, self.output_size))
        self.b2 = nn.Parameter(torch.randn(self.output_size))
        self.relu = nn.ReLU()
        # self.dropout = nn.Dropout(0.3)
        self.crossloss = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.parameters(), lr=0.001)
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.train_data = None
        self.val_data = None
    
    def forward(self, x):
        self.x0 = x
        self.x1 = x @ self.W1 + self.b1
        self.x2 = self.relu(self.x1)
        self.x3 = self.x2 @ self.W2 + self.b2
        # x = self.dropout(x)
        
        return self.x3

    def load_data(self, file_path):
        dataloader = CSVToTensor(file_path)
        dataloader.create_all_tensor()
        dataset = dataloader.create_a_dataset()

        train_size = int(0.8 * len(dataset))
        val_size = len(dataset) - train_size
        self.train_data, self.val_data = random_split(dataset, [train_size, val_size])
        self.train_data = DataLoader(self.train_data, batch_size=16, shuffle=True)
        self.val_data = DataLoader(self.val_data, batch_size=16, shuffle=True)

    def train_model(self, epochs):
        self.to(self.device)
        self.train()
        for epoch in range(epochs):
            epoch_loss = 0
            for src, trg in self.train_data:
                src = src.to(self.device)
                trg = trg.to(self.device)
                self.optimizer.zero_grad()
                output = self.forward(src)
                loss = self.crossloss(output, trg.argmax(dim=1))
                loss.backward()
                self.optimizer.step()
                epoch_loss += loss.item()
            avg_loss = epoch_loss / len(self.train_data)
            print(f"Epoch: {epoch}, Loss: {avg_loss:.4f}")
    

In [None]:
# if __name__ == '__main__':
#     model = Model()
#     # choose the dataset file path
#     model.load_data('./Datasets/tic_tac_toe_500_games.csv')
#     # choose the number of epochs
#     with torch.no_grad():
#         model.W1.copy_(torch.zeros(model.input_size, model.hidden_size))
#         model.b1.copy_(torch.ones(model.hidden_size))
#         model.W2.copy_(torch.zeros(model.hidden_size, model.output_size))
#         model.b2.copy_(torch.ones(model.output_size))
#     model.train_model(11)

In [3]:
M = Model()

### Neural Network Weight Calculation by Human Reflection

The following three cells demonstrate how a neural network's weights can be manually calculated to understand how updates affect the network. This process is aimed at providing educational insight into debugging neural networks.

We consider 6 possible input scenarios and manually calculate the appropriate weights to ensure correctness for each scenario without causing errors in others.

(Note: This is purely for educational purposes to understand debugging and does not alter the models themselves.)

---

In [4]:
mytensor1 = torch.tensor([[1,0,1],[0,0,0],[0,0,0]], dtype=torch.float32)
mytensor1 = mytensor1.reshape(1,9)
outtensor1 = torch.tensor([[0,2,0,0,0,0,0,0,0]], dtype=torch.float32)

print(mytensor1.view(3,3))
print("-----------------")

mytensor2 = torch.tensor([[1,1,0],[0,0,0],[0,0,0]], dtype=torch.float32)
mytensor2 = mytensor2.reshape(1,9)
outtensor2 = torch.tensor([[0,0,2,0,0,0,0,0,0]], dtype=torch.float32)

print(mytensor2.view(3,3))
print("-----------------")

mytensor3 = torch.tensor([[0,1,1],[0,0,0],[0,0,0]], dtype=torch.float32)
mytensor3 = mytensor3.reshape(1,9)
outtensor3 = torch.tensor([[2,0,0,0,0,0,0,0,0]], dtype=torch.float32)

print(mytensor3.view(3,3))
print("-----------------")

mytensor4 = torch.tensor([[1,0,0],[0,0,0],[1,0,0]], dtype=torch.float32)
mytensor4 = mytensor4.reshape(1,9)
outtensor4 = torch.tensor([[0,0,0,2,0,0,0,0,0]], dtype=torch.float32)

print(mytensor4.view(3,3))
print("-----------------")

mytensor5 = torch.tensor([[1,0,0],[1,0,0],[0,0,0]], dtype=torch.float32)
mytensor5 = mytensor5.reshape(1,9)
outtensor5 = torch.tensor([[0,0,0,0,0,0,2,0,0]], dtype=torch.float32)

print(mytensor5.view(3,3))
print("-----------------")

mytensor6 = torch.tensor([[0,0,0],[1,0,0],[1,0,0]], dtype=torch.float32)
mytensor6 = mytensor6.reshape(1,9)
outtensor6 = torch.tensor([[2,0,0,0,0,0,0,0,0]], dtype=torch.float32)

print(mytensor6.view(3,3))
print("-----------------")

tensor([[1., 0., 1.],
        [0., 0., 0.],
        [0., 0., 0.]])
-----------------
tensor([[1., 1., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
-----------------
tensor([[0., 1., 1.],
        [0., 0., 0.],
        [0., 0., 0.]])
-----------------
tensor([[1., 0., 0.],
        [0., 0., 0.],
        [1., 0., 0.]])
-----------------
tensor([[1., 0., 0.],
        [1., 0., 0.],
        [0., 0., 0.]])
-----------------
tensor([[0., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.]])
-----------------


In [152]:
with torch.no_grad():
    M.W1.copy_(torch.zeros(M.input_size, M.hidden_size))
    M.b1.copy_(torch.zeros(M.hidden_size))
    M.W2.copy_(torch.zeros(M.hidden_size, M.output_size))
    M.b2.copy_(torch.zeros(M.output_size))

    M.W1[0, 1] = 2
    M.W1[0, 0] = -2
    M.W1[0, 3] = 2

    M.W1[1,1] = -2
    M.W1[1, 2] = 2
    M.W1[1, 3] = -2
        
    M.W1[2, 0] = 2
    M.W1[2, 2] = -2
    M.W1[2 , 3] = - 2
    
    M.W1[3, 6] = 2
    M.W1[3 ,3] = -2
    M.W1[3, 1] = -2
    
    M.W1[6, 1] = -2
    M.W1[6, 6] = -2
    M.W1[6, 0] = 2
    M.W2[0:M.output_size , 0:M.output_size] = torch.eye(M.output_size)
print(M.W1)
# print(M.b1)
# print(M.W2)

Parameter containing:
tensor([[-2.,  2.,  0.,  2.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0., -2.,  2., -2.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 2.,  0., -2., -2.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
          0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
        [ 0., -2.,  0., -2.,  0.,  0.,  2.,  0.,  0.,  0.

In [153]:
output = M.forward(mytensor6)
x0 = M.x0
x1 = M.x1
x2 = M.x2
x3 = M.x3

print(x0)
print("----------------")
print(output)
print("----------------")
print("Excepted ")
print(outtensor2)
print("----------------")
print("Output argmax :")
output.argmax()
# loss = mytensor2 - outtensor2
# loss.sum()

tensor([[0., 0., 0., 1., 0., 0., 1., 0., 0.]])
----------------
tensor([[2., 0., 0., 0., 0., 0., 0., 0., 0.]], grad_fn=<AddBackward0>)
----------------
Excepted 
tensor([[0., 0., 2., 0., 0., 0., 0., 0., 0.]])
----------------
Output argmax :


tensor(0)

In [154]:
f1 = lambda in0, out0: out0-in0
# example of how work lambda function

### Neural Network Weight Calculation by Machine Reflection

The following three cells demonstrate how a neural network's weights can be calculated using machine reflection to understand how updates affect the network. This process is aimed at providing educational insight into debugging neural networks.

For each line of code, we'll reflect on the machine's operations to ensure correctness. We consider 6 possible input scenarios and calculate the appropriate weights to ensure they are correct for each scenario without causing errors in others.

The values are manually updated based on the results of the backpropagation provided by the model.

The dataset for training will be composed only of the 6 tensor that you seen above and will be in the ./Datasets/debug.csv

(Note: This is purely for educational purposes to understand debugging through machine reflection and does not alter the models themselves.)

---

In [38]:
with torch.enable_grad():
    M.W1 = nn.Parameter(torch.randn(M.input_size, M.hidden_size))
    
    M.b1 = nn.Parameter(torch.randn(M.hidden_size))
    M.W2 = nn.Parameter(torch.randn(M.hidden_size, M.output_size))
    M.b2 = nn.Parameter(torch.randn(M.output_size))

In [50]:
W1_backup = M.W1

In [77]:
#prepare the data for training,
# we don't split the data because we will evaluate manually
path = './Datasets/debug.csv'

loader = CSVToTensor(path)
loader.create_all_tensor()

inputdata = DataLoader(loader.game_tensor, batch_size=3, shuffle=True)
outputdata =
print(inputdata.dataset)

tensor([[1., 0., 1., 0., 0., 0., 0., 0., 0.],
        [1., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 1., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 1., 0., 0.],
        [1., 0., 0., 1., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 1., 0., 0.]])


In [79]:
with torch.enable_grad():
    for _ in range(1):
        i = 0
        for src, trg in inputdata:

            M.optimizer.zero_grad()

            print("Input:\t", src)
            print("In the model: \t", M.x0)
            output = M.forward(src)
            print("Output:\t", output)

            loss = (output - loader.prediction_tensor[i]).sum()
            print("Loss:\t", loss)

            loss.backward()
            print("Gradients after backward pass:")

            for name, param in M.named_parameters():
                if param.grad is not None:
                    print(f"{name}.grad:\t", param.grad)

            M.optimizer.step()

            print("Updated parameters:")
            for name, param in M.named_parameters():
                print(f"{name}:\t", param.data)

            i += 1
            print("---------------------------------------------------")

ValueError: too many values to unpack (expected 2)

In [39]:
with torch.enable_grad():
    i = 0
    
    src = inputdata.dataset[0]

    output = M.forward(src)
    
    print("output :", output)

    print("prediction :", loader.prediction_tensor[i])

    loss = (output - loader.prediction_tensor[i]).sum()
    print(loss)
    loss.backward()
    print(M.W1.grad)



output : tensor([14.5851, -9.3917,  9.3108,  7.2891,  1.5164, 15.5351,  1.7170,  0.2508,
        -6.8269], grad_fn=<AddBackward0>)
prediction : tensor([0., 2., 0., 0., 0., 0., 0., 0., 0.])
tensor(31.9856, grad_fn=<SumBackward0>)
tensor([[ 0.0000, -4.0823,  7.6509,  0.0000, -0.0931,  0.1976,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  3.7337,  0.0000,
          0.6392,  0.0000,  0.0000,  3.9981, -4.2550,  0.9692,  0.0000,  0.1386,
          0.0000, -1.0487,  0.0000,  1.8483,  0.0000,  0.0000,  0.0000, -0.2578,
          0.0000,  0.0000, -2.4139,  2.8677,  0.0000, -0.4256,  0.0000,  4.0822,
          0.0000, -1.2120,  0.0000,  4.3437,  0.0000,  2.9954,  0.0000, -2.1082,
          0.0000,  3.2973,  0.0000,  0.0000,  0.0000,  0.0000],
        [ 0.0000, -0.0000,  0.0000,  0.0000, -0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000, -0.0000,  0.00

In [83]:
with torch.no_grad():

    M.W1 -= (M.W1.grad) * 0.0001
    newoutput = M.forward(src)
    print(newoutput)
    newloss = torch.abs(newoutput - loader.prediction_tensor[i]).sum()
    print(newloss)
    newloss = torch.pow(newoutput - loader.prediction_tensor[i], 2).sum().sqrt()
    print(newloss)
    M.W1.grad = M.W1.grad * 0
    

tensor([[  8.5642,  -2.4050,  -3.6986,   8.8551,   7.3905,  11.7749,   3.9249,
           3.0929, -14.5921],
        [  9.6147, -10.4931,   4.9174,   3.2626,   0.3424,   6.4550,  -1.3300,
           0.7618, -11.5593],
        [ 15.9046,  -2.8779,   0.2119,   6.2596,   6.4786,  14.1475,   6.4079,
          -0.1375,   0.0965]])
tensor(171.5567)
tensor(40.9427)


In [73]:
print(loader.prediction_tensor[i])

tensor([0., 2., 0., 0., 0., 0., 0., 0., 0.])
