In [1]:
%run matrices.ipynb

In [2]:
import torch
import torch.nn as nn
import random

class MLP_TORCH(nn.Module):
    """Reference multilayer perceptron built from ``nn.Linear`` layers.

    Each linear layer may be followed by an activation selected by name
    ("tanh", "relu", "sigmoid" or "softmax"). A falsy entry such as ``None``
    means that the layer has no activation.
    """

    def __init__(self, layer_sizes, activations=None):
        """Create one ``nn.Linear`` per consecutive pair in ``layer_sizes``.

        Args:
            layer_sizes: Sequence of layer widths, input width first.
            activations: Optional sequence of activation names, one per
                linear layer. Layers past the end of the sequence get no
                activation; surplus entries are ignored.
        """
        super(MLP_TORCH, self).__init__()
        # A None default avoids sharing one mutable list between calls.
        if activations is None:
            activations = []
        layers = []
        self.activations = []
        for i in range(len(layer_sizes) - 1):
            layers.append(nn.Linear(layer_sizes[i], layer_sizes[i + 1]))
            self.activations.append(activations[i] if i < len(activations) else None)
        self.network = nn.ModuleList(layers)

    def get_act_func(self, name):
        """Return the torch activation class for ``name``, or ``None`` if unknown.

        The class itself is returned (not an instance); the caller
        instantiates it.
        """
        if name == "tanh":
            return nn.Tanh
        if name == "relu":
            return nn.ReLU
        if name == "sigmoid":
            return nn.Sigmoid
        if name == "softmax":
            return nn.Softmax
        return None

    def forward(self, x):
        """Apply every linear layer, each followed by its activation if any.

        Raises:
            ValueError: If a layer's activation name is not supported.
        """
        for idx, layer in enumerate(self.network):
            x = layer(x)
            name = self.activations[idx]
            if not name:
                continue
            act_cls = self.get_act_func(name)
            if act_cls is None:
                raise ValueError(f"Unknown activation function: {name!r}")
            # nn.Softmax() without `dim` relies on a deprecated implicit choice
            # and emits a warning. The linear layers act on the last axis, so
            # normalise over that axis explicitly (same result for 1-D and 2-D
            # inputs as the implicit choice).
            act_func = act_cls(dim=-1) if act_cls is nn.Softmax else act_cls()
            x = act_func(x)
        return x

    def set_parameters(self, weights, biases):
        """Replace every layer's weight and bias with the given values.

        Args:
            weights: One matrix per layer. Each is converted to float32 and
                transposed before being stored, so it is expected in
                (in_features, out_features) layout, the transpose of
                ``nn.Linear.weight``.
            biases: One vector per layer, converted to float32.
        """
        with torch.no_grad():
            for idx, layer in enumerate(self.network):
                layer.weight = nn.Parameter(torch.tensor(weights[idx], dtype=torch.float32).T)
                layer.bias = nn.Parameter(torch.tensor(biases[idx], dtype=torch.float32))

    def get_parameters(self):
        """Return ``(weights, biases)``: the layers' parameter objects, in layer order.

        The returned entries are the live ``nn.Parameter`` objects (not
        copies); weights are in ``nn.Linear`` layout.
        """
        weights = [layer.weight for layer in self.network]
        biases = [layer.bias for layer in self.network]
        return weights, biases

def get_err_func(name):
    """Map an error-function name to the matching torch loss class.

    Only "mse" is recognised and maps to ``nn.MSELoss``; every other name
    yields ``None``. The class is returned uninstantiated.
    """
    return nn.MSELoss if name == "mse" else None

def compare_weights_and_biases_values(mlp_torch, mlp):
    """Assert that the torch model and the custom model hold matching parameters.

    Both arguments must provide ``get_parameters()`` returning
    ``(weights, biases)``. Torch weights are transposed before comparison;
    the custom model's entries are read through their ``.data`` attribute.
    Biases are compared after flattening both sides.

    Raises:
        AssertionError: On the first layer whose values differ beyond the
            tolerance (rtol=1e-4, atol=1e-4).
    """
    tolerance = {"rtol": 1e-4, "atol": 1e-4}
    ref_weights, ref_biases = mlp_torch.get_parameters()
    own_weights, own_biases = mlp.get_parameters()

    # Weights: the torch side is transposed to match the custom layout.
    for i in range(len(own_weights)):
        expected, current = ref_weights[i].T, torch.tensor(own_weights[i].data)
        assert torch.allclose(expected, current, **tolerance), \
            f"Weights mismatch in layer {i}:\nExpected: {expected}\nProvided: {current}"

    # Biases: both sides are flattened so only the element order matters.
    for i in range(len(own_biases)):
        expected = ref_biases[i].flatten()
        current = torch.tensor(own_biases[i].data).flatten()
        assert torch.allclose(expected, current, **tolerance), \
            f"Biases mismatch in layer {i}:\nExpected: {expected}\nProvided: {current}"
        
def compare_weights_and_biases_grads(mlp_torch, mlp):
    """Assert that the torch model and the custom model hold matching gradients.

    Both arguments must provide ``get_parameters()`` returning
    ``(weights, biases)`` whose entries expose a ``.grad`` attribute. Torch
    weight gradients are transposed before comparison; bias gradients are
    compared after flattening both sides.

    Raises:
        AssertionError: On the first layer whose gradients differ beyond the
            tolerance (rtol=1e-4, atol=1e-4).
    """
    tolerance = {"rtol": 1e-4, "atol": 1e-4}
    ref_weights, ref_biases = mlp_torch.get_parameters()
    own_weights, own_biases = mlp.get_parameters()

    # Weight gradients: the torch side is transposed to match the custom layout.
    for i in range(len(own_weights)):
        expected, current = ref_weights[i].grad.T, torch.tensor(own_weights[i].grad)
        assert torch.allclose(expected, current, **tolerance), \
            f"Weights grads mismatch in layer {i}:\nExpected: {expected}\nProvided: {current}"

    # Bias gradients: both sides are flattened so only the element order matters.
    for i in range(len(own_biases)):
        expected = ref_biases[i].grad.flatten()
        current = torch.tensor(own_biases[i].grad).flatten()
        assert torch.allclose(expected, current, **tolerance), \
            f"Biases grads mismatch in layer {i}:\nExpected: {expected}\nProvided: {current}"

def compare_outputs(o1, o2, i):
    """Assert that two output tensors agree within rtol=1e-4 / atol=1e-4.

    Args:
        o1: Output of the custom MLP, as a torch tensor.
        o2: Output of the torch MLP.
        i: Start index of the batch; used only in the failure message.

    Tensors that are not float32 are converted to float32 before comparing.
    """
    o1 = o1 if o1.dtype == torch.float32 else o1.float()
    o2 = o2 if o2.dtype == torch.float32 else o2.float()
    outputs_match = torch.allclose(o1, o2, rtol=1e-4, atol=1e-4)
    assert outputs_match, \
        f"Output mismatch at batch starting index {i}:\nMLP Output: {o1}\nTorch Output: {o2}"

def compare_mlp(layer_sizes, x, y, batch_size, learning_rate, epochs,
                activations=("tanh", "relu", "softmax"), err_func="mse"):
    """Train the custom MLP and the torch reference side by side and compare them.

    Both models start from the same random parameters (uniform in [-1, 1]).
    For every mini-batch the forward outputs are compared, both losses are
    back-propagated, both models take one SGD step, and the gradients are
    compared. Any mismatch raises ``AssertionError`` from the compare helpers.

    ``MLP``, ``Tensor`` and ``err_functions`` are not defined in this cell;
    they are presumably provided by the notebook loaded with ``%run`` at the
    top (TODO confirm).

    Args:
        layer_sizes: Layer widths, input width first.
        x: Input records (sequence of rows).
        y: Target records, aligned with ``x``.
        batch_size: Number of records per mini-batch.
        learning_rate: Step size used for both models.
        epochs: Number of passes over the data.
        activations: Activation names applied to the first layers; layers
            beyond the end of the sequence get no activation.
        err_func: Name of the loss, used for both ``get_err_func`` and
            ``err_functions``.
    """
    activations = list(activations)
    weights = []
    biases = []
    # Draw weights, then biases, layer by layer; weights are (fan_in, fan_out).
    for layer_idx in range(len(layer_sizes) - 1):
        fan_in, fan_out = layer_sizes[layer_idx], layer_sizes[layer_idx + 1]
        weights.append([[random.uniform(-1, 1) for _ in range(fan_out)] for _ in range(fan_in)])
        biases.append([random.uniform(-1, 1) for _ in range(fan_out)])

    mlp = MLP(layer_sizes, activations)
    mlp.set_parameters(weights, biases)

    mlp_torch = MLP_TORCH(layer_sizes, activations)
    mlp_torch.set_parameters(weights, biases)

    # Sanity check: both models must start from identical parameters.
    compare_weights_and_biases_values(mlp_torch, mlp)

    optimizer = torch.optim.SGD(mlp_torch.parameters(), lr=learning_rate)
    # The torch loss object does not depend on the batch, so build it once.
    torch_loss_func = get_err_func(err_func)()

    for _ in range(epochs):
        for i in range(0, len(x), batch_size):
            x_batch = x[i:i + batch_size]
            y_batch = y[i:i + batch_size]

            mlp_output = mlp.forward(Tensor(x_batch))
            # Call the module itself rather than .forward(), as torch recommends.
            torch_output = mlp_torch(torch.tensor(x_batch))

            compare_outputs(torch.tensor(mlp_output.data), torch_output.data, i)

            torch_loss = torch_loss_func(torch_output, torch.tensor(y_batch))
            loss = err_functions[err_func](mlp_output, Tensor(y_batch))

            loss.backward()
            torch_loss.backward()

            mlp.step(learning_rate)
            optimizer.step()
            # Gradients are still populated after the step; compare before clearing.
            compare_weights_and_biases_grads(mlp_torch, mlp)

            optimizer.zero_grad()
            mlp.zero_grad()
    
    

In [3]:
import random

def random_test(seed=None):
    """Run one randomized comparison of the custom MLP against the torch MLP.

    Network shape, dataset size, batch size, learning rate, epoch count and
    the data itself are drawn from the ``random`` module, printed, and passed
    to ``compare_mlp``.

    Args:
        seed: Optional seed for the ``random`` module. When omitted, a fresh
            seed is drawn. In both cases the seed is printed and applied, so
            a failing configuration can be replayed with
            ``random_test(seed=...)``; ``compare_mlp`` draws its initial
            parameters from the same ``random`` module.
    """
    if seed is None:
        seed = random.randrange(2 ** 32)
    random.seed(seed)

    input_size = random.randint(2, 16)
    output_size = random.randint(2, 16)
    hidden_layers = [random.randint(1, 4) for _ in range(random.randint(1, 16))]
    layers = [input_size] + hidden_layers + [output_size]
    record_num = random.randint(2, 1024)
    batch_size = random.randint(1, record_num)
    learning_rate = random.uniform(0.0001, 0.01)
    epochs = random.randint(1, 100)

    x = []
    y = []
    for _ in range(record_num):
        x.append([random.uniform(-5, 5) for _ in range(input_size)])
        y.append([random.uniform(-5, 5) for _ in range(output_size)])

    print(f"Seed: {seed}")
    print(f"Layers: {layers}")
    print(f"Record number: {record_num}")
    print(f"Batch Size: {batch_size}")
    print(f"Learning Rate: {learning_rate}")
    print(f"Epochs: {epochs}")
    compare_mlp(layers, x, y, batch_size, learning_rate, epochs)
    print("Passed")
    print("---------------------------------")
    


In [5]:
# Run 100 independent randomized comparisons. random_test() ends in the
# assert-based compare helpers, so the first mismatch stops the loop.
for _ in range(100):
    random_test()

Layers: [13, 2, 1, 2, 3, 3, 4, 3, 4, 4, 4, 1, 2, 14]
Record number: 91
Batch Size: 82
Learning Rate: 0.006630281022362285
Epochs: 96
Passed
---------------------------------
Layers: [14, 1, 2, 2, 2, 2, 3, 14]
Record number: 95
Batch Size: 88
Learning Rate: 0.005268098812423796
Epochs: 30
Passed
---------------------------------
Layers: [12, 1, 1, 3, 1, 1, 4, 1, 1, 3, 1, 1, 4, 3, 3, 9]
Record number: 985
Batch Size: 661
Learning Rate: 0.008890883129914415
Epochs: 86
Passed
---------------------------------
Layers: [9, 4, 1, 3, 1, 4, 4, 4, 3, 1, 1, 16]
Record number: 758
Batch Size: 665
Learning Rate: 0.00690449055939041
Epochs: 42
Passed
---------------------------------
Layers: [6, 1, 4, 3, 4, 2, 1, 4, 3, 1, 1, 7]
Record number: 516
Batch Size: 29
Learning Rate: 0.0036276985532656585
Epochs: 90
Passed
---------------------------------
Layers: [10, 1, 1, 2, 3, 1, 2, 4, 2, 3, 4, 2, 3, 2, 4, 4, 4]
Record number: 838
Batch Size: 821
Learning Rate: 0.007560623132638472
Epochs: 94
Passed
---

In [5]:
"""
This test case failed once and could not be reproduced. It was probably caused by how PyTorch handles floating-point arithmetic.
AssertionError: Weights grads mismatch in layer 0:
Expected: tensor([[-0.0052, -0.0188],
        [ 0.0107, -0.0067],
        [ 0.0038, -0.0006],
        [-0.0027, -0.0385],
        [ 0.0029, -0.0005],
        [-0.0034,  0.0159],
        [-0.0055, -0.0004],
        [-0.0142, -0.0040],
        [-0.0067, -0.0030],
        [ 0.0100, -0.0203]])
Provided: tensor([[-0.0054, -0.0188],
        [ 0.0104, -0.0067],
        [ 0.0042, -0.0006],
        [-0.0028, -0.0385],
        [ 0.0030, -0.0005],
        [-0.0037,  0.0159],
        [-0.0052, -0.0004],
        [-0.0140, -0.0040],
        [-0.0067, -0.0030],
        [ 0.0102, -0.0203]])
"""

# Fixed configuration of the run that failed once (see the note above).
# Only the hyperparameters are fixed; data and initial weights are redrawn.
layers =[10, 2, 1, 3, 2]
record_num =  751
batch_size= 586
learning_rate =  0.00284773637316031
epochs = 73
x = []
y = []
for _ in range(record_num):
    # Row widths follow the network's input/output sizes instead of repeating
    # them as literals, so they cannot drift apart from `layers`.
    x.append([random.uniform(-5,5) for _ in range(layers[0])])
    y.append([random.uniform(-5,5) for _ in range(layers[-1])])
# Repeat the comparison 50 times on the same data; each call draws new weights.
for i in range(50):
    compare_mlp(layers,x,y, batch_size, learning_rate, epochs)
    print(i , "Passed")

0 Passed
1 Passed
2 Passed
3 Passed
4 Passed
5 Passed
6 Passed
7 Passed
8 Passed
9 Passed
10 Passed
11 Passed
12 Passed
13 Passed
14 Passed
15 Passed
16 Passed
17 Passed
18 Passed
19 Passed
20 Passed
21 Passed
22 Passed
23 Passed
24 Passed
25 Passed
26 Passed
27 Passed
28 Passed
29 Passed
30 Passed
31 Passed
32 Passed
33 Passed
34 Passed
35 Passed
36 Passed
37 Passed
38 Passed
39 Passed
40 Passed
41 Passed
42 Passed
43 Passed
44 Passed
45 Passed
46 Passed
47 Passed
48 Passed
49 Passed
