## Example 1: Training a simple neural network

In [None]:
# NOTE(review): `reductions` is not referenced in any visible cell; this looks
# like an accidental auto-import — confirm and remove if so.
from torch_scatter.testing import reductions

import nn.functional as FF
from nn import Linear, Module
from optim import Adam
from tensor import Tensor

In [None]:
class MLP(Module):
    """Three-layer perceptron mapping 3 input features to a single output.

    Layer widths are 3 -> 3 -> 6 -> 1 and every linear layer, including the
    last one, is followed by ``FF.tanh``.
    """

    def __init__(self):
        super().__init__()
        self.linear_1 = Linear(3, 3)
        self.linear_2 = Linear(3, 6)
        self.linear_3 = Linear(6, 1)

    def forward(self, x):
        """Run ``x`` through the three linear layers, applying tanh after each.

        ``FF`` (``nn.functional``) must be imported before this method is
        first called; it is resolved at call time, not at class definition.
        """
        x = FF.tanh(self.linear_1(x))
        x = FF.tanh(self.linear_2(x))
        x = FF.tanh(self.linear_3(x))
        return x



In [None]:
# Build the network and give all of its parameters to Adam. No hyperparameters
# are passed, so Adam runs with whatever defaults it defines.
model = MLP()
optim = Adam(model.parameters()) 

In [None]:
import numpy as np

# Toy dataset: four samples with three features each ...
X = Tensor([
    [2.0, 3.0, -1.0],
    [3.0, -1.0, 0.5],
    [0.5, 1.0, 1.0],
    [1.0, 1.0, -1.0],
])

# ... and one scalar target per sample.
Y = Tensor([[1.0], [-1.0], [-1.0], [1.0]])

In [None]:
def mse_loss(predictions, targets):
    """Sum of squared errors divided by the size of the first axis.

    Note that the divisor is ``predictions.shape[0]`` (the number of rows),
    not the total number of elements.
    """
    residuals = predictions - targets
    squared_error_total = (residuals ** 2).sum()
    return squared_error_total / predictions.shape[0]

In [None]:
epochs = 100

# Full-batch training: each epoch does one forward and one backward pass over
# the whole of X.
for epoch in range(epochs):
    predictions = model(X)
    loss = mse_loss(predictions, Y)
    # zero_grad() runs before backward() so that gradients from the previous
    # iteration are not carried over (assuming it resets the parameter
    # gradients, as the PyTorch method of the same name does).
    optim.zero_grad()
    loss.backward()
    optim.step() 
    # The loss printed here was computed before this epoch's parameter update.
    print(f"Epoch {epoch}, Loss: {loss.data}")

## Example 2: Comparing gradient computation in PyTorch and MinTorch

In [None]:
import torch
import nn.functional as FF

import torch.nn.functional as F


def arrays_are_close(arr_1, arr_2):
    """Print ``True`` if the two arrays agree element-wise, else ``False``.

    The comparison is ``np.allclose`` with an absolute tolerance of 1e-5
    (NumPy's default relative tolerance still applies). Nothing is returned.
    """
    verdict = np.allclose(arr_1, arr_2, atol=1e-5)
    print(verdict)

# Raw values shared by both frameworks so they start from identical inputs.
val_a = [
    [1, 2, 3, 4],
    [4, 5, 6, 10],
    [9, -1, 1, 1]
]

val_b = [
    [1, -2, 3, 4],
    [4, 4, -6, 10],
    [1, -1, 0, 1]
]
val_q = [
    [2, 2, 2],
    [2, 2, 2],
    [2, 2, 2],
    [2, 2, 2],
    [2, 2, 2]
]

# --- PyTorch reference: e = sum(linear(linear(a, b), q)) ---
a_torch = torch.tensor(val_a, requires_grad=True, dtype=torch.float32)
b_torch = torch.tensor(val_b, requires_grad=True, dtype=torch.float32)
q_torch = torch.tensor(val_q, requires_grad=True, dtype=torch.float32)
c_torch = F.linear(a_torch, b_torch)
d_torch = F.linear(c_torch, q_torch)
e_torch = d_torch.sum()

# PyTorch only populates .grad on leaf tensors by default. c, d and e are
# intermediate results, so without retain_grad() their .grad stays None and
# the comparisons below would fail with AttributeError on `.detach()`.
c_torch.retain_grad()
d_torch.retain_grad()
e_torch.retain_grad()
e_torch.backward()


# --- MinTorch: the same computation graph ---
a_mtorch = Tensor(val_a)
b_mtorch = Tensor(val_b)
c_mtorch = FF.linear(a_mtorch, b_mtorch)
q_mtorch = Tensor(val_q)
d_mtorch = FF.linear(c_mtorch, q_mtorch)
e_mtorch = d_mtorch.sum()
e_mtorch.backward()


# Each call prints True when the two frameworks agree on that gradient.
arrays_are_close(a_torch.grad.detach().numpy(), a_mtorch.grad) # compare de/da
arrays_are_close(b_torch.grad.detach().numpy(), b_mtorch.grad) # compare de/db
arrays_are_close(c_torch.grad.detach().numpy(), c_mtorch.grad) # compare de/dc
arrays_are_close(d_torch.grad.detach().numpy(), d_mtorch.grad) # compare de/dd
arrays_are_close(e_torch.grad.detach().numpy(), e_mtorch.grad) # compare de/de
arrays_are_close(q_torch.grad.detach().numpy(), q_mtorch.grad) # compare de/dq



## Example 3: High Dimensional Case

In [None]:
# --- PyTorch reference: a 3-D input (3, 1, 4) against a 2-D weight (3, 4) ---
A = torch.tensor([
    [
        [1, 2, 3, 4]
    ],
    [
        [1, 9, -1, 4]
    ],
    [
        [1, 2, 3, -1]
    ]
], requires_grad=True, dtype=torch.float32)

B = torch.tensor([
    [1, 9, 3, 4],
    [0, 1, -1, -11],
    [1, 21, 11, -1]
], requires_grad=True, dtype=torch.float32)

C = F.linear(A, B)

D = C.sum()
# D is not a leaf tensor, so PyTorch leaves D.grad as None unless asked to
# keep it. Without this call the D comparison below raises AttributeError.
D.retain_grad()
D.backward()


# --- MinTorch: identical values and graph ---
AA = Tensor([
    [
        [1, 2, 3, 4]
    ],
    [
        [1, 9, -1, 4]
    ],
    [
        [1, 2, 3, -1]
    ]
])

BB = Tensor([
    [1, 9, 3, 4],
    [0, 1, -1, -11],
    [1, 21, 11, -1]
])

CC = FF.linear(AA, BB)
DD = CC.sum()
DD.backward()


# The leaf gradients are the meaningful check for the batched case; dD/dD is
# kept from the original cell as a sanity check.
arrays_are_close(A.grad.detach().numpy(), AA.grad) # compare dD/dA
arrays_are_close(B.grad.detach().numpy(), BB.grad) # compare dD/dB
arrays_are_close(D.grad.detach().numpy(), DD.grad) # compare dD/dD




## Example 4: Matrix Inversion Using a Linear Model

In [4]:
# Matrix Inverse Example
# Learn the inverse of a small square matrix by gradient descent: find a
# weight W that minimises || A @ W^T - I ||^2, so that W^T approximates A^-1.
A = Tensor([
    [4.0, 7.0],
    [2.0, 6.0]
])


# Used only as a container for a trainable 2x2 weight (no bias). The layer is
# never applied to A: feeding A through it and then through the loss would
# optimise A @ W @ A^T instead, whose solution is not the inverse of A.
inverse_model = Linear(2, 2, bias=False)

def inverse_loss(A, A_inv):
    """Squared Frobenius distance between ``FF.linear(A, A_inv)`` and identity.

    ``A_inv`` is passed to ``FF.linear`` as the weight, so (assuming
    ``FF.linear`` follows the ``torch.nn.functional.linear`` convention it is
    checked against in Example 2) the product formed is ``A @ A_inv^T``. The
    loss is therefore zero when the *transpose* of ``A_inv`` is the inverse
    of ``A``.
    """
    identity = Tensor(np.eye(A.shape[0]))
    return ((FF.linear(A, A_inv) - identity) ** 2).sum()

optim = Adam(inverse_model.parameters(), lr=0.01)

for epoch in range(10000):
    optim.zero_grad()
    # Optimise the weight itself, so the optimum satisfies A @ W^T = I.
    loss = inverse_loss(A, inverse_model.weight)
    loss.backward()
    optim.step()
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {loss.data}")

# Result
# The learned weight is the transpose of the inverse, so transpose it back.
inverse_estimate = np.transpose(inverse_model.weight.data)
print("Original Matrix:\n", A.data)
print("Calculated Inverse:\n", inverse_estimate)
print("Product Result:\n", FF.linear(A, inverse_model.weight).data)



NameError: name 'Linear' is not defined

In [2]:
import numpy as np

import torch
import torch.nn as nn_torch
import torch.nn.functional as F_torch

from tensor import Tensor
import nn as nn_mini
import nn.functional as F_mini


def arrays_are_close(arr_1, arr_2):
    """Return whether two arrays agree element-wise.

    Delegates to ``np.allclose`` with an absolute tolerance of 1e-9; NumPy's
    default relative tolerance still applies.
    """
    tolerance = 1e-9
    return np.allclose(arr_1, arr_2, atol=tolerance)


# Build the same sum-reduced MSE criterion in both frameworks.
mse_loss_torch = nn_torch.MSELoss(reduction='sum')
mse_loss_mini = nn_mini.MSELoss(reduction='sum')


# --- PyTorch reference: loss = MSE(linear(a, b), y) ---
a_torch = torch.tensor([[1, 2], [3, 4]], requires_grad=True, dtype=torch.float32)
b_torch = torch.tensor([[5, 6], [7, 8]], requires_grad=True, dtype=torch.float32)
y_torch = torch.tensor([[9, 2], [3, -1]], requires_grad=True, dtype=torch.float32)

y_hat_torch = F_torch.linear(a_torch, b_torch)
loss_torch = mse_loss_torch(y_hat_torch, y_torch)
loss_torch.backward()

# --- MinTorch: identical values and graph ---
a_mini = Tensor([[1, 2], [3, 4]])
b_mini = Tensor([[5, 6], [7, 8]])
y_mini = Tensor([[9, 2], [3, -1]])

y_hat_mini = F_mini.linear(a_mini, b_mini)
loss_mini = mse_loss_mini(y_hat_mini, y_mini)
loss_mini.backward()


# Only the gradients of the two leaf inputs (a and b) are compared here.
print(arrays_are_close(a_torch.grad.detach().numpy(), a_mini.grad))
print(arrays_are_close(b_torch.grad.detach().numpy(), b_mini.grad))

True
True


In [3]:
# Display both gradients of `a` as a tuple for a side-by-side visual check.
a_torch.grad, a_mini.grad

(tensor([[ 374.,  432.],
         [1116., 1296.]]),
 array([[ 374.,  432.],
        [1116., 1296.]], dtype=float32))

In [15]:
from nn import Linear, Module, MSELoss
from optim import Adam
from tensor import Tensor
import nn.functional as F


class MLP(Module):
    """Small fully connected network: 3 -> 3 -> 6 -> 1 with tanh after each layer."""

    def __init__(self):
        super().__init__()
        self.linear_1 = Linear(3, 3)
        self.linear_2 = Linear(3, 6)
        self.linear_3 = Linear(6, 1)

    def forward(self, x):
        """Apply each linear layer in order, squashing every result with ``F.tanh``."""
        for layer in (self.linear_1, self.linear_2, self.linear_3):
            x = F.tanh(layer(x))
        return x

# Fresh model, optimizer and loss. Adam and MSELoss are created without
# arguments, so both run with whatever defaults the library defines.
model = MLP()
optim = Adam(model.parameters())
criterion = MSELoss()


# Toy dataset: four samples with three features each, one target per sample.
X = Tensor([[2.0, 3.0, -1.0], [3.0, -1.0, 0.5], [0.5, 1.0, 1.0], [1.0, 1.0, -1.0]])
Y = Tensor([[1.0], [-1.0], [-1.0], [1.0]])

epochs = 100

# Full-batch training: one forward and one backward pass over all of X per epoch.
for epoch in range(epochs):
    y_hat = model(X)
    loss = criterion(y_hat, Y)
    # zero_grad() runs before backward() so gradients from the previous
    # iteration are not carried over (assuming PyTorch-like semantics).
    optim.zero_grad()
    loss.backward()
    optim.step()
    # The loss printed here was computed before this epoch's parameter update.
    print(f"Epoch {epoch}, Loss: {loss.data}")

Epoch 0, Loss: 1.9441636800765991
Epoch 1, Loss: 1.9351037740707397
Epoch 2, Loss: 1.924615502357483
Epoch 3, Loss: 1.912492036819458
Epoch 4, Loss: 1.8985000848770142
Epoch 5, Loss: 1.882373332977295
Epoch 6, Loss: 1.863801121711731
Epoch 7, Loss: 1.842448353767395
Epoch 8, Loss: 1.8179742097854614
Epoch 9, Loss: 1.7900376319885254
Epoch 10, Loss: 1.758312463760376
Epoch 11, Loss: 1.7225126028060913
Epoch 12, Loss: 1.6824241876602173
Epoch 13, Loss: 1.637947916984558
Epoch 14, Loss: 1.5891469717025757
Epoch 15, Loss: 1.5362987518310547
Epoch 16, Loss: 1.4799386262893677
Epoch 17, Loss: 1.4208837747573853
Epoch 18, Loss: 1.3602203130722046
Epoch 19, Loss: 1.2992528676986694
Epoch 20, Loss: 1.2394320964813232
Epoch 21, Loss: 1.1823170185089111
Epoch 22, Loss: 1.1295578479766846
Epoch 23, Loss: 1.0827854871749878
Epoch 24, Loss: 1.0433828830718994
Epoch 25, Loss: 1.0122207403182983
Epoch 26, Loss: 0.9894610643386841
Epoch 27, Loss: 0.9744916558265686
Epoch 28, Loss: 0.9659960269927979
Ep