<a href="https://colab.research.google.com/github/ratulb/llmlite.mojo/blob/main/Tensor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
class Tensor:
    """Minimal reverse-mode autograd node supporting ``+``, ``-`` and ``*``.

    Each arithmetic operation returns a new ``Tensor`` that remembers its
    operands (``_prev``) and a closure (``_backward``) which pushes the
    result's gradient back onto those operands.  Calling :meth:`backward` on
    the final node walks the recorded graph and fills in ``grad`` on every
    node that has ``requires_grad`` set.

    Operands may be ``Tensor`` instances or plain Python numbers; a plain
    number is wrapped as a constant ``Tensor`` (``requires_grad=False``), so
    ``t + 1``, ``2 * t`` and ``10 - t`` all work.

    Attributes:
        value: The wrapped value (anything supporting ``+``, ``-``, ``*``).
        requires_grad: Whether gradients are accumulated into ``grad``.
        grad: Accumulated gradient, starts at ``0.0``.
        name: Optional label, used only for display by callers.
    """

    def __init__(self, value, requires_grad=False):
        """Create a leaf node holding ``value``.

        Args:
            value: The value to wrap.
            requires_grad: If true, ``backward`` accumulates into ``grad``.
        """
        self.value = value
        self.requires_grad = requires_grad
        self.grad = 0.0
        # Leaves have nothing to propagate to; operations overwrite this.
        self._backward = lambda: None
        # Operands that produced this node (empty for leaves).
        self._prev = set()
        self.name = ""

    @staticmethod
    def _as_tensor(operand):
        """Return ``operand`` unchanged if it is a Tensor, else wrap it as a constant."""
        return operand if isinstance(operand, Tensor) else Tensor(operand)

    def __add__(self, other):
        """Return ``self + other``; the gradient flows unchanged to both operands."""
        other = Tensor._as_tensor(other)
        out = Tensor(self.value + other.value, self.requires_grad or other.requires_grad)

        def _backward():
            # `+=` (not `=`) so a tensor used several times sums all contributions.
            if self.requires_grad:
                self.grad += out.grad
            if other.requires_grad:
                other.grad += out.grad

        out._backward = _backward
        out._prev = {self, other}
        return out

    def __radd__(self, other):
        """Return ``other + self`` when ``other`` is not a Tensor."""
        return Tensor._as_tensor(other) + self

    def __sub__(self, other):
        """Return ``self - other``; the subtrahend receives the negated gradient."""
        other = Tensor._as_tensor(other)
        out = Tensor(self.value - other.value, self.requires_grad or other.requires_grad)

        def _backward():
            if self.requires_grad:
                self.grad += out.grad
            if other.requires_grad:
                other.grad -= out.grad

        out._backward = _backward
        out._prev = {self, other}
        return out

    def __rsub__(self, other):
        """Return ``other - self`` when ``other`` is not a Tensor."""
        return Tensor._as_tensor(other) - self

    def __mul__(self, other):
        """Return ``self * other``; each operand's gradient is scaled by the other's value."""
        other = Tensor._as_tensor(other)
        out = Tensor(self.value * other.value, self.requires_grad or other.requires_grad)

        def _backward():
            if self.requires_grad:
                self.grad += other.value * out.grad
            if other.requires_grad:
                other.grad += self.value * out.grad

        out._backward = _backward
        out._prev = {self, other}
        return out

    def __rmul__(self, other):
        """Return ``other * self`` when ``other`` is not a Tensor."""
        return Tensor._as_tensor(other) * self

    def backward(self):
        """Backpropagate from this node through the recorded graph.

        Sets ``self.grad`` to ``1.0`` and then runs every node's
        ``_backward`` closure in reverse topological order, so a node's
        gradient is complete before it is pushed to its operands.

        Gradients accumulate: ``grad`` fields are not reset here, so calling
        this more than once adds to whatever is already stored.
        """
        topo = []
        visited = {self}
        # Explicit-stack post-order DFS.  A recursive walk would hit Python's
        # recursion limit on graphs that are more than ~1000 operations deep.
        stack = [(self, iter(self._prev))]
        while stack:
            node, pending_parents = stack[-1]
            for parent in pending_parents:
                if parent not in visited:
                    visited.add(parent)
                    stack.append((parent, iter(parent._prev)))
                    break
            else:
                # All operands of `node` are already emitted.
                topo.append(node)
                stack.pop()

        self.grad = 1.0  # starting gradient
        for t in reversed(topo):
            t._backward()


In [None]:
# Leaf tensors: the inputs whose gradients we want.
A = Tensor(2.0, requires_grad=True)
A.name = 'A'
B = Tensor(3.0, requires_grad=True)
B.name = 'B'
D = Tensor(4.0, requires_grad=True)
D.name = 'D'

# Build the graph one operation at a time so every intermediate node can be
# named and inspected.  Overall: G = (A + B + D + A) * A, so A is reused.
C = A + B
C.name = 'C'
E = C + D
E.name = 'E'
F = E + A
F.name = 'F'
G = F * A
G.name = 'G'

# Backpropagate from the final node.
G.backward()

# Report value and accumulated gradient for every node in the graph.
for t in (A, B, C, D, E, F, G):
    print(f"{t.name}: value={t.value}, grad={t.grad}, requires_grad={t.requires_grad}")


In [None]:
# Four independent leaf tensors.
A = Tensor(2.0, requires_grad=True)
A.name = 'A'
B = Tensor(3.0, requires_grad=True)
B.name = 'B'
C = Tensor(4.0, requires_grad=True)
C.name = 'C'
D = Tensor(5.0, requires_grad=True)
D.name = 'D'

# G = (A + B) - (C * D): exercises add, mul and sub in a single graph.
E = A + B
E.name = 'E'
F = C * D
F.name = 'F'
G = E - F
G.name = 'G'

# Backpropagate, then show every node's value and gradient.
G.backward()
for t in (A, B, C, D, E, F, G):
    print(f"{t.name}: value={t.value}, grad={t.grad}, requires_grad={t.requires_grad}")

In [None]:
import torch

In [None]:

import torch

# Step 1: Define inputs (leaf tensors that track gradients)
A = torch.rand((3, 4), requires_grad=True)
B = torch.rand((3, 4), requires_grad=True)
C = torch.rand((3, 4), requires_grad=True)
D = torch.rand((3, 4), requires_grad=True)

# Step 2: Build graph
E = A + B * D
F = 2 * C * D - A
G = E - F * 2  # G = A + B*D - 2*(2*C*D - A)

# E, F and G are non-leaf tensors: autograd only populates .grad on them if
# retain_grad() is called before backward(). Without these calls the prints
# below show None (and PyTorch emits a UserWarning).
E.retain_grad()
F.retain_grad()
G.retain_grad()

# Step 3: Reduce G to scalar
loss = G.sum()   # You could also do mean(), or other scalar functions

# Step 4: Backward
loss.backward()

# Step 5: Get gradients
print("∂loss/∂A:\n", A.grad)
print("∂loss/∂B:\n", B.grad)
print("∂loss/∂C:\n", C.grad)
print("∂loss/∂D:\n", D.grad)
print("∂loss/∂E:\n", E.grad)
print("∂loss/∂F:\n", F.grad)
print("∂loss/∂G:\n", G.grad)



In [31]:
import torch

# Fix the RNG seed so the random inputs, and therefore the printed
# gradients, are reproducible from run to run.
torch.manual_seed(42)

# Four (3, 4) leaf tensors, drawn from the RNG in the order A, B, C, D.
A, B, C, D = (torch.rand((3, 4), requires_grad=True) for _ in range(4))

# Same expression graph as before: G = A + B*D - 2*(2*C*D - A).
E = B * D + A
F = 2 * C * D - A
G = E - 2 * F  # G has shape (3, 4), not scalar

# Keep .grad on the intermediate (non-leaf) tensors we want to inspect.
E.retain_grad()
F.retain_grad()

# G is not a scalar, so backward() needs an explicit upstream gradient;
# all-ones gives the same result as calling G.sum().backward().
G.backward(torch.ones_like(G))

# View grads
for label, tensor in (("A", A), ("B", B), ("C", C), ("D", D), ("E", E), ("F", F)):
    print(f"∂loss/∂{label}:\n", tensor.grad)


∂loss/∂A:
 tensor([[3., 3., 3., 3.],
        [3., 3., 3., 3.],
        [3., 3., 3., 3.]])
∂loss/∂B:
 tensor([[0.5779, 0.9040, 0.5547, 0.3423],
        [0.6343, 0.3644, 0.7104, 0.9464],
        [0.7890, 0.2814, 0.7886, 0.5895]])
∂loss/∂C:
 tensor([[-2.3117, -3.6159, -2.2186, -1.3693],
        [-2.5374, -1.4576, -2.8417, -3.7856],
        [-3.1561, -1.1257, -3.1545, -2.3579]])
∂loss/∂D:
 tensor([[ 0.4481, -0.5103, -0.6942, -0.3681],
        [-1.3033,  0.5493, -3.5396,  0.3264],
        [-3.2744, -1.8915, -1.0537, -2.4042]])
∂loss/∂E:
 tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])
∂loss/∂F:
 tensor([[-2., -2., -2., -2.],
        [-2., -2., -2., -2.],
        [-2., -2., -2., -2.]])
