In [11]:
import numpy as np

class Operator:
    def __init__(self, *args):
        self.args = args

    def __call__(self):
        raise NotImplementedError

    def backward(self, grad):
        raise NotImplementedError
    
class Add(Operator):
    def __call__(self):
        return self.args[0] + self.args[1]

    def backward(self, grad):
        return grad, grad
    
class Mul(Operator):
    def __call__(self):
        return self.args[0] * self.args[1]

    def backward(self, grad):
        return grad * self.args[1], grad * self.args[0]
    
class Matmul(Operator):
    def __call__(self):
        return np.dot(self.args[0], self.args[1])

    def backward(self, grad):
        return np.dot(grad, self.args[1].T), np.dot(self.args[0].T, grad)
    
class Sum(Operator):
    def __call__(self):
        return np.sum(self.args[0])

    def backward(self, grad):
        return grad * np.ones_like(self.args[0])

class Tensor:
    def __init__(self, data, requires_grad=False):
        self.data = np.array(data)
        self.requires_grad = requires_grad
        self.grad = np.zeros_like(self.data) if requires_grad else None
        self._grad_fn = None

    def __add__(self, other):
        other_data = other.data if isinstance(other, Tensor) else other
        result = Tensor(self.data + other_data, requires_grad=self.requires_grad or (isinstance(other, Tensor) and other.requires_grad))
        if self.requires_grad:
            result._grad_fn = ('add', self, other)
        return result

    def __mul__(self, other):
        other_data = other.data if isinstance(other, Tensor) else other
        result = Tensor(self.data * other_data, requires_grad=self.requires_grad or (isinstance(other, Tensor) and other.requires_grad))
        if self.requires_grad:
            result._grad_fn = ('mul', self, other)
        return result

    def matmul(self, other):
        assert isinstance(other, Tensor), "matmul requires another tensor."
        result = Tensor(np.dot(self.data, other.data), requires_grad=self.requires_grad or other.requires_grad)
        if self.requires_grad:
            result._grad_fn = ('matmul', self, other)
        return result
    
    def sum(self):
        result = Tensor(np.sum(self.data), requires_grad=self.requires_grad)
        if self.requires_grad:
            result._grad_fn = ('sum', self)
        return result

    def backward(self, grad=None):
        if self.grad is None:
            raise RuntimeError("Gradients are not being tracked for this tensor.")
        if grad is None:
            grad = np.ones_like(self.data)
        self.grad += grad
        self._backward()

    def _backward(self):
        if self._grad_fn is None:
            return

        op, left, right = self._grad_fn
        if op == 'add':
            if left.requires_grad:
                left.grad += self.grad
            if isinstance(right, Tensor) and right.requires_grad:
                right.grad += self.grad
            left._backward()
            if isinstance(right, Tensor):
                right._backward()
        elif op == 'mul':
            if left.requires_grad:
                left.grad += self.grad * (right.data if isinstance(right, Tensor) else right)
            if isinstance(right, Tensor) and right.requires_grad:
                right.grad += self.grad * left.data
            left._backward()
            if isinstance(right, Tensor):
                right._backward()
        elif op == 'matmul':
            if left.requires_grad:
                left.grad += np.dot(self.grad, right.data.T)
            if right.requires_grad:
                right.grad += np.dot(left.data.T, self.grad)
            left._backward()
            right._backward()
        elif op == 'sum':
            if left.requires_grad:
                left.grad += self.grad
            left._backward()

In [13]:
# Create scalar tensors with gradient tracking
a = Tensor(2.0, requires_grad=True)
b = Tensor(3.0, requires_grad=True)

# Scalar operations
c = a * b + a
c.backward()

print("Scalar operations:")
print("Value of a:", a.data)
print("Gradient of a:", a.grad)
print("Value of b:", b.data)
print("Gradient of b:", b.grad)

# Create matrix tensors with gradient tracking
x = Tensor([[1, 2], [3, 4]], requires_grad=True)
y = Tensor([[5, 6], [7, 8]], requires_grad=True)

# Matrix operations
z = x.matmul(y) + x
z = x.sum()
print(z.data)
z.backward()

print("\nMatrix operations:")
print("Value of x:", x.data)
print("Gradient of x:", x.grad)
print("Value of y:", y.data)
print("Gradient of y:", y.grad)


Scalar operations:
Value of a: 2.0
Gradient of a: 4.0
Value of b: 3.0
Gradient of b: 2.0
10


ValueError: not enough values to unpack (expected 3, got 2)