In [None]:
import numpy as np


class Tensor:
    """NumPy-backed value that remembers the operation that produced it.

    Attributes:
        data: The value, stored as a ``float32`` NumPy array.
        requires_grad: Whether operations should send gradients to this tensor.
        grad: Accumulated gradient (same shape as ``data``), or ``None`` if
            ``backward`` has not reached this tensor yet.
    """

    def __init__(self, data, requires_grad=False):
        """Wrap ``data`` (anything ``np.array`` accepts) as a float32 tensor."""
        self.data = np.array(data, dtype=np.float32)
        self.requires_grad = requires_grad
        self.grad = None
        self._grad_fn = None

    def set_grad_fn(self, grad_fn):
        """Record the object whose ``backward(grad)`` continues backpropagation."""
        self._grad_fn = grad_fn

    @staticmethod
    def _fit_to_shape(grad, shape):
        """Return ``grad`` summed or broadcast so that its shape equals ``shape``.

        An operand that was broadcast in the forward pass receives a gradient
        with the larger, broadcast shape; the contributions along the
        broadcast axes have to be summed to obtain the operand's gradient.
        """
        grad = np.asarray(grad)
        if grad.shape == shape:
            return grad

        # Broadcasting prepends axes on the left: sum those away first.
        extra_dims = grad.ndim - len(shape)
        if extra_dims > 0:
            grad = grad.sum(axis=tuple(range(extra_dims)))

        # Axes that had size 1 in the operand but were stretched.
        if grad.ndim == len(shape):
            stretched = tuple(
                axis
                for axis, (have, want) in enumerate(zip(grad.shape, shape))
                if want == 1 and have != 1
            )
            if stretched:
                grad = grad.sum(axis=stretched, keepdims=True)

        # A gradient smaller than the tensor (e.g. a scalar) is expanded.
        if grad.shape != shape:
            grad = np.broadcast_to(grad, shape)
        return grad

    def backward(self, grad=None):
        """Accumulate ``grad`` into ``self.grad`` and propagate it upstream.

        Args:
            grad: Gradient of the final output with respect to this tensor.
                Defaults to an array of ones shaped like ``data``.

        Raises:
            ValueError: If ``grad`` cannot be reduced or broadcast to the
                shape of ``data``.
        """
        if grad is None:
            grad = np.ones_like(self.data)
        grad = self._fit_to_shape(grad, self.data.shape)

        if self.grad is None:
            # Copy: the same array is often handed to several tensors, so
            # keeping a reference would let them share (and corrupt) state.
            self.grad = np.array(grad)
        else:
            # Out-of-place on purpose, for the same aliasing reason.
            self.grad = self.grad + grad

        if self._grad_fn is not None:
            self._grad_fn.backward(grad)

    def __add__(self, other):
        return Add.apply(self, other)

    def __mul__(self, other):
        return Mul.apply(self, other)

    def __neg__(self):
        return Neg.apply(self)

    def __sub__(self, other):
        # Subtraction is expressed as addition of the negation.
        return self + (-other)

    def __truediv__(self, other):
        # Division is expressed as multiplication by the reciprocal.
        return self * (other**-1)

    def __pow__(self, power):
        return Pow.apply(self, power)

    def matmul(self, other):
        """Return the ``MatMul`` product of this tensor with ``other``."""
        return MatMul.apply(self, other)

    def sum(self, axis=None, keepdims=False):
        """Return the sum of the elements over ``axis`` (all axes if ``None``)."""
        return Sum.apply(self, axis, keepdims)

    def __repr__(self):
        return f"Tensor(data={self.data}, requires_grad={self.requires_grad})"

In [None]:
class Add:
    """Element-wise addition node: ``c = a + b``."""

    def __init__(self, a, b):
        # Operands are kept so backward() can route the gradient to them.
        self.a, self.b = a, b

    @staticmethod
    def apply(a, b):
        """Compute ``a + b`` and attach an ``Add`` node to the result."""
        needs_grad = a.requires_grad or b.requires_grad
        out = Tensor(a.data + b.data, requires_grad=needs_grad)
        out.set_grad_fn(Add(a, b))
        return out

    def backward(self, grad):
        """Pass ``grad`` unchanged to every operand that requires a gradient."""
        for operand in (self.a, self.b):
            if operand.requires_grad:
                operand.backward(grad)


class Mul:
    """Element-wise multiplication node: ``c = a * b``."""

    def __init__(self, a, b):
        # Both operand values are needed later for the product rule.
        self.a, self.b = a, b

    @staticmethod
    def apply(a, b):
        """Compute ``a * b`` and attach a ``Mul`` node to the result."""
        needs_grad = a.requires_grad or b.requires_grad
        out = Tensor(a.data * b.data, requires_grad=needs_grad)
        out.set_grad_fn(Mul(a, b))
        return out

    def backward(self, grad):
        """Send each operand ``grad`` times the *other* operand's value."""
        pairs = ((self.a, self.b), (self.b, self.a))
        for target, partner in pairs:
            if target.requires_grad:
                target.backward(grad * partner.data)


class Neg:
    """Negation node: ``c = -a``."""

    def __init__(self, a):
        self.a = a

    @staticmethod
    def apply(a):
        """Compute ``-a`` and attach a ``Neg`` node to the result."""
        out = Tensor(-a.data, requires_grad=a.requires_grad)
        out.set_grad_fn(Neg(a))
        return out

    def backward(self, grad):
        """Forward the sign-flipped gradient to the operand, if it wants one."""
        if not self.a.requires_grad:
            return
        self.a.backward(-grad)


class Pow:
    """Power node: ``c = a ** power`` where ``power`` is a plain exponent."""

    def __init__(self, a, power):
        self.a = a
        self.power = power

    @staticmethod
    def apply(a, power):
        """Compute ``a ** power`` and attach a ``Pow`` node to the result."""
        out = Tensor(a.data**power, requires_grad=a.requires_grad)
        out.set_grad_fn(Pow(a, power))
        return out

    def backward(self, grad):
        """Propagate ``grad * power * a ** (power - 1)`` to the base."""
        base = self.a
        if not base.requires_grad:
            return
        exponent = self.power
        # Same left-to-right evaluation as the closed-form expression above.
        scaled = grad * exponent
        base.backward(scaled * (base.data ** (exponent - 1)))


class MatMul:
    """Matrix-product node: ``c = np.dot(a, b)``."""

    def __init__(self, a, b):
        self.a, self.b = a, b

    @staticmethod
    def apply(a, b):
        """Compute ``np.dot(a, b)`` and attach a ``MatMul`` node to the result."""
        product = np.dot(a.data, b.data)
        needs_grad = a.requires_grad or b.requires_grad
        out = Tensor(product, requires_grad=needs_grad)
        out.set_grad_fn(MatMul(a, b))
        return out

    def backward(self, grad):
        """Propagate ``grad . b^T`` to ``a`` and ``a^T . grad`` to ``b``."""
        left, right = self.a, self.b
        if left.requires_grad:
            grad_left = np.dot(grad, right.data.T)
            left.backward(grad_left)
        if right.requires_grad:
            grad_right = np.dot(left.data.T, grad)
            right.backward(grad_right)


class Sum:
    """Reduction node: ``c = a.sum(axis=axis, keepdims=keepdims)``."""

    @staticmethod
    def apply(a, axis=None, keepdims=False):
        """Sum ``a`` over ``axis`` and attach a ``Sum`` node to the result.

        Args:
            a: Input tensor.
            axis: ``None`` (reduce everything), an int, or a tuple of ints.
            keepdims: Keep the reduced axes as size-1 dimensions.
        """
        data = a.data.sum(axis=axis, keepdims=keepdims)
        c = Tensor(data, requires_grad=a.requires_grad)
        c.set_grad_fn(Sum(a, axis, keepdims))
        return c

    def __init__(self, a, axis, keepdims):
        self.a = a
        self.axis = axis
        self.keepdims = keepdims

    def backward(self, grad):
        """Spread ``grad`` back over the axes that were summed away.

        Every input element contributes with weight 1 to its output element,
        so the input gradient is the output gradient repeated along the
        reduced axes. The value handed to ``a.backward`` always has the shape
        of ``a.data``.
        """
        if not self.a.requires_grad:
            return

        grad = np.asarray(grad)
        if self.axis is not None and not self.keepdims:
            # Put the reduced axes back as size-1 dimensions so the gradient
            # lines up with the input before broadcasting.
            grad = np.expand_dims(grad, self.axis)

        # broadcast_to returns a read-only view; copy so downstream code
        # receives an ordinary writable array.
        self.a.backward(np.broadcast_to(grad, self.a.data.shape).copy())

In [None]:
class Linear:
    """Affine layer computing ``x.matmul(weights) + bias``."""

    def __init__(self, in_features, out_features):
        # Weights: standard-normal draws scaled by sqrt(2 / in_features).
        initial_weights = np.random.randn(in_features, out_features) * np.sqrt(
            2. / in_features
        )
        self.weights = Tensor(initial_weights, requires_grad=True)
        # Bias starts at zero, one entry per output feature.
        self.bias = Tensor(np.zeros(out_features), requires_grad=True)

    def __call__(self, x):
        """Make the layer callable; equivalent to ``forward(x)``."""
        return self.forward(x)

    def forward(self, x):
        """Return ``x.matmul(weights) + bias``."""
        projected = x.matmul(self.weights)
        return projected + self.bias

    def parameters(self):
        """Return the trainable tensors as ``[weights, bias]``."""
        return [self.weights, self.bias]


In [None]:
class SGD:
    """Plain stochastic gradient descent: ``data -= lr * grad``."""

    def __init__(self, parameters, lr=0.01):
        """Create the optimizer.

        Args:
            parameters: Iterable of objects exposing ``data`` and ``grad``.
            lr: Learning rate multiplied with each gradient.
        """
        # Materialise the iterable: step() and zero_grad() each loop over it,
        # so a generator would be exhausted after the first pass.
        self.parameters = list(parameters)
        self.lr = lr

    def step(self):
        """Update every parameter that has a gradient, in place."""
        for param in self.parameters:
            if param.grad is not None:
                param.data -= self.lr * param.grad

    def zero_grad(self):
        """Reset every parameter's gradient to ``None``."""
        for param in self.parameters:
            param.grad = None


In [None]:
# Build a noisy synthetic regression dataset (seeded for reproducibility)
np.random.seed(42)
X = np.random.rand(100, 1)
y = 2 + 3 * X + 0.1 * np.random.randn(100, 1)  # y = 3x + 2 + noise

# Wrap the arrays as tensors; requires_grad defaults to False
X_tensor = Tensor(X)
y_tensor = Tensor(y)

# Model made of a single Linear(1, 1) layer
class SimpleLinearModel:
    """Thin wrapper that delegates everything to one ``Linear(1, 1)`` layer."""

    def __init__(self):
        self.linear = Linear(1, 1)

    def __call__(self, x):
        """Run ``x`` through the wrapped layer and return its output."""
        layer = self.linear
        return layer(x)

    def parameters(self):
        """Return the wrapped layer's parameter list."""
        layer = self.linear
        return layer.parameters()

# Initialize model and optimizer
model = SimpleLinearModel()
optimizer = SGD(model.parameters(), lr=0.01)

# Averaging factor for the loss. It is wrapped in a Tensor because Mul.apply
# reads `.data` on both operands.
num_samples = X_tensor.data.shape[0]
inv_num_samples = Tensor(1.0 / num_samples)

# Training loop
epochs = 1000
for epoch in range(epochs):
    # Forward pass
    predictions = model(X_tensor)
    squared_error = (predictions - y_tensor) ** 2
    # Mean Squared Error: divide the summed error by the sample count so the
    # gradient magnitude does not grow with the size of the dataset.
    loss = squared_error.sum() * inv_num_samples

    # Backward pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {loss.data}")

# Print final parameters
print(f"Weights: {model.linear.weights.data.flatten()}")
print(f"Bias: {model.linear.bias.data}")
