In [114]:
%load_ext autoreload
%autoreload 3

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [115]:
import pytest
import numpy as np
from minitorch.tensor.tensor import Tensor
from minitorch.activations.activations import (
    Softmax,
    Sigmoid,
    ReLU,
    GELU,
    Tanh
)
from minitorch.losses.losses import MSE, SoftMaxCrossEntropy

In [None]:
# Single linear layer followed by an MSE loss, used to exercise backward().
# No random seed is set, so the values differ on every run.
x1 = Tensor(np.random.randn(1,5), requires_grad=True)
# Note: the weight is drawn with rand (uniform on [0, 1)), the others with randn.
weight1 = Tensor(np.random.rand(5,5), requires_grad=True)
bias1 = Tensor(np.random.randn(5,), requires_grad=True)

# (1,5) @ (5,5) + (5,): the bias is added to the matmul result.
predicted = x1 @ weight1 + bias1
# Random regression target; it is also created with requires_grad=True.
true = Tensor(np.random.randn(1,5), requires_grad=True)

loss_fn = MSE()
loss = loss_fn(predicted, true)
# Backpropagate from the loss; the gradients are inspected in a later cell.
loss.backward()

In [127]:
# Display the forward output stored in `predicted`.
predicted

Tensor(data=[[-1.05527259  2.3816479   0.45572236  0.46083004 -0.40211405]], shape=(1, 5), grad_info= requires_grad=True)

In [59]:
# Inspect the .grad attribute of every tensor involved in the MSE example
# (presumably populated by the preceding loss.backward() call).
x1.grad, weight1.grad, bias1.grad, predicted.grad,loss.grad

(array([[0.3402532, 1.0009477, 2.0437615, 1.3149291, 1.5192282]],
       dtype=float32),
 array([[ 0.55725867,  0.8601358 ,  1.1983076 ,  0.36606446,  0.3420838 ],
        [-0.42049533, -0.64903986, -0.90421706, -0.27622432, -0.25812903],
        [-0.15923941, -0.24578805, -0.34242234, -0.10460472, -0.09775213],
        [ 0.02622831,  0.04048374,  0.05640036,  0.01722944,  0.01610075],
        [-0.77281815, -1.1928546 , -1.6618385 , -0.50766593, -0.47440907]],
       dtype=float32),
 array([0.44210288, 0.6823914 , 0.9506811 , 0.29041836, 0.27139324],
       dtype=float32),
 array([[0.44210288, 0.6823914 , 0.9506811 , 0.29041836, 0.27139324]],
       dtype=float32),
 array(1., dtype=float32))

In [77]:
# NOTE(review): `logits` and `targets` are only assigned in the cell that follows
# this one in the file, so on a fresh kernel this cell fails unless that cell runs first.
logits.shape, targets.shape

((8, 5), (8,))

In [79]:
# 8 rows of 5 random logits, tracked for gradients.
logits = Tensor(np.random.randn(8,5), requires_grad=True)
# 8 integers drawn from [0, 5) — presumably one class index per logits row.
targets = Tensor(np.random.randint(0,5, size=(8,)))

# Rebinds the global `loss` (it was also assigned in the MSE example).
loss = SoftMaxCrossEntropy()(logits, targets)
loss.backward()

In [166]:
from typing import List


from numpy.typing import NDArray


class Parameter(Tensor):
    """A ``Tensor`` that is always constructed with ``requires_grad=True``.

    Args:
        data: Either an existing ``Tensor`` (its ``.data`` attribute is used)
            or any value the ``Tensor`` constructor accepts.
    """

    def __init__(self, data) -> None:
        # Unwrap an existing Tensor so the base constructor receives its
        # underlying storage (presumably a numpy array) rather than a Tensor.
        if isinstance(data, Tensor):
            data = data.data
        super().__init__(data, requires_grad=True)

    def zero_grad(self):
        """Replace an existing gradient with float32 zeros shaped like ``self.data``.

        A gradient that is still ``None`` is left untouched.
        """
        if self.grad is not None:
            self.grad = np.zeros_like(self.data, dtype=np.float32)

    def detach(self):
        """Always raise: detaching a parameter is not allowed.

        Raises:
            RuntimeError: on every call.
        """
        raise RuntimeError(
            'Cannot detach a parameter. Convert to Tensor first if intentional'
        )

    def __repr__(self):
        """Return a multi-line description with data, shape and dtype."""
        # Both opening parentheses ("Parameter(" and "Tensor(") are closed on
        # the last line so the repr is balanced when nested in tuples/lists.
        return (
            f"Parameter(Tensor(data={self.data},\n "
            f"shape={self.shape},\n "
            f"dtype={self.dtype},\n "
            f"requires_grad=True))"
        )

# def _get_parameters(data):
#     params = []
#     if isinstance(data, Parameter):
#         return [data] 
#     if isinstance(data, dict):
#         for value in data.values(): #calling _get_parameters recursively
#             if isinstance(value, Tensor):
#                 params.extend(_get_parameters(value))
#     if isinstance(data, (list, tuple)):
#         for item in data:
#             params.extend(_get_parameters(item))
#     return params

# params = _get_parameters(loss.__dict__)

In [167]:
# Wrap the existing tensors as Parameters; Parameter.__init__ takes `.data`
# from a Tensor argument, so each Parameter is built from the tensor's data.
# Note: `x1` is rebound to the Parameter, unlike w1/b1 which get new names.
x1 = Parameter(x1)
w1 = Parameter(weight1)
b1 = Parameter(bias1)

In [161]:
# Same linear expression as before, now evaluated on the Parameter objects.
x1 @ w1 + b1

Tensor(data=[[-1.05527259  2.3816479   0.45572236  0.46083004 -0.40211405]], shape=(1, 5), grad_info= requires_grad=True)

In [168]:
# Display the three Parameters via Parameter.__repr__.
x1, w1, b1

(Parameter(Tensor(data=[[ 0.84963405 -0.32149822  0.19475245 -0.55565571  1.31958522]],
  shape=(1, 5),
  dtype=float64,
  requires_grad=True
  ,
 Parameter(Tensor(data=[[0.261551   0.56524425 0.59140681 0.37957461 0.21464685]
  [0.48893798 0.39787141 0.38474076 0.15150718 0.65428752]
  [0.47239929 0.38617826 0.90206245 0.2332706  0.95690201]
  [0.48370038 0.65408458 0.88756076 0.88662014 0.61565585]
  [0.32841528 0.61004317 0.54113322 0.99840737 0.18026287]],
  shape=(5, 5),
  dtype=float64,
  requires_grad=True
  ,
 Parameter(Tensor(data=[-1.37690453  1.5125448  -0.3196356  -0.68321827 -0.45627158],
  shape=(5,),
  dtype=float64,
  requires_grad=True
  )

In [24]:
# Rebinds x1 / weight1 / bias1 with new random tensors: (1,5) @ (5,3) + (3,).
x1 = Tensor(np.random.randn(1,5), requires_grad=True)
weight1 = Tensor(np.random.rand(5,3), requires_grad=True)
bias1 = Tensor(np.random.randn(3,), requires_grad=True)

y1 = x1 @ weight1 + bias1
fn1 = Softmax()
Z1 = fn1(y1)
# NOTE(review): backward() is called directly on the softmax output rather than
# on a loss value — confirm Tensor.backward supports this use.
Z1.backward()

Tensor(data=Tensor(data=12.333215271265423), shape=(), grad_info= requires_grad=True)

In [112]:
def tensor(data, requires_grad=True):
    """Build a ``Tensor`` holding a float32 copy of ``data``.

    Args:
        data (list, ndarray): Values to store; converted with
            ``np.array(..., dtype=np.float32)``.
        requires_grad (bool, optional): Forwarded unchanged to the ``Tensor``
            constructor. Defaults to True.

    Returns:
        Tensor: Tensor wrapping the converted float32 array.
    """
    values = np.array(data, dtype= np.float32)
    return Tensor(values, requires_grad=requires_grad)

In [79]:
def test_add_backward_basic():
    """AddBackward should hand the upstream gradient unchanged to both operands."""
    lhs, rhs = tensor([1.0, 2.0]), tensor([3.0, 4.0])
    upstream = tensor([1.0, 1.0], requires_grad=False)

    grad_lhs, grad_rhs = AddBackward(lhs, rhs)(upstream)
    print(grad_lhs, grad_rhs)

    # Both operand gradients are compared against the same upstream values.
    for operand_grad in (grad_lhs, grad_rhs):
        np.testing.assert_allclose(operand_grad.data, upstream.data)


def test_add_backward_requires_grad_respected():
    """The operand built with requires_grad=False gets zeros; the other gets the upstream gradient."""
    frozen = tensor([1.0, 2.0], requires_grad=False)
    tracked = tensor([3.0, 4.0], requires_grad=True)
    upstream = tensor([1.0, 1.0], requires_grad=False)

    grad_frozen, grad_tracked = AddBackward(frozen, tracked)(upstream)
    print(grad_frozen, grad_tracked)

    # Expected gradient for the frozen operand: zeros shaped like its data.
    expected_zero = Tensor(np.zeros_like(frozen.data))

    np.testing.assert_allclose(grad_frozen.data, expected_zero.data)
    np.testing.assert_allclose(grad_tracked.data, upstream.data)


def test_add_backward_invalid_input():
    """Building or calling AddBackward with plain ints must raise AssertionError."""
    with pytest.raises(AssertionError):
        # Construction and invocation are both inside the guarded block,
        # so either step may be the one that raises.
        AddBackward(1, 2)(tensor(1.0))
        
# Run the three AddBackward tests directly in the cell (no pytest runner);
# any failure surfaces as an exception raised from this cell.
test_add_backward_basic()
test_add_backward_requires_grad_respected()
test_add_backward_invalid_input()

Tensor(data=[1. 1.]) Tensor(data=[1. 1.])
Tensor(data=[0. 0.]) Tensor(data=[1. 1.])


In [80]:
def test_sub_backward_basic():
    """SubBackward: upstream gradient for the first operand, its negation for the second."""
    minuend, subtrahend = tensor([1.0, 2.0]), tensor([3.0, 4.0])
    upstream = tensor([1.0, 1.0], requires_grad=False)

    grad_minuend, grad_subtrahend = SubBackward(minuend, subtrahend)(upstream)
    print(grad_minuend, grad_subtrahend)

    np.testing.assert_allclose(grad_minuend.data, upstream.data)
    np.testing.assert_allclose(grad_subtrahend.data, -upstream.data)


def test_sub_backward_requires_grad_respected():
    """SubBackward must return zero gradients when neither operand requires grad."""
    a = tensor([1.0, 2.0], requires_grad=False)
    b = tensor([3.0, 4.0], requires_grad=False)
    grad_out = tensor([1.0, 1.0], requires_grad=False)

    # The operation under test is subtraction, so build SubBackward here;
    # using AddBackward would leave SubBackward's requires_grad handling untested.
    fn = SubBackward(a, b)
    grad_a, grad_b = fn(grad_out)
    # Expected value for both operands: zeros shaped like the operand data.
    test_grad = Tensor(np.zeros_like(a.data))
    print(grad_a, grad_b)
    np.testing.assert_allclose(grad_a.data, test_grad.data)
    np.testing.assert_allclose(grad_b.data, test_grad.data)


def test_sub_backward_invalid_input():
    """Building or calling SubBackward with plain ints must raise AssertionError.

    Named after the operation it exercises so it does not shadow the
    AddBackward test called ``test_add_backward_invalid_input``.
    """
    with pytest.raises(AssertionError):
        fn = SubBackward(1, 2)
        fn(tensor(1.0))

# Run the SubBackward tests directly in the cell (no pytest runner).
test_sub_backward_basic()
test_sub_backward_requires_grad_respected()
test_sub_backward_invalid_input()

Tensor(data=[1. 1.]) Tensor(data=[-1. -1.])
Tensor(data=[0. 0.]) Tensor(data=[0. 0.])


In [85]:
def test_div_backward_basic():
    """DivBackward: grad wrt a is g / b, grad wrt b is -g * a / b**2 (g = upstream gradient)."""
    numerator = tensor([6.0, 8.0])
    denominator = tensor([2.0, 4.0])
    upstream = tensor([20.0, 16.0], requires_grad=False)

    grad_num, grad_den = DivBackward(numerator, denominator)(upstream)
    print(grad_num, grad_den)

    expected_num = upstream.data / denominator.data
    np.testing.assert_allclose(grad_num.data, expected_num)

    expected_den = -upstream.data * numerator.data / (denominator.data ** 2)
    np.testing.assert_allclose(grad_den.data, expected_den)


def test_div_backward_requires_grad_respected():
    """A numerator built with requires_grad=False gets zeros; the denominator still gets -g * a / b**2."""
    numerator = tensor([6.0, 8.0], requires_grad=False)
    denominator = tensor([2.0, 4.0], requires_grad=True)
    upstream = tensor([1.0, 1.0], requires_grad=False)

    grad_num, grad_den = DivBackward(numerator, denominator)(upstream)
    # Zeros shaped like the upstream gradient, used as the expected numerator gradient.
    expected_zero = Tensor(np.zeros_like(upstream.data))
    print(grad_num, grad_den)

    np.testing.assert_allclose(grad_num.data, expected_zero.data)

    expected_den = -upstream.data * numerator.data / (denominator.data ** 2)
    np.testing.assert_allclose(grad_den.data, expected_den)
    
# Run the DivBackward tests directly in the cell (no pytest runner).
test_div_backward_basic()
test_div_backward_requires_grad_respected()

Tensor(data=[10.  4.]) Tensor(data=[-30.  -8.])
Tensor(data=[0. 0.]) Tensor(data=[-1.5 -0.5])


In [135]:
def test_transpose_backward_basic():
    """TransposeBackward over axes (0, 1) should return the upstream gradient transposed on those axes."""
    source = tensor([[6.0, 8.0], [2.0, 4.0]])
    upstream = tensor([[20.0, 16.0], [18.0, 12.0]], requires_grad=False)

    # The backward op's result is indexed with [0] below to reach the gradient.
    grads = TransposeBackward(source,0,1)(upstream)
    expected = upstream.transpose(0,1)

    print(grads)
    print(expected)
    np.testing.assert_allclose(grads[0].data, expected.data)



# Run the TransposeBackward test directly in the cell (no pytest runner).
test_transpose_backward_basic()

(Tensor(data=[[20. 18.]
 [16. 12.]], shape=(2, 2), grad_info= None),)
Tensor(data=[[20. 18.]
 [16. 12.]])


In [142]:
# Scratch probe: Tensor() is called without `data`, so construction raises
# TypeError (see the recorded output) before `.grad` is ever read.
Tensor().grad

TypeError: Tensor.__init__() missing 1 required positional argument: 'data'

In [None]:
def enable_autograd(quiet=False):
    """Patch ``Tensor.__init__`` so every new tensor carries ``requires_grad`` and ``grad``.

    The patch is applied at most once: after patching, the marker attribute
    ``Tensor._autograd_enabled`` is set and later calls return immediately.

    Args:
        quiet: Accepted but not used by this function.

    Returns:
        None.
    """
    #* GUARD: prevent double patching
    if hasattr(Tensor, '_autograd_enabled'):
        #* silently return if already enabled - no need to warn
        return

    #*======= STEP 1 : Add gradient infrastructure to tensor ========
    _original_init = Tensor.__init__ # store the original init to extend it

    # The keyword is spelled `requires_grad` to match how Tensor is constructed
    # elsewhere in this notebook (e.g. Tensor(..., requires_grad=True)).
    def gradient_aware_init(self, data, requires_grad=False):
        """Extend Tensor init to support gradient tracking."""
        _original_init(self, data)
        self.requires_grad = requires_grad
        self.grad = None

    Tensor.__init__ = gradient_aware_init
    # Set the marker the guard checks; without it a second call would wrap
    # the already-patched __init__ again.
    Tensor._autograd_enabled = True

In [143]:
# Scratch: capture the current Tensor.__init__ in a cell-level name.
_original_init = Tensor.__init__

In [145]:
# Scratch probe: `self` is not defined at cell scope, so this raises NameError
# (see the recorded output); the call only makes sense inside a method body.
_original_init(self,data)

NameError: name 'self' is not defined