In [1]:
from typing import Tuple

import numpy as np

import minitorch.autodiff.tensor_functions as tf
from minitorch import operators
from minitorch.autodiff import Context, Tensor, topological_sort
from minitorch.autodiff.tensor_ops import SimpleBackend

In [2]:
class TensorFunction2(tf.TensorFunction):
    """Two-input tensor function computing f(x, y) = x * y + x.

    Both passes are expressed through the ``add_zip`` / ``mul_zip`` operations
    found on each tensor's ``func`` attribute.
    """

    @classmethod
    def _forward(cls, ctx: Context, x: Tensor, y: Tensor) -> Tensor:
        """Evaluate f(x, y) = x * y + x.

        Args:
            ctx: Context on which both inputs are saved for the backward pass.
            x: First operand.
            y: Second operand.

        Returns:
            The result of ``add_zip(mul_zip(x, y), x)``.
        """
        ctx.save_for_backward(x, y)
        product = x.func.mul_zip(x, y)
        return x.func.add_zip(product, x)

    @classmethod
    def _backward(cls, ctx: Context, grad_out: Tensor) -> Tuple[Tensor, ...]:
        """Scale the partial derivatives of f by the upstream gradient.

        Uses df/dx = y + 1 and df/dy = x, with x and y read back from
        ``ctx.saved_tensors``.

        Args:
            ctx: Context holding the tensors saved by ``_forward``.
            grad_out: Upstream gradient.

        Returns:
            The pair ``(grad_out * (y + 1), grad_out * x)``.
        """
        saved_x, saved_y = ctx.saved_tensors
        # The constant 1 is wrapped via the tensor's own helper before adding.
        y_plus_one = saved_y.func.add_zip(saved_y, saved_y._ensure_tensor(1))
        grad_wrt_x = grad_out.func.mul_zip(grad_out, y_plus_one)
        grad_wrt_y = grad_out.func.mul_zip(grad_out, saved_x)
        return grad_wrt_x, grad_wrt_y

In [3]:
# Create tensors
def make_leaf(shape: Tuple[int, ...], fill: float, name: str) -> Tensor:
    """Build a constant-filled tensor that is flagged for gradient tracking.

    Args:
        shape: Shape passed to ``tf.ones``.
        fill: Scalar the all-ones tensor is multiplied by.
        name: Label stored on the tensor's ``name`` attribute.

    Returns:
        The tensor, with ``requires_grad`` set to True and ``name`` assigned.
    """
    leaf = tf.ones(shape=shape) * fill
    leaf.requires_grad = True
    leaf.name = name
    return leaf


# The four inputs have different shapes: (3,), (5, 1), (1, 3) and (5, 3).
# NOTE(review): presumably chosen to exercise broadcasting - confirm.
x = make_leaf((3,), 0.5, "tensor x")
y = make_leaf((5, 1), 0.75, "tensor y")
z = make_leaf((1, 3), 0.5, "tensor z")
w = make_leaf((5, 3), 0.25, "tensor w")

In [4]:
# Forward - once
# Chain three applications of TensorFunction2: each result is passed as the
# first argument of the next call, and every result is given a name right
# after it is created.
out1 = TensorFunction2.apply(x, y)
out1.name = "tensor out1"
out2 = TensorFunction2.apply(out1, z)
out2.name = "tensor out2"
out3 = TensorFunction2.apply(out2, w)
out3.name = "tensor out3"

# Collect the tensors that topological_sort returns for out3; their names are
# listed in the following cell.
diff_chain = topological_sort(out3)

In [5]:
# Names of the tensors in diff_chain, in the order they appear there.
[node.name for node in diff_chain]

['tensor out3',
 'tensor out2',
 'tensor w',
 'tensor out1',
 'tensor z',
 'tensor x',
 'tensor y']

In [6]:
# Backward
# Build an upstream gradient with the same shape as out3, holding 0.5 in each
# of its out3.size entries, and start back-propagation from out3 with it.
grad_out = Tensor.make(
    list(0.5 for _ in range(out3.size)),
    backend=SimpleBackend,
    shape=out3.shape,
)
grad_out.name = "grad out"
out3.backward(grad_out=grad_out)

running back prop for var = tensor out3
computed diffs wrt. inputs ['tensor out2', 'tensor w']
pased down upstream diff grad out
0.0
0.0
=====
running back prop for var = tensor out2
computed diffs wrt. inputs ['tensor out1', 'tensor z']
pased down upstream diff diff tensor out3 wrt. tensor out2
0.0
0.0
=====
running back prop for var = tensor w
skipping var = tensor w - its a leaf
=====
running back prop for var = tensor out1
computed diffs wrt. inputs ['tensor x', 'tensor y']
pased down upstream diff diff tensor out2 wrt. tensor out1
0.0
0.0
=====
running back prop for var = tensor z
skipping var = tensor z - its a leaf
=====
running back prop for var = tensor x
skipping var = tensor x - its a leaf
=====
running back prop for var = tensor y
skipping var = tensor y - its a leaf
=====


In [None]:

# NOTE(review): this rebinds out2 and out3 to fresh results of
# TensorFunction2.apply; unlike the forward cell above, no .name is assigned
# to the new tensors here.
out2 = TensorFunction2.apply(out1, z)
out3 = TensorFunction2.apply(out2, w)



# Check derivatives - calculated from torch.
# NOTE(review): the expected values below are rounded to four decimals, while
# np.allclose is called with its default tolerances (rtol=1e-05, atol=1e-08).
# For the smaller values (1.4062, 0.6562) that rounding may exceed the default
# tolerance - consider passing atol=1e-4 when re-enabling these checks.
# expected_grad = np.array([8.2031 for _ in range(x.size)])
# assert x.derivative is not None
# assert x.derivative.shape == x.shape
# assert np.allclose(np.array(x.derivative.data.storage), expected_grad)

# expected_grad = np.array([1.4062 for _ in range(y.size)])
# assert y.derivative is not None
# assert y.derivative.shape == y.shape
# assert np.allclose(np.array(y.derivative.data.storage), expected_grad)

# expected_grad = np.array([2.7344 for _ in range(z.size)])
# assert z.derivative is not None
# assert z.derivative.shape == z.shape
# assert np.allclose(np.array(z.derivative.data.storage), expected_grad)

# expected_grad = np.array([0.6562 for _ in range(w.size)])
# assert w.derivative is not None
# assert w.derivative.shape == w.shape
# assert np.allclose(np.array(w.derivative.data.storage), expected_grad)

In [None]:
# NOTE(review): `s` is not defined in any cell visible in this notebook, so
# this line presumably relies on leftover kernel state - confirm or remove.
hash(s.id_)