In [None]:
from typing import Tuple

import torch
import numpy as np

import minitorch.autodiff.tensor_functions as tf
from minitorch import operators
from minitorch.module import LinearTensorLayer, Parameter
from minitorch.autodiff import Context, Tensor, topological_sort
from minitorch.autodiff.tensor_ops import SimpleBackend

In [None]:
# Manual forward/backward pass through one linear layer followed by a sigmoid.
input_dim = 2
output_dim = 1
weights = LinearTensorLayer._initialise_parameter(input_dim, output_dim).value
bias = LinearTensorLayer._initialise_parameter(output_dim).value

# Random inputs plus a fixed binary target for each of the samples.
n_samples = 10
inputs = tf.rand((n_samples, input_dim))
targets = tf.tensor([1, 1, 1, 0, 0, 0, 1, 0, 1, 0])

# Forward: give `inputs` a trailing axis of size 1 and `weights` a leading axis
# of size 1 (presumably so the element-wise product broadcasts -- confirm against
# the tensor backend), then reduce over dim 1.
inputs = inputs.view(*inputs.shape, 1)
_weights = weights.view(1, *weights.shape)
out = (inputs * _weights).sum(dim=1)

# Add the bias, flatten to one value per target and squash with a sigmoid.
predictions = (
    (out.view(inputs.shape[0], bias.size) + bias)
    .view(targets.size)
    .sigmoid()
)

# Algebraically: equals `predictions` where targets == 1 and
# `1 - predictions` where targets == 0.
predictions_ = predictions * targets + (predictions - 1.0) * (targets - 1.0)
predictions_sum = predictions_.sum()
predictions_sum.backward()

# Mean negative-log loss built on the same quantity (currently disabled):
# probas = (predictions * targets) + (predictions - 1.0) * (targets - 1.0)
# loss = ((-probas.log()) / targets.size).sum()
# loss.backward()


In [None]:
# Test loss: rebuild the expression one operation at a time, labelling every
# intermediate tensor so it can be identified when the graph is inspected.
targets = tf.tensor([1, 1, 1, 0, 0, 0, 1, 0, 1, 0])
targets.name = "targets"

# Graph input that should receive a gradient.
out = tf.rand((10,), requires_grad=True)
out.name = "out"

predictions = out.sigmoid()
predictions.name = "predictions"

# First term: predictions * targets
predictions_x_targets = predictions * targets
predictions_x_targets.name = "predictions_x_targets"

# Second term: (predictions - 1) * (targets - 1), built from its two factors.
predictions_m_1 = predictions - 1.0
predictions_m_1.name = "predictions_m_1"

targets_m_1 = targets - 1.0
targets_m_1.name = "targets_m_1"

pred_m_1_x_target_m_1 = predictions_m_1 * targets_m_1
pred_m_1_x_target_m_1.name = "pred_m_1_x_target_m_1"

# Sum of both terms.
predictions_ = predictions_x_targets + pred_m_1_x_target_m_1
predictions_.name = "predictions_"

In [None]:
# Run `topological_sort` starting from the final tensor of the labelled graph
# and keep the result for inspection.
diff_chain = topological_sort(predictions_)

In [None]:
# Show the label of every entry in the sorted chain, in the order returned.
[node.name for node in diff_chain]

In [None]:
# Reduce to a single value with `sum()` and backpropagate from it.
predictions_.sum().backward()

In [None]:
# minitorch gradients after the backward pass, one per line.
print(
    f"out grad {out.grad}",
    f"predictions grad {predictions.grad}",
    sep="\n",
)

In [None]:
## Compare to torch
# Rebuild the same expression in PyTorch, seeded from the `.data.storage`
# of the minitorch tensors so both frameworks see the same values.
torch_targets = torch.tensor(targets.data.storage)
torch_out = torch.tensor(out.data.storage, requires_grad=True)
torch_predictions = torch_out.sigmoid()
# `retain_grad` is a method and has to be *called* before `backward()` so that
# this non-leaf tensor keeps its `.grad`. Assigning `True` to the attribute
# only shadowed the method and retained nothing.
torch_predictions.retain_grad()

torch_predictions_ = (torch_predictions * torch_targets) + (torch_predictions - 1.0) * (torch_targets - 1.0)
torch_predictions_.sum().backward()

In [None]:
# PyTorch gradients, to be compared with the minitorch values printed earlier.
print(f"out grad {torch_out.grad}")
# Print the PyTorch tensor's gradient, not the minitorch `predictions.grad`.
# `torch_predictions` is a non-leaf tensor, so its `.grad` is populated only
# when `retain_grad()` was called on it before `backward()`.
print(f"predictions grad {torch_predictions.grad}")

In [None]:
# Test loss without sigmoid: the random tensor is used directly as the
# predictions, so it is the graph input that receives the gradient.
predictions = tf.rand((10,), requires_grad=True)
targets = tf.tensor([1, 1, 1, 0, 0, 0, 1, 0, 1, 0])

predictions_ = (
    predictions * targets
    + (predictions - 1.0) * (targets - 1.0)
)
predictions_.sum().backward()

# Display the gradient as the cell output.
predictions.grad

In [None]:
## Compare to torch
# Same expression in PyTorch, seeded from the minitorch tensors' `.data.storage`.
torch_targets = torch.tensor(targets.data.storage)
torch_predictions = torch.tensor(predictions.data.storage, requires_grad=True)

torch_predictions_ = (
    torch_predictions * torch_targets
    + (torch_predictions - 1.0) * (torch_targets - 1.0)
)
torch_predictions_.sum().backward()

# Display the reference gradient as the cell output.
torch_predictions.grad