# Example usage

To use `meerkat_dl` in a project:

In [1]:
import mdl

print(mdl.__version__)

0.1.0


In [2]:
from mdl.net.layer import LinearLayer
import numpy as np

from mdl.net.activation import ReLU
from mdl.net.optimizers import GradientDescent
from mdl.net.loss import MeanSquaredErrorLoss
from mdl.tensor import Tensor, Parameter
from mdl.autodiff.dcgraph import DCGraph
from mdl.autodiff.linear import Linear

In [3]:
input_a = Tensor(np.array([[1,2,3],[1,2,3]]), requires_grad=False)
parameter_b = Tensor(np.array([[1,2,3],[1,2,3]]), requires_grad=True)
target_c = Tensor(np.array([[3,4,5],[6,7,8]]), requires_grad=False)

sum_fn = input_a + parameter_b
abs_error = target_c - sum_fn
sq_abs_error = abs_error ** 2
sum_sq_abs_error = sq_abs_error.sum()
mean_sum_sq_abs_error = sum_sq_abs_error / Tensor(sum_fn.shape[0])

global_graph = DCGraph()

(2, 3)
(2, 3)
(2, 3)
(2, 3)
()


In [4]:
mean_sum_sq_abs_error.backward()

global_graph.reset_graph()

global_graph

backprop calculation
curr tensor: Tensor(15.5)
curr tensor shape: ()
backprop calculation
curr tensor: Tensor(31.0)
curr tensor shape: ()
child tensor: Tensor(15.5)
child parent bshap: ()
unbroadcast axes to be summed: ()
backprop calculation
curr tensor: Tensor([[ 1.  0.  1.]
 [16.  9.  4.]])
curr tensor shape: (2, 3)
child tensor: Tensor(31.0)
child parent bshap: (2, 3)
unbroadcast axes to be summed: ()
backprop calculation
curr tensor: Tensor([[ 1.  0. -1.]
 [ 4.  3.  2.]])
curr tensor shape: (2, 3)
child tensor: Tensor([[ 1.  0.  1.]
 [16.  9.  4.]])
child parent bshap: (2, 3)
unbroadcast axes to be summed: ()
backprop calculation
curr tensor: Tensor([[3. 4. 5.]
 [6. 7. 8.]])
curr tensor shape: (2, 3)
backprop calculation
curr tensor: Tensor([[2. 4. 6.]
 [2. 4. 6.]])
curr tensor shape: (2, 3)
child tensor: Tensor([[ 1.  0. -1.]
 [ 4.  3.  2.]])
child parent bshap: (2, 3)
unbroadcast axes to be summed: ()
backprop calculation
curr tensor: Tensor([[1. 2. 3.]
 [1. 2. 3.]])
curr tensor

DCGraph({'_tensor_nodes': set()})

In [5]:
print(abs_error in global_graph.tensor_nodes)
abs_error.backward(np.array([[1.,1.,1.],[1.,1.,1.]]))

False
backprop calculation
curr tensor: Tensor([[ 1.  0. -1.]
 [ 4.  3.  2.]])
curr tensor shape: (2, 3)
child tensor: Tensor([[ 1.  0.  1.]
 [16.  9.  4.]])
child parent bshap: (2, 3)
unbroadcast axes to be summed: ()
backprop calculation
curr tensor: Tensor([[3. 4. 5.]
 [6. 7. 8.]])
curr tensor shape: (2, 3)
backprop calculation
curr tensor: Tensor([[2. 4. 6.]
 [2. 4. 6.]])
curr tensor shape: (2, 3)
child tensor: Tensor([[ 1.  0. -1.]
 [ 4.  3.  2.]])
child parent bshap: (2, 3)
unbroadcast axes to be summed: ()
backprop calculation
curr tensor: Tensor([[1. 2. 3.]
 [1. 2. 3.]])
curr tensor shape: (2, 3)
backprop calculation
curr tensor: Tensor([[1. 2. 3.]
 [1. 2. 3.]])
curr tensor shape: (2, 3)
child tensor: Tensor([[2. 4. 6.]
 [2. 4. 6.]])
child parent bshap: (2, 3)
unbroadcast axes to be summed: ()


In [6]:
input_size, output_size = 2, 1
batch_size = 1

# Create an instance of the Linear operation
linear = Linear(input_size, output_size)

# Generate random input tensor
input_tensor = Tensor(np.random.rand(batch_size, input_size))
print(f"input tensor: {input_tensor}")
# Generate a random target tensor for loss calculation
target_tensor = Tensor(np.random.rand(batch_size, output_size))
print(f"target tensor: {target_tensor}")
# Instantiate the real loss function from your framework
loss_fn = MeanSquaredErrorLoss()  # Using the correct class name

input tensor: Tensor([[0.92835057 0.00568409]])
target tensor: Tensor([[0.38320547]])


In [7]:
lin_output = linear(input_tensors=[input_tensor])

print(f"lin output: {lin_output}")

(2, 2)
lin output: Tensor([[-0.70548725]])


In [8]:
lin_output.backward([[1.]])

backprop calculation
curr tensor: Tensor([[-0.70548725]])
curr tensor shape: (1, 1)
backprop calculation
curr tensor: Tensor([[0.92835057 0.00568409]])
curr tensor shape: (1, 2)
backprop calculation
curr tensor: Tensor([[-0.76514035]
 [ 0.84996504]])
curr tensor shape: (2, 1)
child tensor: Tensor([[-0.70548725]])
child parent bshap: (2, 2)
unbroadcast axes to be summed: (1,)
backprop calculation
curr tensor: Tensor([[0.]])
curr tensor shape: (1, 1)
child tensor: Tensor([[-0.70548725]])
child parent bshap: (2, 2)
unbroadcast axes to be summed: (0, 1)


AxisError: axis 1 is out of bounds for array of dimension 1

In [17]:
mean_sum_sq_abs_error.grad


array(1.)

In [7]:
print(test_sum.parent_broadcast_shape)
print(test_sum.grad)
print(a.grad)
print(b.grad)

(1, 3)
[[0. 0. 0.]]
[[0. 0. 0.]]
[[0. 0. 0.]]


In [10]:
a.backward(output_grad=np.array([[1.,1.,1.]]))

()


In [11]:
test_sum.backward(output_grad=np.array([[1.,1.,1.]]))

()
()


In [3]:
np.random.seed(42)
num_samples = 5
input_size = 5
output_size = 1
X = np.random.rand(num_samples, input_size).astype(np.float32)
true_weights = np.random.rand(input_size, output_size).astype(np.float32)
true_bias = np.random.rand(output_size).astype(np.float32)
y = np.dot(X, true_weights) + true_bias

In [4]:
X_tensor = Tensor(X)
y_tensor = Tensor(y)

In [5]:
X_tensor

Tensor([[0.37454012 0.9507143  0.7319939  0.5986585  0.15601864]
 [0.15599452 0.05808361 0.8661761  0.601115   0.7080726 ]
 [0.02058449 0.96990985 0.83244264 0.21233912 0.18182497]
 [0.1834045  0.30424225 0.52475643 0.43194503 0.29122913]
 [0.6118529  0.13949387 0.29214466 0.36636186 0.45606998]])

In [7]:
model = LinearLayer(input_size, output_size)

In [8]:
num_epochs = 100
learning_rate = 0.01

optimizer = GradientDescent(model.aggregate_parameters_as_list(), learning_rate=learning_rate)

In [9]:
activation_fn = ReLU()
loss_fn = MeanSquaredErrorLoss()

In [10]:
for epoch in range(num_epochs):
    
    pred = model(X_tensor)
    activated_pred = activation_fn([pred])
    
    loss = loss_fn(activated_pred, y_tensor)
    
    loss.backward()
    
    optimizer.step()
    
    optimizer.zero_grad()
    
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.data}')

Epoch [100/100], Loss: 0.2983683943748474


In [7]:
import numpy as np

def gradient_checker(tensor, epsilon=1e-5):
    # Store the original tensor data
    original_data = tensor.data.copy()

    # Compute the gradients using automatic differentiation
    tensor.backward(np.array(1.0))
    autograd_grad = tensor.grad.copy()

    # Reset the tensor data to the original values
    tensor.data = original_data

    # Initialize an array to store the numerical gradients
    num_grad = np.zeros_like(original_data)

    # Iterate over each element in the tensor and compute the numerical gradient
    for idx in np.ndindex(original_data.shape):
        # Perturb the current element by epsilon
        tensor.data[idx] += epsilon

        # Compute the forward pass with the perturbed tensor
        perturbed_result = tensor.data.sum()  # Adjust as needed based on the operation

        # Reset the tensor data to the original values
        tensor.data = original_data

        # Perturb the current element by -epsilon
        tensor.data[idx] -= epsilon

        # Compute the forward pass with the perturbed tensor
        perturbed_result -= tensor.data.sum()  # Adjust as needed based on the operation

        # Reset the tensor data to the original values
        tensor.data = original_data

        # Compute the numerical gradient for the current element
        num_grad[idx] = perturbed_result / (2 * epsilon)

    # Compute the relative error between autograd and numerical gradients
    print(autograd_grad)
    print(num_grad)
    rel_error = np.linalg.norm(autograd_grad - num_grad) / np.linalg.norm(autograd_grad + num_grad)

    return rel_error


In [8]:
from mdl.tensor import Tensor

In [9]:
# Complex scenario with multiple operations and backward pass
tensor_a = Tensor(np.array([1, 2, 3]), requires_grad=True)
tensor_b = Tensor(np.array([4, 5, 6]), requires_grad=True)
tensor_c = Tensor(np.array([7, 8, 9]), requires_grad=True)

# Operations
result_add = tensor_a + tensor_b
result_mul = result_add * tensor_c
result_sum = result_mul.sum()

# Simulating backward pass
output_grad = np.array(1.0)
result_sum.backward(output_grad)

# Verify gradients using the gradient checker
error_a = gradient_checker(tensor_a)
error_b = gradient_checker(tensor_b)
error_c = gradient_checker(tensor_c)

# Check the errors
print(f"Gradient Checker Error for tensor_a: {error_a}")
print(f"Gradient Checker Error for tensor_b: {error_b}")
print(f"Gradient Checker Error for tensor_c: {error_c}")

deque([Tensor(172.0), Tensor([35. 56. 81.]), Tensor([5. 7. 9.]), Tensor([1. 2. 3.]), Tensor([4. 5. 6.]), Tensor([7. 8. 9.])])
Tensor(172.0)
Tensor([35. 56. 81.])
[Tensor(172.0)]
1.0
[0. 0. 0.]
[1. 1. 1.]
Tensor([5. 7. 9.])
[Tensor([35. 56. 81.])]
[1. 1. 1.]
[0. 0. 0.]
[7. 8. 9.]
Tensor([1. 2. 3.])
[Tensor([5. 7. 9.])]
[7. 8. 9.]
[0. 0. 0.]
[7. 8. 9.]
Tensor([4. 5. 6.])
[Tensor([5. 7. 9.])]
[7. 8. 9.]
[0. 0. 0.]
[7. 8. 9.]
Tensor([7. 8. 9.])
[Tensor([35. 56. 81.])]
[1. 1. 1.]
[0. 0. 0.]
[5. 7. 9.]


Exception: Shapes of gradient and Tensor need to match.