In [1]:
import torch
# Request deterministic algorithm implementations from PyTorch so repeated runs
# of the comparisons below use the same kernels.
torch.use_deterministic_algorithms(True)

import numpy as np
# NOTE(review): mnist, train_test_split and f1_score are not referenced by any
# cell visible in this part of the notebook - confirm they are used later.
from keras.datasets import mnist
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt

# NumPy implementations under test.
# NOTE(review): ReLULayer is listed twice in this import; the second entry is
# redundant (harmless, but worth cleaning up).
from NumpyNN.NN_np import (
    FullyConnectedLayer,
    ReLULayer,
    SigmoidLayer,
    ReLULayer,
    AdamOptimizer,
    CrossEntropyLoss,
    LinearActivation,
    Sequential,
    Optimizer,
    SoftMaxLayer,
    GradientDescentOptimizer,
    CrossEntropyLossWithSoftMax,
    Conv2d,
    Flatten
)

# NumPy residual bottleneck block; compared with its PyTorch twin further down.
from numpy_resnet import Bottleneck

# Switch matplotlib's default colormap to gray. As a side effect this creates
# the empty figure that shows up in this cell's output.
plt.gray()


  from .autonotebook import tqdm as notebook_tqdm


<Figure size 432x288 with 0 Axes>

In [2]:
import sys
import os  # NOTE(review): not used anywhere in the visible cells
# Make the sibling "pytorch_implementations" directory importable.
# str.removesuffix needs Python 3.9+.
# NOTE(review): this assumes sys.path[0] ends with "numpy_CNN" (i.e. the kernel
# was started from that directory). If it does not, removesuffix returns the
# path unchanged and "pytorch_implementations" is glued onto it without a
# separator - confirm the expected working directory.
sys.path.append(
    sys.path[0].removesuffix("numpy_CNN") + "pytorch_implementations"
)

# Reference PyTorch bottleneck, aliased so it does not shadow the NumPy Bottleneck.
from resnet import Bottleneck as Bottleneck_torch

In [3]:
"""
FullyConnectedLayer test
"""

# Compare the NumPy FullyConnectedLayer with torch.nn.Linear: both layers get
# the same parameters, the same input batch and the same upstream gradient.
n_samples = 5
n_input_features = 6
n_output_features = 3

# Random input batch (shared with torch) and random upstream gradient.
input_data = np.random.rand(n_samples, n_input_features).astype(np.float32)
input_data_torch = torch.from_numpy(input_data).float()
input_data_torch.requires_grad_(True)
output_gradient = np.random.rand(n_samples, n_output_features).astype(np.float32)

# --- PyTorch reference: forward + backward ---
torch_fc = torch.nn.Linear(n_input_features, n_output_features)
torch_out = torch_fc(input_data_torch)
torch_out_np = torch_out.detach().numpy()
torch_out.backward(gradient=torch.tensor(output_gradient), retain_graph=True)
# The torch weight gradient is transposed and the bias gradient is turned into
# a single row so both match the layout handed to the NumPy layer below.
torch_wg = torch_fc.weight.grad.detach().numpy().transpose()
torch_bg = torch_fc.bias.grad.detach().numpy()[np.newaxis, :]

# --- NumPy layer under test, loaded with the torch parameters ---
my_fc = FullyConnectedLayer(n_input_features, n_output_features)
my_fc.weights = torch_fc.weight.detach().numpy().transpose()
my_fc.bias = torch_fc.bias.detach().numpy()[np.newaxis, :]
my_out = my_fc.forward(input_data)
my_input_g = my_fc.backward(output_gradient)
my_wg = my_fc.weights_gradient
my_bg = my_fc.bias_gradient

# Report every comparison in the same order and wording as before.
checks = (
    ("output all close:", my_out, torch_out_np),
    ("w gradients all close:", my_wg, torch_wg),
    ("b gradients all close:", my_bg, torch_bg),
    ("input gradients all close:", my_input_g, input_data_torch.grad),
)
for _label, _mine, _reference in checks:
    print(_label, np.allclose(_mine, _reference))

output all close: True
w gradients all close: True
b gradients all close: True
input gradients all close: True


In [4]:
"""
CrossEntropyLoss test
"""
def one_hot(y: np.ndarray, n_classes: int):
    """Turn an array of integer class labels into a one-hot matrix.

    Args:
        y: integer class indices; one output row is produced per element.
        n_classes: number of columns in the encoded matrix.

    Returns:
        A float array of shape (y.size, n_classes) holding 1 in column y[i]
        of row i and 0 everywhere else.
    """
    n_rows = y.size
    result = np.zeros((n_rows, n_classes))
    row_index = np.arange(n_rows)
    # Paired fancy indexing: one (row, label) cell is set per sample.
    result[row_index, y] = 1
    return result


# Compare CrossEntropyLossWithSoftMax with torch.nn.CrossEntropyLoss on random
# scores and random one-hot targets.
n_classes = 3
batch_size = 5

pred = np.random.rand(batch_size, n_classes).astype(np.float32)
true = one_hot(np.random.randint(0, n_classes, batch_size), n_classes)

# Torch views of the same data; only the predictions need a gradient.
pred_torch = torch.from_numpy(pred).float()
pred_torch.requires_grad_(True)
true_torch = torch.from_numpy(true).float()

# --- PyTorch reference ---
torch_loss = torch.nn.CrossEntropyLoss()
torch_loss_val = torch_loss(pred_torch, true_torch)
torch_loss_val.backward()

# --- NumPy loss under test ---
my_loss = CrossEntropyLossWithSoftMax()
my_loss_val = my_loss.forward(pred, true)
my_loss.backward()

print("loss_val all close:", np.allclose(my_loss_val, torch_loss_val.item()))
print("loss gradients all close:", np.allclose(my_loss.backward(), pred_torch.grad.numpy()))

loss_val all close: True
loss gradients all close: True


In [5]:
"""
ReLULayer test
"""

# Compare the NumPy ReLULayer with torch.nn.ReLU on the same input batch and
# the same upstream gradient.
n_input_features = 6
n_output_features = 3  # not used by this test; kept so notebook state is unchanged
n_samples = 5

# np.random.rand only produces values in [0, 1), for which ReLU is the identity
# and its gradient mask is all ones - the comparison would pass even for a
# layer that never clips. Rescale to [-1, 1) so roughly half of the inputs are
# negative and both branches of ReLU are exercised.
input_data = (2 * np.random.rand(n_samples, n_input_features) - 1).astype(np.float32)
input_data_torch = torch.from_numpy(input_data).float()
input_data_torch.requires_grad = True
output_gradient = np.random.rand(n_samples, n_input_features).astype(np.float32)


# --- PyTorch reference: forward + backward ---
torch_relu = torch.nn.ReLU()
torch_out = torch_relu(input_data_torch)
torch_out_np = torch_out.detach().numpy()
torch_out.backward(torch.tensor(output_gradient), retain_graph=True)


# --- NumPy layer under test ---
my_relu = ReLULayer()
my_out = my_relu.forward(input_data)


print("output all close:", np.allclose(my_out, torch_out_np))
print("input gradients all close:", np.allclose(my_relu.backward(output_gradient), input_data_torch.grad))

output all close: True
input gradients all close: True


In [6]:
"""
SigmoidLayer test
"""

# Compare the NumPy SigmoidLayer with torch.nn.Sigmoid on a 2D batch.
n_samples = 5
n_input_features = 6
n_output_features = 3

# Random input (shared with torch) and an upstream gradient of the same shape.
input_data = np.random.rand(n_samples, n_input_features).astype(np.float32)
input_data_torch = torch.from_numpy(input_data).float()
input_data_torch.requires_grad_(True)
output_gradient = np.random.rand(n_samples, n_input_features).astype(np.float32)

# --- PyTorch reference: forward + backward ---
torch_sigmoid = torch.nn.Sigmoid()
torch_out = torch_sigmoid(input_data_torch)
torch_out_np = torch_out.detach().numpy()
torch_out.backward(gradient=torch.tensor(output_gradient), retain_graph=True)

# --- NumPy layer under test ---
my_sigmoid = SigmoidLayer()
my_out = my_sigmoid.forward(input_data)

print("output all close:", np.allclose(my_out, torch_out_np))
print(
    "input gradients all close:",
    np.allclose(my_sigmoid.backward(output_gradient), input_data_torch.grad.numpy()),
)


output all close: True
input gradients all close: True


In [7]:
"""
SigmoidLayer test on a 4D tensor
"""

# Same SigmoidLayer comparison as for the 2D case, this time on a 4D tensor of
# shape (n_samples, n_input_channels, height, width).
n_samples = 2
n_input_channels = 3
height = 5
width = 5

# Random input (shared with torch) and an upstream gradient of the same shape.
input_data = np.random.rand(n_samples, n_input_channels, height, width).astype(np.float32)
input_data_torch = torch.from_numpy(input_data).float()
input_data_torch.requires_grad_(True)
output_gradient = np.random.rand(n_samples, n_input_channels, height, width).astype(np.float32)

# --- PyTorch reference: forward + backward ---
torch_sigmoid = torch.nn.Sigmoid()
torch_out = torch_sigmoid(input_data_torch)
torch_out_np = torch_out.detach().numpy()
torch_out.backward(gradient=torch.tensor(output_gradient), retain_graph=True)

# --- NumPy layer under test ---
my_sigmoid = SigmoidLayer()
my_out = my_sigmoid.forward(input_data)

print("output all close:", np.allclose(my_out, torch_out_np))
print(
    "input gradients all close:",
    np.allclose(my_sigmoid.backward(output_gradient), input_data_torch.grad.numpy()),
)

output all close: True
input gradients all close: True


In [8]:
"""
FlattenLayer test
"""

# Compare the NumPy Flatten layer with torch.nn.Flatten: a 4D batch goes in,
# a (n_samples, channels * height * width) matrix comes out, and the backward
# pass must restore the 4D layout of the gradient.
n_samples = 2
n_input_channels = 3
height = 5
width = 5

input_data = np.random.rand(n_samples, n_input_channels, height, width).astype(np.float32)
input_data_torch = torch.from_numpy(input_data).float()
input_data_torch.requires_grad_(True)
# The upstream gradient has the flattened (2D) output shape.
output_gradient = np.random.rand(n_samples, n_input_channels * height * width).astype(np.float32)

# --- NumPy layer under test ---
my_flatten = Flatten()
my_out = my_flatten.forward(input_data)
my_out_g = my_flatten.backward(output_gradient)

# --- PyTorch reference: forward + backward ---
torch_flatten = torch.nn.Flatten()
torch_out = torch_flatten(input_data_torch)
torch_out_np = torch_out.detach().numpy()
torch_out.backward(gradient=torch.tensor(output_gradient), retain_graph=True)
torch_input_g = input_data_torch.grad.detach().numpy()

print("output all close:", np.allclose(my_out, torch_out_np))
print("input gradients all close:", np.allclose(my_out_g, torch_input_g))


output all close: True
input gradients all close: True


In [9]:
"""
Conv2DLayer test
"""

# Compare the NumPy Conv2d layer with torch.nn.Conv2d: same weights and bias,
# same input batch, same upstream gradient.
batch_size = 5
n_input_channels = 4
n_output_channels = 2
width = 5
height = 5

kernel_size = 3
stride = 1
padding = 1

# Spatial size of the convolution output for the settings above.
output_width = (width + 2 * padding - kernel_size) // stride + 1
output_height = (height + 2 * padding - kernel_size) // stride + 1

input_data = np.random.rand(batch_size, n_input_channels, width, height).astype(np.float32)
input_data_torch = torch.from_numpy(input_data).float()
input_data_torch.requires_grad_(True)
output_gradient = np.random.rand(batch_size, n_output_channels, output_width, output_height).astype(np.float32)

torch_conv = torch.nn.Conv2d(n_input_channels, n_output_channels, kernel_size, stride, padding)

# NumPy layer under test, loaded with the torch parameters; the bias is handed
# over as a column vector.
my_conv = Conv2d(n_input_channels, n_output_channels, kernel_size, stride, padding)
my_conv.weights = torch_conv.weight.detach().numpy()
my_conv.bias = torch_conv.bias.detach().numpy().reshape(-1, 1)

my_out = my_conv.forward(input_data)

# --- PyTorch reference: forward + backward ---
torch_out = torch_conv(input_data_torch)
torch_out_np = torch_out.detach().numpy()
torch_out.backward(gradient=torch.tensor(output_gradient), retain_graph=True)
torch_input_g = input_data_torch.grad.detach().numpy()
torch_wg = torch_conv.weight.grad.detach().numpy()
torch_bg = torch_conv.bias.grad.detach().numpy().reshape(-1, 1)

my_input_g = my_conv.backward(output_gradient)

# Absolute tolerance used for every float32 comparison below.
atol = 1e-6

checks = (
    ("output all close:", my_out, torch_out_np),
    ("weights gradients all close:", my_conv.weights_gradient, torch_wg),
    ("bias gradients all close:", my_conv.bias_gradient, torch_bg),
    ("input gradients all close:", my_input_g, torch_input_g),
)
for _label, _mine, _reference in checks:
    print(_label, np.allclose(_mine, _reference, atol=atol))

output all close: True
weights gradients all close: True
bias gradients all close: True
input gradients all close: True


In [10]:
"""
BottleNeckLayer test
"""

# Compare the NumPy Bottleneck block with the PyTorch one: same convolution
# weights, same input batch, same upstream gradient.
batch_size = 5
in_channels = 8
bottleneck_depth = 2
width = 6
height = 6

expansion_factor = 4
n_output_channels = bottleneck_depth * expansion_factor

# Absolute tolerance used for every float32 comparison below.
atol = 1e-6

# Stride 1 keeps the spatial size, stride 2 halves it (the downsampling case).
for stride_for_downsampling in (1, 2):
    print(f"stride = {stride_for_downsampling}")
    input_data = np.random.rand(batch_size, in_channels, width, height).astype(np.float32)
    input_data_torch = torch.from_numpy(input_data).float()
    input_data_torch.requires_grad_(True)

    # For the two strides tested this equals the unchanged / halved size.
    output_width = width // stride_for_downsampling
    output_height = height // stride_for_downsampling
    output_gradient = np.random.rand(batch_size, n_output_channels, output_width, output_height).astype(np.float32)

    torch_bottleneck = Bottleneck_torch(in_channels, bottleneck_depth, stride_for_downsampling)
    my_bottleneck = Bottleneck(in_channels, bottleneck_depth, stride_for_downsampling)

    # Load the torch convolution weights into the NumPy block.
    for _conv_name in ("conv1", "conv2", "conv3"):
        getattr(my_bottleneck, _conv_name).weights = getattr(torch_bottleneck, _conv_name).weight.detach().numpy()

    if my_bottleneck.conv_to_match_dimensions:
        my_bottleneck.conv_to_match_dimensions.weights = torch_bottleneck.conv_to_match_dimensions.weight.detach().numpy()

    my_out = my_bottleneck.forward(input_data)
    torch_out = torch_bottleneck(input_data_torch)

    torch_out.backward(gradient=torch.tensor(output_gradient), retain_graph=True)
    torch_input_g = input_data_torch.grad.detach().numpy()

    my_input_g = my_bottleneck.backward(output_gradient)

    print("output all close:", np.allclose(my_out, torch_out.detach().numpy(), atol=atol))
    print("input gradients all close:", np.allclose(my_input_g, torch_input_g, atol=atol))
    for _conv_name in ("conv1", "conv2", "conv3"):
        _mine = getattr(my_bottleneck, _conv_name).weights_gradient
        _reference = getattr(torch_bottleneck, _conv_name).weight.grad.detach().numpy()
        print(f"{_conv_name} weights gradients all close:", np.allclose(_mine, _reference, atol=atol))
    if my_bottleneck.conv_to_match_dimensions:
        print(
            "conv_to_match_dimensions weights gradients all close:",
            np.allclose(
                my_bottleneck.conv_to_match_dimensions.weights_gradient,
                torch_bottleneck.conv_to_match_dimensions.weight.grad.detach().numpy(),
                atol=atol,
            ),
        )
    print()

stride = 1
output all close: True
input gradients all close: True
conv1 weights gradients all close: True
conv2 weights gradients all close: True
conv3 weights gradients all close: True

stride = 2
output all close: True
input gradients all close: True
conv1 weights gradients all close: True
conv2 weights gradients all close: True
conv3 weights gradients all close: True
conv_to_match_dimensions weights gradients all close: True

