In [1]:
import torch
# Ask PyTorch to use deterministic algorithm implementations; operations that
# only have a nondeterministic implementation will raise instead of running.
torch.use_deterministic_algorithms(True)

import numpy as np
from keras.datasets import mnist
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt

from NumpyNN.NN_np import (
    FullyConnectedLayer,
    ReLULayer,
    SigmoidLayer,
    AdamOptimizer,
    CrossEntropyLoss,
    LinearActivation,
    Sequential,
    Optimizer,
    SoftMaxLayer,
    GradientDescentOptimizer,
    CrossEntropyLossWithSoftMax,
    Conv2d,
)

from numpy_resnet import Bottleneck

# The checks below draw random inputs from NumPy and random initial weights
# from PyTorch. Seed both generators so that "Restart & Run All" reproduces
# the same numbers (and the same pass/fail results) every time.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# Make "gray" the default colormap. The call also creates an (empty) current
# figure, which is why this cell's recorded output shows a Figure with 0 Axes.
plt.gray()


  from .autonotebook import tqdm as notebook_tqdm


<Figure size 432x288 with 0 Axes>

In [2]:
import sys
import os

# Make the sibling "pytorch_implementations" directory importable so the
# PyTorch reference Bottleneck can be loaded next to the NumPy one.
# NOTE(review): this assumes sys.path[0] is the notebook directory and ends
# exactly with "numpy_CNN" (no trailing separator). If it does not,
# removesuffix() returns the string unchanged and the directory name is simply
# glued onto it -- confirm for the environment this runs in.
numpy_cnn_dir = sys.path[0]
torch_impl_dir = numpy_cnn_dir.removesuffix("numpy_CNN") + "pytorch_implementations"
sys.path.append(torch_impl_dir)

from resnet import Bottleneck as Bottleneck_torch

In [3]:
"""
FullyConnectedLayer test
"""

# Problem size for the comparison.
n_input_features = 6
n_output_features = 3
n_samples = 5

# One random float32 batch and one random upstream gradient, shared by both
# implementations.
input_data = np.random.rand(n_samples, n_input_features).astype(np.float32)
output_gradient = np.random.rand(n_samples, n_output_features).astype(np.float32)
input_data_torch = torch.from_numpy(input_data).float().requires_grad_(True)

# Reference implementation: forward pass, then backpropagate the upstream gradient.
torch_fc = torch.nn.Linear(n_input_features, n_output_features)
torch_out = torch_fc(input_data_torch)
torch_out.backward(torch.tensor(output_gradient), retain_graph=True)
torch_out_np = torch_out.detach().numpy()
# torch.nn.Linear keeps its weight as (out, in) and its bias as (out,); the
# NumPy layer is compared against the transposed weight and a (1, out) bias row.
torch_wg = torch_fc.weight.grad.detach().numpy().T
torch_bg = torch_fc.bias.grad.detach().numpy().reshape(1, -1)

# Layer under test, loaded with exactly the parameters torch initialised.
my_fc = FullyConnectedLayer(n_input_features, n_output_features)
my_fc.weights = torch_fc.weight.detach().numpy().T
my_fc.bias = torch_fc.bias.detach().numpy().reshape(1, -1)
my_out = my_fc.forward(input_data)
my_input_g = my_fc.backward(output_gradient)
my_wg, my_bg = my_fc.weights_gradient, my_fc.bias_gradient

# (label, NumPy result, PyTorch reference) -- one printed line per comparison.
comparisons = (
    ("output all close:", my_out, torch_out_np),
    ("w gradients all close:", my_wg, torch_wg),
    ("b gradients all close:", my_bg, torch_bg),
    ("input gradients all close:", my_input_g, input_data_torch.grad),
)
for label, mine, reference in comparisons:
    print(label, np.allclose(mine, reference))

output all close: True
w gradients all close: True
b gradients all close: True
input gradients all close: True


In [4]:
"""
CrossEntropyLoss test
"""
def one_hot(y: np.ndarray, n_classes: int):
    """One-hot encode integer class labels.

    Args:
        y: array of integer class indices; one output row is produced per element.
        n_classes: number of columns in the encoding.

    Returns:
        A float array of shape (y.size, n_classes) holding 1 in column y[i] of
        row i and 0 everywhere else.
    """
    n_rows = y.size
    row_index = np.arange(n_rows)
    encoded = np.zeros(shape=(n_rows, n_classes))
    # Paired fancy indexing: sets element (i, y[i]) for every row i at once.
    encoded[row_index, y] = 1
    return encoded


batch_size = 5
n_classes = 3
# Random scores (fed to torch.nn.CrossEntropyLoss as unnormalised logits) and
# random one-hot targets, shared by both implementations.
pred = np.random.rand(batch_size, n_classes).astype(np.float32)
true = one_hot(np.random.randint(0, n_classes, batch_size), n_classes)
pred_torch = torch.from_numpy(pred).float()
true_torch = torch.from_numpy(true).float()
pred_torch.requires_grad = True

# Reference: loss value plus gradient w.r.t. the scores (left in pred_torch.grad).
torch_loss  = torch.nn.CrossEntropyLoss()
torch_loss_val = torch_loss(pred_torch, true_torch)
torch_loss_val.backward()

# Implementation under test.
my_loss = CrossEntropyLossWithSoftMax()
my_loss_val = my_loss.forward(pred, true)
# Run the backward pass once and keep its result; previously it was called
# twice and the first result was thrown away.
my_loss_grad = my_loss.backward()

print("loss_val all close:", np.allclose(my_loss_val, torch_loss_val.detach().numpy()))
print("loss gradients all close:", np.allclose(my_loss_grad, pred_torch.grad))

loss_val all close: True
loss gradients all close: True


In [5]:
"""
ReLULayer test
"""

n_input_features = 6
n_samples = 5
# Zero-centred inputs, so both the pass-through side (x > 0) and the clamped
# side (x < 0) of ReLU are exercised. np.random.rand only yields values in
# [0, 1), for which ReLU is the identity and the check could never fail.
input_data = np.random.randn(n_samples, n_input_features).astype(np.float32)
input_data_torch = torch.from_numpy(input_data).float()
input_data_torch.requires_grad = True
# ReLU is element-wise, so the upstream gradient has the input's shape.
output_gradient = np.random.rand(n_samples, n_input_features).astype(np.float32)


# Reference: forward pass, then backpropagate the upstream gradient into
# input_data_torch.grad.
torch_relu = torch.nn.ReLU()
torch_out = torch_relu(input_data_torch)
torch_out_np = torch_out.detach().numpy()
torch_out.backward(torch.tensor(output_gradient), retain_graph=True)


# Implementation under test.
my_relu = ReLULayer()
my_out = my_relu.forward(input_data)
my_input_g = my_relu.backward(output_gradient)


print("output all close:", np.allclose(my_out, torch_out_np))
print("input gradients all close:", np.allclose(my_input_g, input_data_torch.grad))

output all close: True
input gradients all close: True


In [6]:
"""
SigmoidLayer test
"""

n_input_features = 6
n_samples = 5
# Zero-centred inputs, so the sigmoid is checked on both sides of 0.
# np.random.rand only yields values in [0, 1), which never evaluates the
# layer (or its gradient) for a negative input.
input_data = np.random.randn(n_samples, n_input_features).astype(np.float32)
input_data_torch = torch.from_numpy(input_data).float()
input_data_torch.requires_grad = True
# Sigmoid is element-wise, so the upstream gradient has the input's shape.
output_gradient = np.random.rand(n_samples, n_input_features).astype(np.float32)


# Reference: forward pass, then backpropagate the upstream gradient into
# input_data_torch.grad.
torch_sigmoid = torch.nn.Sigmoid()
torch_out = torch_sigmoid(input_data_torch)
torch_out_np = torch_out.detach().numpy()
torch_out.backward(torch.tensor(output_gradient), retain_graph=True)


# Implementation under test.
my_sigmoid = SigmoidLayer()
my_out = my_sigmoid.forward(input_data)
my_input_g = my_sigmoid.backward(output_gradient)


print("output all close:", np.allclose(my_out, torch_out_np))
print("input gradients all close:", np.allclose(my_input_g, input_data_torch.grad))

output all close: True
input gradients all close: True


In [7]:
"""
Conv2DLayer test
"""

# Tensor sizes.
batch_size = 5
n_input_channels = 4
n_output_channels = 2
width = 5
height = 5

# Convolution hyper-parameters. A 3x3 kernel with stride 1 and padding 1 keeps
# the 5x5 spatial size, so the upstream gradient below reuses width/height.
kernel_size = 3
stride = 1
padding = 1

# Random float32 input batch and upstream gradient shared by both implementations.
input_data = np.random.rand(batch_size, n_input_channels, width, height).astype(np.float32)
input_data_torch = torch.from_numpy(input_data).float().requires_grad_(True)
output_gradient = np.random.rand(batch_size, n_output_channels, width, height).astype(np.float32)

# Reference layer.
torch_conv = torch.nn.Conv2d(
    n_input_channels,
    n_output_channels,
    kernel_size=kernel_size,
    stride=stride,
    padding=padding,
)

# Layer under test, loaded with the reference layer's parameters; the bias is
# handed over as a column of shape (n_output_channels, 1).
my_conv = Conv2d(n_input_channels, n_output_channels, kernel_size, stride, padding)
my_conv.weights = torch_conv.weight.detach().numpy()
my_conv.bias = torch_conv.bias.detach().numpy().reshape(-1, 1)

# Forward passes.
my_out = my_conv.forward(input_data)
torch_out = torch_conv(input_data_torch)
torch_out_np = torch_out.detach().numpy()

# Backward passes with the same upstream gradient.
torch_out.backward(torch.tensor(output_gradient), retain_graph=True)
torch_input_g = input_data_torch.grad.detach().numpy()
torch_wg = torch_conv.weight.grad.detach().numpy()
torch_bg = torch_conv.bias.grad.detach().numpy().reshape(-1, 1)
my_input_g = my_conv.backward(output_gradient)

# Absolute tolerance applied to every comparison below.
atol = 1e-6

# (label, NumPy result, PyTorch reference) -- one printed line per comparison.
comparisons = (
    ("output all close:", my_out, torch_out_np),
    ("weights gradients all close:", my_conv.weights_gradient, torch_wg),
    ("bias gradients all close:", my_conv.bias_gradient, torch_bg),
    ("input gradients all close:", my_input_g, torch_input_g),
)
for label, mine, reference in comparisons:
    print(label, np.allclose(mine, reference, atol=atol))

output all close: True
weights gradients all close: True
bias gradients all close: True
input gradients all close: True


In [8]:
"""
BottleNeckLayer test
"""

batch_size = 5
n_input_channels = 4
bottleneck_depth = 2
width = 5
height = 5
# ! Better to check both cases: for stride_for_downsampling in (1, 2):
stride_for_downsampling = 2
expansion_factor = 4
n_output_channels = n_input_channels * expansion_factor


# Random float32 input batch shared by both implementations.
input_data = np.random.rand(batch_size, n_input_channels, width, height).astype(np.float32)
input_data_torch = torch.from_numpy(input_data).float()
input_data_torch.requires_grad = True

torch_bottleneck = Bottleneck_torch(n_input_channels, bottleneck_depth, stride_for_downsampling)
my_bottleneck = Bottleneck(n_input_channels, bottleneck_depth, stride_for_downsampling)

# Give the NumPy block the reference block's convolution kernels.
# NOTE(review): only conv1/conv2/conv3 weights are synchronised. Any other
# parameters either implementation may hold (conv biases, normalisation
# layers, a shortcut/downsample branch) keep their own random initialisation.
# That is presumably why the recorded run reports a mismatch -- confirm
# against both Bottleneck definitions.
my_bottleneck.conv1.weights = torch_bottleneck.conv1.weight.detach().numpy()
my_bottleneck.conv2.weights = torch_bottleneck.conv2.weight.detach().numpy()
my_bottleneck.conv3.weights = torch_bottleneck.conv3.weight.detach().numpy()

my_out = my_bottleneck.forward(input_data)
torch_out = torch_bottleneck(input_data_torch)

# The upstream gradient has to match the block's real output shape. With
# stride 2 the recorded output is 3x3 per channel, not width x height, so it
# is derived from the output instead of being built from (width, height).
output_gradient = np.random.rand(*my_out.shape).astype(np.float32)

atol = 1e-6
print("output all close:", np.allclose(my_out, torch_out.detach().numpy(), atol=atol))

output all close: False


In [9]:
# Debug view: full dump of the NumPy Bottleneck output, kept to inspect the
# mismatch reported by the previous cell.
my_out

array([[[[0.06232188, 0.05193541, 0.00277902],
         [0.01627721, 0.04940979, 0.        ],
         [0.02522602, 0.        , 0.        ]],

        [[0.        , 0.        , 0.00176693],
         [0.06547843, 0.        , 0.11744102],
         [0.01468656, 0.14748987, 0.06019311]],

        [[0.06378714, 0.06039981, 0.00921874],
         [0.08654278, 0.04993292, 0.07810593],
         [0.05751046, 0.08303892, 0.00818532]],

        [[0.        , 0.        , 0.        ],
         [0.04368356, 0.        , 0.1065679 ],
         [0.00139902, 0.13335638, 0.05540168]],

        [[0.00707051, 0.02321516, 0.00345614],
         [0.12365073, 0.00326096, 0.19838589],
         [0.04233807, 0.21866306, 0.0697446 ]],

        [[0.        , 0.        , 0.01657281],
         [0.        , 0.        , 0.        ],
         [0.        , 0.        , 0.        ]],

        [[0.01310711, 0.00777732, 0.        ],
         [0.        , 0.00779931, 0.        ],
         [0.        , 0.        , 0.        ]],


In [10]:
# Debug view: the PyTorch reference output as a NumPy array, for side-by-side
# comparison with my_out.
torch_out.detach().numpy()

array([[[[0.00000000e+00, 1.90505728e-01, 0.00000000e+00],
         [0.00000000e+00, 3.80296558e-02, 3.70121717e-01],
         [1.67176425e-02, 7.41421282e-02, 0.00000000e+00]],

        [[4.37806815e-01, 2.87687868e-01, 1.26285627e-01],
         [4.70723450e-01, 3.69594574e-01, 0.00000000e+00],
         [1.96808189e-01, 3.99078727e-01, 3.79391253e-01]],

        [[4.40131202e-02, 1.29432663e-01, 0.00000000e+00],
         [2.24688083e-01, 0.00000000e+00, 3.00731480e-01],
         [0.00000000e+00, 0.00000000e+00, 0.00000000e+00]],

        [[4.89399046e-01, 5.21139085e-01, 1.80581450e-01],
         [5.70149362e-01, 4.57961649e-01, 2.62002051e-01],
         [2.70077497e-01, 5.59989095e-01, 4.32028204e-01]],

        [[6.86284781e-01, 7.94498920e-01, 4.86240476e-01],
         [8.75468016e-01, 5.71602106e-01, 8.61682236e-01],
         [5.97804904e-01, 8.03985357e-01, 5.34447491e-01]],

        [[2.39965301e-02, 8.94389227e-02, 6.63924590e-02],
         [0.00000000e+00, 1.66198537e-01, 0.00