In [1]:
import torch

import numpy as np
from keras.datasets import mnist
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt

from NumpyNN.NN_np import (
    FullyConnectedLayer,
    ReLULayer,
    SigmoidLayer,
    AdamOptimizer,
    CrossEntropyLoss,
    LinearActivation,
    Sequential,
    Optimizer,
    SoftMaxLayer,
    GradientDescentOptimizer,
    CrossEntropyLossWithSoftMax,
)

# Ask PyTorch to use deterministic algorithms so the reference values the
# NumPy layers are compared against do not vary between runs.
torch.use_deterministic_algorithms(True)

# Make grayscale the default image colormap. This is set through rcParams
# instead of plt.gray(): plt.gray() also looks up the current image, which
# creates an empty figure when none exists and leaves a stray
# "<Figure ... with 0 Axes>" output under this cell.
plt.rcParams["image.cmap"] = "gray"

  from .autonotebook import tqdm as notebook_tqdm


<Figure size 432x288 with 0 Axes>

In [2]:
"""
FullyConnectedLayer test
"""

# Compare the NumPy FullyConnectedLayer with torch.nn.Linear: both layers get
# the same parameters, the same input and the same upstream gradient, and the
# forward output and all gradients are compared.

# Fixed seeds: the inputs come from NumPy and torch.nn.Linear initialises its
# parameters randomly, so both generators are pinned to keep the cell
# reproducible under "Restart & Run All".
rng = np.random.default_rng(0)
torch.manual_seed(0)

n_input_features = 6
n_output_features = 3
n_samples = 5
input_data = rng.random((n_samples, n_input_features), dtype=np.float32)
input_data_torch = torch.from_numpy(input_data).float()
input_data_torch.requires_grad = True
output_gradient = rng.random((n_samples, n_output_features), dtype=np.float32)

# --- PyTorch reference -------------------------------------------------------
torch_fc = torch.nn.Linear(n_input_features, n_output_features)
torch_out = torch_fc(input_data_torch)
torch_out_np = torch_out.detach().numpy()
# backward() runs exactly once, so the graph does not need to be retained.
torch_out.backward(torch.tensor(output_gradient))
# torch.nn.Linear stores its weight as (out_features, in_features) and its bias
# as a 1-D vector; transpose / reshape them into the layout that is assigned to
# (and read back from) the NumPy layer below.
torch_wg = torch_fc.weight.grad.detach().numpy().T
torch_bg = torch_fc.bias.grad.detach().numpy().reshape(1, -1)
torch_input_g = input_data_torch.grad.detach().numpy()

# --- NumPy implementation under test ----------------------------------------
my_fc = FullyConnectedLayer(n_input_features, n_output_features)
# Overwrite the layer's own initialisation with the torch parameters so both
# layers compute the same function.
my_fc.weights = torch_fc.weight.detach().numpy().T
my_fc.bias = torch_fc.bias.detach().numpy().reshape(1, -1)
my_out = my_fc.forward(input_data)
my_input_g = my_fc.backward(output_gradient)
my_wg = my_fc.weights_gradient
my_bg = my_fc.bias_gradient

# --- Comparison --------------------------------------------------------------
fc_checks = {
    "output": np.allclose(my_out, torch_out_np),
    "w gradients": np.allclose(my_wg, torch_wg),
    "b gradients": np.allclose(my_bg, torch_bg),
    "input gradients": np.allclose(my_input_g, torch_input_g),
}
for check_name, passed in fc_checks.items():
    print(f"{check_name} all close:", passed)

# Print every result first, then fail loudly so a regression cannot go
# unnoticed when the notebook is run end to end.
assert all(fc_checks.values()), f"FullyConnectedLayer mismatch: {fc_checks}"

output all close: True
w gradients all close: True
b gradients all close: True
input gradients all close: True


In [21]:
"""
CrossEntropyLoss test
"""
def one_hot(y: np.ndarray, n_classes: int) -> np.ndarray:
    """Convert integer class labels into a one-hot encoded matrix.

    Args:
        y: Integer class labels. The labels are flattened before encoding, so
            ``(n,)``, ``(n, 1)`` and ``(1, n)`` inputs all give the same result.
        n_classes: Number of classes, i.e. the number of columns of the result.

    Returns:
        A float64 array of shape ``(y.size, n_classes)`` in which row ``i`` has
        a 1 in column ``y[i]`` and 0 everywhere else.

    Raises:
        IndexError: If a label is outside the valid index range for
            ``n_classes`` columns (raised by NumPy indexing).
    """
    # Flatten first: with a column vector of labels the fancy index below would
    # broadcast rows against labels and set every label column in every row.
    labels = np.asarray(y).reshape(-1)
    encoded = np.zeros((labels.size, n_classes))
    encoded[np.arange(labels.size), labels] = 1
    return encoded


# Compare CrossEntropyLossWithSoftMax with torch.nn.CrossEntropyLoss on the
# same raw scores and the same one-hot targets.

# Fixed seed so the random scores and labels are reproducible.
rng = np.random.default_rng(0)

batch_size = 5
n_classes = 3
pred = rng.random((batch_size, n_classes), dtype=np.float32)
true = one_hot(rng.integers(0, n_classes, batch_size), n_classes)
pred_torch = torch.from_numpy(pred).float()
true_torch = torch.from_numpy(true).float()
pred_torch.requires_grad = True

# --- PyTorch reference -------------------------------------------------------
# torch's CrossEntropyLoss applies log-softmax to its input itself, which is why
# it is compared against the "WithSoftMax" variant of the NumPy loss.
torch_loss = torch.nn.CrossEntropyLoss()
torch_loss_val = torch_loss(pred_torch, true_torch)
torch_loss_val.backward()
torch_loss_grad = pred_torch.grad.detach().numpy()

# --- NumPy implementation under test ----------------------------------------
my_loss = CrossEntropyLossWithSoftMax()
my_loss_val = my_loss.forward(pred, true)
# Run the backward pass once and reuse its result for the comparison.
my_loss_grad = my_loss.backward()

# --- Comparison --------------------------------------------------------------
loss_val_close = np.allclose(my_loss_val, torch_loss_val.detach().numpy())
loss_grad_close = np.allclose(my_loss_grad, torch_loss_grad)
print("loss_val all close:", loss_val_close)
print("loss gradients all close:", loss_grad_close)

# Print both results first, then fail loudly on any mismatch.
assert loss_val_close and loss_grad_close, "CrossEntropyLossWithSoftMax does not match torch"

loss_val all close: True
loss gradients all close: True


In [10]:
"""
ReLULayer test
"""

# Compare the NumPy ReLULayer with torch.nn.ReLU on the same input and the same
# upstream gradient.

# Fixed seed so the random inputs are reproducible.
rng = np.random.default_rng(0)

n_input_features = 6
n_samples = 5
# Zero-centred inputs: uniform samples from [0, 1) are never negative, so ReLU
# would act as the identity and the clipping branch (and its zero gradient)
# would never be exercised.
input_data = rng.standard_normal((n_samples, n_input_features), dtype=np.float32)
input_data_torch = torch.from_numpy(input_data).float()
input_data_torch.requires_grad = True
# ReLU is element-wise, so the upstream gradient has the same shape as the input.
output_gradient = rng.random((n_samples, n_input_features), dtype=np.float32)

# --- PyTorch reference -------------------------------------------------------
torch_relu = torch.nn.ReLU()
torch_out = torch_relu(input_data_torch)
torch_out_np = torch_out.detach().numpy()
# backward() runs exactly once, so the graph does not need to be retained.
torch_out.backward(torch.tensor(output_gradient))
torch_input_g = input_data_torch.grad.detach().numpy()

# --- NumPy implementation under test ----------------------------------------
my_relu = ReLULayer()
my_out = my_relu.forward(input_data)
my_input_g = my_relu.backward(output_gradient)

# --- Comparison --------------------------------------------------------------
relu_checks = {
    "output": np.allclose(my_out, torch_out_np),
    "input gradients": np.allclose(my_input_g, torch_input_g),
}
for check_name, passed in relu_checks.items():
    print(f"{check_name} all close:", passed)

# Print every result first, then fail loudly on any mismatch.
assert all(relu_checks.values()), f"ReLULayer mismatch: {relu_checks}"

output all close: True
input gradients all close: True
