In [1]:
import cppapi
from sklearn.datasets import load_diabetes
import numpy as np
import random
import os

In [2]:
print(os.getcwd())
os.chdir('/home/thomas/cpp-projects/low-level-deep-learning')

from python_tests.test_activation import sigmoid

/home/thomas/cpp-projects/low-level-deep-learning/examples


In [3]:

from sklearn.datasets import make_regression

data, target = make_regression(n_samples=1000, n_features=10, noise=2, random_state=1234)

In [4]:

# Computational graph of the simple hard coded neural network we are testing
#X-> |---------|          |-------- |                        |--------|          
#    | v(X, W)|  -> M1 -> |A(M1 + B0)|  --> sigma(M1 + B0)-> |v(O1,W2)|  -> A(M2,B2) -> P ---> Lambda(P, Y) --> L                              
#W-> |---------|          |-------- |                        |--------|        ^B2                      ^Y
# here sigma represents a loss function
#diabetes = load_diabetes()
#targets = diabetes.target.astype(np.float32)
#data = diabetes.data.astype(np.float32)
targets_ = target.reshape(-1, 1).astype(np.float32)

hidden_size = 32
batch_size = 100
num_features = data.shape[1]

W1 = np.random.randn(num_features, hidden_size).astype(np.float32)
W2 = np.random.randn(hidden_size, 1).astype(np.float32)
B1 = np.random.randn(1, hidden_size).astype(np.float32)
B2 = random.random()

nn = cppapi.SimpleNeuralNetwork(batch_size, num_features, 1, hidden_size, W1, W2, B1, B2)

loss = nn._forward_pass_one_step(cppapi.Activation.SIGMOID, cppapi.Loss.RMSE, data.astype(np.float32), targets_)

# Now at this point we can access all of the objects inside the neural network class
# so we need to make sure that a manual one step forward pass over a simple computation graph aligns
rtol = 1e-3
atol = 1e-3
M1 = np.dot(data, W1)
print("M1", M1.shape, "B1", B1.shape, "c++ M1", nn.M1.shape)
N1 = M1 + B1
print("c++ N1", nn.N1.shape, "py N1", N1.shape)
print(np.allclose(N1, nn.N1, rtol, atol))
O1 = sigmoid(N1)
M2 = np.dot(O1, W2)
P = M2 + B2
pyloss = np.sqrt(np.mean(np.power(targets_ - P, 2)))
print(loss, pyloss)
print("The loss values after one step are approx equal:", np.isclose(pyloss, loss))


M1 (1000, 32) B1 (1, 32) c++ M1 (1000, 32)
c++ N1 (1000, 32) py N1 (1000, 32)
True
237.98251342773438 237.98255461583824
The loss values after one step are approx equal: True


Now for one step of the backward pass

In [5]:
nn._backward_pass(cppapi.Activation.SIGMOID, data.astype(np.float32), targets_)

In [6]:
from typing import Callable

def deriv(func: Callable[[np.ndarray], np.ndarray],
          input_: np.ndarray,
          delta: float = 0.001) -> np.ndarray:
    return (func(input_ + delta) - func(input_ - delta)) / (2. * delta)

In [7]:
dLdP = -(targets_ - P)
print(dLdP.shape)
print(M2.shape)
dPdM2 = np.ones_like(M2)
dLdM2 = dLdP @ dPdM2.transpose()
print("dLdM2", dLdM2.shape)
dPdB2 = np.ones([1, 1])
dLdB2 = (dLdP * dPdB2).sum(axis=0)

rtol = 1e-4
atol = 1e-4
print("dLdP", np.allclose(nn.get_dLdP(), dLdP, rtol, atol))
print("c++ dLdP", nn.get_dLdP().shape)
print(dLdP.shape, dLdB2.shape)
print("dLdB2", dLdB2.squeeze(), "c++", nn.dLdB2)

dM2dW2 = np.transpose(O1, (1, 0))
dLdW2 = dM2dW2 @ dLdP
print("dM2dW2", dM2dW2.shape, "dPdM2", dPdM2.shape, "dLdP", dLdP.shape)
print(dLdW2.shape, nn.dLdW2.shape)
print(np.allclose(dLdW2, nn.dLdW2, rtol, atol))

dM2dO1 = np.transpose(W2, (1, 0))
print(dM2dO1.shape)
dLdO1 = dLdP @ dM2dO1
print("N1", N1.shape)
# Using the finite differences method causes a numerical error between the matrices
# and dO1dN1 across the FFI are not always the same
#dO1dN1 = deriv(sigmoid, N1, 0.0001)
dO1dN1 = sigmoid(N1) * (1.0 - sigmoid(N1))
print("dO1dN1", dO1dN1.shape, "dLdO1", dLdO1.shape)
dLdN1 = dLdO1 * dO1dN1
print("dO1dN1:", np.allclose(dO1dN1, nn.get_dO1dN1(), rtol, atol))
print("dLdO1:", np.allclose(dLdO1, nn.get_dLdO1(), rtol, atol))
# inputs to dLdN1 are dLdO1 and dO1dN1
print("python specific dLdN1", np.allclose(dLdN1, nn.get_dLdO1() * nn.get_dO1dN1(), rtol, atol))
print("dLdN1:", np.allclose(dLdN1, nn.get_dLdN1(), rtol, atol))
print("Max Abs diff dLdN1", np.max(np.abs(dLdN1 - nn.get_dLdN1())))
print("python dLdN1", dLdN1.shape, f"[1, {hidden_size}]")
dLdB1 = (dLdN1 * np.ones([1, hidden_size])).sum(axis=0)
print("c++ dLdB1", nn.dLdB1.shape)
print("dLdB1", np.allclose(dLdB1, nn.dLdB1, rtol, atol))


(1000, 1)
(1000, 1)
dLdM2 (1000, 1000)
dLdP True
c++ dLdP (1000, 1)
(1000, 1) (1,)
dLdB2 -7229.065436884959 c++ -7229.0654296875
dM2dW2 (32, 1000) dPdM2 (1000, 1) dLdP (1000, 1)
(32, 1) (32, 1)
True
(1, 32)
N1 (1000, 32)
dO1dN1 (1000, 32) dLdO1 (1000, 32)
dO1dN1: True
dLdO1: True
python specific dLdN1 True
dLdN1: True
Max Abs diff dLdN1 7.208999281838047e-05
python dLdN1 (1000, 32) [1, 32]
c++ dLdB1 (1, 32)
dLdB1 True
[   74.36572416   -94.01144567 -1245.70562777 -2931.52306057
  1288.37632207  -841.61262539  -150.87037323  2381.30506004
  3549.35288238 -5486.58458957  -199.37186708  1152.5002679
 -4569.4836983   -764.71480892   112.97116454  -522.67493637
 -1762.78415432  1120.76075094  -235.35822516  -160.22797726
 -1233.48978973  3939.08062611   184.7840942     14.8061001
 -2317.9885899  -3036.68273995 -1227.38893772 -2923.50815121
    88.21299336  3223.28021413 -1108.11527604  -254.97832133]
[   74.36572416   -94.01144567 -1245.70562777 -2931.52306057
  1288.37632207  -841.61262539