In [1]:
import numpy as np
from optimizer import SteepestDescentOptimizer
from nn import NeuralNetworkModule, NeuralNetworkClassifier

In [2]:
# Test SteepestDescentOptimizer.

def dummy_obj(x):
    return np.dot(x, x) + 1

def dummy_grad(x):
    return 2 * x

x = np.random.normal(size=(2,))
optimizer = SteepestDescentOptimizer(1e-1)
max_iter = 10

for i in range(max_iter):
    print(f"Loss at iter {i:02}: {dummy_obj(x):0.2f}")
    x = optimizer.update(x, dummy_grad(x))

Loss at iter 00: 2.30
Loss at iter 01: 1.83
Loss at iter 02: 1.53
Loss at iter 03: 1.34
Loss at iter 04: 1.22
Loss at iter 05: 1.14
Loss at iter 06: 1.09
Loss at iter 07: 1.06
Loss at iter 08: 1.04
Loss at iter 09: 1.02


In [3]:
# Test NeuralNetworkModule.forward

np.random.seed(123)

nn1 = NeuralNetworkModule(dims=[2,3])
W = nn1.parameters["weight_0"]
b = nn1.parameters["bias_0"]

x = np.ones((2, 2))

z = (np.dot(W, x[0].reshape(-1, 1)) + b).reshape(1, -1)
expected_scores = np.log(np.exp(z) / np.exp(z).sum()) # Numerically unstable, but fine for small example.
expected_scores = np.repeat(expected_scores, 2, axis=0)
scores = nn1.forward(x)

print(scores - expected_scores)

[[ 0.00000000e+00  8.88178420e-16 -6.24500451e-17]
 [ 0.00000000e+00  8.88178420e-16 -6.24500451e-17]]


In [4]:
# Test NeuralNetworkModule._compute_loss

y = np.array([0, 2])

expected_loss = -(scores[0, 0] + scores[1, 2]) # Negative log likelihood.
loss = nn1._compute_loss(y, scores)

print(loss - expected_loss)

0.0


In [5]:
# Test NeuralNetworkModule.backward

# x.shape = [batch_size * d_in]
# W.shape = [n_classes * d_in]
# z.shape = [batch_size * n_classes]

expected_grad_W = np.zeros((3, 2))
for i in [0, 1]:
    expected_grad_W[0, i] = -x[0, i] + np.exp(z[0, 0]) / np.exp(z).sum() * x[0, i]
    for j in [1, 2]:
        expected_grad_W[j, i] = np.exp(z[0, j]) / np.exp(z).sum() * x[0, i]
        
expected_grad_b = np.zeros((3, 1))
expected_grad_b[0] = -1 + np.exp(z[0, 0]) / np.exp(z).sum()
for j in [1, 2]:
    expected_grad_b[j] = np.exp(z[0, j]) / np.exp(z).sum()
  
nn1.backward(y, scores)
grad_W = nn1.gradients["weight_0"]
grad_b = nn1.gradients["bias_0"]

print(grad_W - 2 * expected_grad_W)
print(grad_b - 2 * expected_grad_b)

[[ 1.00000000e+00  1.00000000e+00]
 [ 2.08166817e-17  2.08166817e-17]
 [-1.00000000e+00 -1.00000000e+00]]
[[ 1.00000000e+00]
 [ 2.08166817e-17]
 [-1.00000000e+00]]


In [6]:
# End-to-end test 1: Mixture of Gaussians.

n = 100

y_train = np.random.binomial(1, 0.5, size=n)
X_train = np.random.normal(size=(n, 2)) + 10 * y_train.reshape(-1, 1)

In [7]:
batch_size = 16
epochs = 100
dims = [2, 2] 
init_scale = 1.0
optimizer = SteepestDescentOptimizer(stepsize=1e-2)
tol = 1e-8
verbose = True

nn = NeuralNetworkClassifier(
    batch_size=batch_size,
    epochs=epochs,
    dims=dims,
    init_scale=init_scale, 
    optimizer=optimizer,
    tol=tol,
    verbose=verbose,
)

nn.fit(X_train, y_train)

Epoch 0 	 | cross entropy loss: 20.5160 	 | train accuracy: 0.910
Epoch 10 	 | cross entropy loss: 5.3498 	 | train accuracy: 1.000
Epoch 20 	 | cross entropy loss: 2.9371 	 | train accuracy: 1.000
Epoch 30 	 | cross entropy loss: 1.9673 	 | train accuracy: 1.000
Epoch 40 	 | cross entropy loss: 1.5005 	 | train accuracy: 1.000
Epoch 50 	 | cross entropy loss: 1.2502 	 | train accuracy: 1.000
Epoch 60 	 | cross entropy loss: 1.0709 	 | train accuracy: 1.000
Epoch 70 	 | cross entropy loss: 0.9041 	 | train accuracy: 1.000
Epoch 80 	 | cross entropy loss: 0.7897 	 | train accuracy: 1.000
Epoch 90 	 | cross entropy loss: 0.6989 	 | train accuracy: 1.000


<nn.NeuralNetworkClassifier at 0x7f80ac055100>

In [8]:
# End-to-end test 2: XOR.

X_train = 10 * np.random.normal(size=(n, 2))
y_train = (X_train[:, 0] * X_train[:, 1] >= 0).astype(int)

In [17]:
batch_size = 16
epochs = 1000
dims = [2, 2, 2] 
init_scale = 1.0
optimizer = SteepestDescentOptimizer(stepsize=1e-2)
tol = 1e-8
verbose = True

nn = NeuralNetworkClassifier(
    batch_size=batch_size,
    epochs=epochs,
    dims=dims,
    init_scale=init_scale, 
    optimizer=optimizer,
    tol=tol,
    verbose=verbose,
)

nn.fit(X_train, y_train)

Epoch 0 	 | cross entropy loss: 106.4657 	 | train accuracy: 0.710
Epoch 100 	 | cross entropy loss: 61.2103 	 | train accuracy: 0.720
Epoch 200 	 | cross entropy loss: 59.3535 	 | train accuracy: 0.740
Epoch 300 	 | cross entropy loss: 77.9838 	 | train accuracy: 0.660
Epoch 400 	 | cross entropy loss: 61.5183 	 | train accuracy: 0.760
Epoch 500 	 | cross entropy loss: 66.5935 	 | train accuracy: 0.700
Epoch 600 	 | cross entropy loss: 83.6706 	 | train accuracy: 0.770
Epoch 700 	 | cross entropy loss: 69.3438 	 | train accuracy: 0.660
Epoch 800 	 | cross entropy loss: 58.2789 	 | train accuracy: 0.720
Epoch 900 	 | cross entropy loss: 61.7321 	 | train accuracy: 0.720


<nn.NeuralNetworkClassifier at 0x7f80adaec430>