In [1]:
import sys
import os
# Make the directory two levels above the current working directory importable
# (presumably the project root that holds the `data`, `net` and `common`
# packages imported below -- confirm against the repository layout).
# The path is resolved to an absolute one so the entry keeps working if the
# working directory changes later, and it is only added when missing so that
# re-running this cell does not pile up duplicate sys.path entries.
_project_root = os.path.abspath(os.path.join(os.curdir, os.pardir, os.pardir))
if _project_root not in sys.path:
    sys.path.append(_project_root)

In [2]:
import numpy as np
from data.mnist import read_data
from net.Model import Model
from common.layer import Dense, Relu, BatchNorm, SoftmaxWithLoss

In [3]:
# Seed NumPy's global RNG before anything is constructed (presumably so the
# randomly initialised weights are repeatable between runs).
np.random.seed(47)
model = Model()

# (layer class, constructor args) in stacking order. Each layer is instantiated
# right before it is added, mirroring a sequence of model.add(Layer(...)) calls.
for layer_cls, layer_args in (
    (Dense, (784, 50)),
    (BatchNorm, (50,)),
    (Relu, ()),
    (Dense, (50, 10)),
):
    model.add(layer_cls(*layer_args))

# Finalise the network with a softmax + loss head.
model.build(SoftmaxWithLoss())

# Show the registered layers, then the per-layer-type counters, one per line.
print(model.network.layers, model.network.layers_cnt, sep="\n")

OrderedDict([('Dense_1', <common.layer.Dense object at 0x111a84d30>), ('BatchNorm_1', <common.layer.BatchNorm object at 0x111a84dd8>), ('Relu_1', <common.layer.Relu object at 0x111a84e48>), ('Dense_2', <common.layer.Dense object at 0x111a84e80>)])
{'Dense': 2, 'BatchNorm': 1, 'Relu': 1}


In [4]:
# Display the network's parameter dictionary. Per the recorded output it is
# keyed by "<layer name>_<param>" (e.g. 'Dense_1_W', 'Dense_1_b') and maps to
# NumPy arrays.
model.network.params

{'Dense_1_W': array([[-8.48009476e-03,  1.30590636e-02,  9.24207966e-03, ...,
          8.23346112e-03, -1.05748398e-02,  5.51049366e-03],
        [ 2.74123250e-03,  3.67296464e-03,  1.95889334e-03, ...,
          8.20486612e-03,  5.81123877e-03,  1.77745083e-02],
        [-9.84595264e-03,  4.03200469e-03,  9.32856212e-03, ...,
          4.92271708e-03,  5.41701014e-03,  5.04837520e-03],
        ...,
        [ 8.98375443e-03, -1.41161880e-02,  2.50124210e-02, ...,
          6.19449836e-03,  3.38528603e-04,  6.60948448e-03],
        [-1.92463518e-02, -3.17399354e-03, -1.17530425e-03, ...,
          7.64916707e-03, -5.47360566e-03, -1.01511629e-02],
        [ 1.60821315e-02,  1.33322891e-02,  2.04653618e-05, ...,
          5.11716742e-03, -4.97369082e-03, -1.27795926e-02]]),
 'Dense_1_b': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,

In [5]:
# Load MNIST from the directory two levels up, with one-hot labels requested.
data_set = read_data(base_dir='../../mnist_data', one_hot=True)

# Take only the first three training samples: a tiny batch keeps the
# gradient check further down cheap.
first_three = slice(3)
x_batch, y_batch = data_set.train.images[first_three], data_set.train.labels[first_three]

# Display the batch shape as the cell result.
x_batch.shape

reading ../../mnist_data/train-images-idx3-ubyte.gz
reading ../../mnist_data/t10k-images-idx3-ubyte.gz
reading ../../mnist_data/train-labels-idx1-ubyte.gz
reading ../../mnist_data/t10k-labels-idx1-ubyte.gz


(3, 784)

In [6]:
# Evaluate the network's loss on the 3-sample batch and display it.
# NOTE(review): a uniform guess over 10 classes would give ln(10) ~= 2.30 per
# sample; the recorded value (~11.34) is noticeably higher -- confirm whether
# the loss is summed over the batch and/or affected by the initial weights.
model.network.loss(x_batch, y_batch)

11.344696340876878

In [7]:
# Gradient check: compute the gradients twice on the same small batch, once via
# the network's numerical_gradient() and once via its gradient() method
# (presumably finite differences vs. backpropagation -- see the network class).
gradient_numerical = model.network.numerical_gradient(x_batch, y_batch)
gradient_backprop = model.network.gradient(x_batch, y_batch)

# Report the mean absolute difference per parameter; values close to zero mean
# the two implementations agree.
for key, numeric_grad in gradient_numerical.items():
    abs_error = np.abs(gradient_backprop[key] - numeric_grad)
    diff = np.mean(abs_error)
    print("{}: {}".format(key, diff))

Dense_1_W: 1.077731465394156e-08
Dense_1_b: 6.4618449480136066e-18
BatchNorm_1_gamma: 2.830608046507572e-09
BatchNorm_1_beta: 2.7280324315511903e-09
Dense_2_W: 7.944563970639379e-08
Dense_2_b: 1.4489880451884485e-07


In [8]:
# Display the network's `dparams` dictionary. Per the recorded output its keys
# mirror the parameter names (e.g. 'Dense_1_W', 'Dense_1_b').
# Presumably these are the gradients stored by the most recent gradient()
# call -- TODO confirm against the network implementation.
model.network.dparams

{'Dense_1_W': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 'Dense_1_b': array([ 1.38777878e-17,  1.73472348e-18,  6.93889390e-18,  3.25260652e-19,
        -1.38777878e-17,  0.00000000e+00, -1.30104261e-18,  3.46944695e-18,
         1.08420217e-19,  0.00000000e+00, -8.67361738e-19,  0.00000000e+00,
         4.33680869e-19,  0.00000000e+00, -1.73472348e-18,  0.00000000e+00,
         8.32667268e-17, -6.93889390e-18,  0.00000000e+00,  6.93889390e-18,
        -3.46944695e-18, -6.93889390e-18,  0.00000000e+00, -5.55111512e-17,
        -1.38777878e-17,  0.00000000e+00,  6.93889390e-18,  6.93889390e-18,
         0.00000000e+00,  1.38777878e-17,  8.67361738e-19, -6.93889390e-18,
         0.00000000e+00,  2.16840434e-18,  6.93889390e-18,  1.38777878e-17,
         1.73472348e-18,  8.67361738e-19,