# Imports

In [1]:
import numpy as np
import pandas as pd
import numexpr as ne

In [2]:
from simplenn.structures.network.activations import LRelu
from simplenn.structures.network.activations import Sigmoid
from simplenn.structures.network.activations import TanH
from simplenn.evaluation.loss import Square
from simplenn.evaluation.loss import NegLogLike
from simplenn.structures.network import Network
from simplenn.structures.network import Layer
from simplenn.optim.gradient import BackProp
from simplenn.optim.evolution import Genetic

In [3]:
# Turn on matplotlib integration without naming a backend, so IPython picks
# the default one (the recorded output below reports TkAgg).
%matplotlib

Using matplotlib backend: TkAgg


# Data

In [4]:
# Inputs: ten one-hot vectors of length 10. Before transposing, row r carries
# its single 1 at position 9 - r (an identity matrix with its columns
# reversed); after the transpose every column of `xs` is one input vector.
xs = np.ascontiguousarray(np.eye(10, dtype='float64')[:, ::-1]).T

# Targets: the 4-bit binary code (most significant bit first) of 0..9, one row
# per number before transposing, so every column of `ys` is one 4-bit target.
ys = np.array(
    [[(number >> bit) & 1 for bit in (3, 2, 1, 0)] for number in range(10)],
    dtype='float64'
).T

# Experiments

### Genetic Training

In [5]:
# Settings for the genetic run; every name below is passed positionally to
# Genetic.run in the training cell of this section.

# Population and selection
popSize, tournamentSize, nElitism = 50, 5, 0

# Mutation
mutationProba, mutationScale, mutationRelative = 0.5, 0.05, False

# Run length and reporting cadence (the recorded output prints a line every
# 50 generations; recordFreq presumably sets the snapshot cadence — confirm
# against simplenn).
generations = 500
verboseFreq = recordFreq = 50

In [6]:
# 10 -> 7 -> 7 -> 4 stack: two TanH layers followed by a Sigmoid layer, all
# created with the "kaiming" option (presumably the weight-initialisation
# scheme — confirm in simplenn). Only the last layer receives the extra
# positional `True`; its meaning is defined by Layer and not visible here.
# Each row of the table is (input size, output size, activation class,
# extra positional args); the activation is instantiated per layer.
net = Network([
    Layer(n_in, n_out, activation(), "kaiming", *extra)
    for n_in, n_out, activation, extra in (
        (xs.shape[0], 7, TanH, ()),
        (7, 7, TanH, ()),
        (7, ys.shape[0], Sigmoid, (True,)),
    )
])

In [7]:
# Loss object built from the full input matrix and target matrix. The class
# name suggests a negative log-likelihood; the exact formula lives in simplenn.
loss = NegLogLike(xs, ys)

In [8]:
# Run the genetic optimiser and rebind `net` to whatever the run returns.
# Every argument is positional, so the order below has to match the signature
# of Genetic.run (defined in simplenn, not visible in this notebook).
genetic = Genetic()
net = genetic.run(
    net, loss,
    popSize, tournamentSize, nElitism,
    mutationProba, mutationScale, mutationRelative,
    generations, verboseFreq, recordFreq,
)

Generation 0 (ep. 500): 26.036855391251166
Generation 50 (ep. 25500): 8.301523034227332
Generation 100 (ep. 50500): 1.271359557517998
Generation 150 (ep. 75500): 0.1732457900013513
Generation 200 (ep. 100500): 0.028274484663268765
Generation 250 (ep. 125500): 0.005099651656763824
Generation 300 (ep. 150500): 0.0012492455545575413
Generation 350 (ep. 175500): 0.00017795046458963035
Generation 400 (ep. 200500): 3.0470813705071013e-05
Generation 450 (ep. 225500): 7.247590573797943e-06


In [9]:
# Network outputs, rounded to 2 decimals, for the first 9 columns of xs.
# NOTE(review): xs has 10 columns, so the last sample is not evaluated here —
# confirm that leaving it out is intentional.
np.round(net.forward(xs[:, :9]), 2)

array([[0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1., 1., 1., 1., 0.],
       [0., 0., 1., 1., 0., 0., 1., 1., 0.],
       [0., 1., 0., 1., 0., 1., 0., 1., 0.]])

### Gradient Back-Propagation Training

In [5]:
# Settings for the back-propagation run; every name below is passed
# positionally to BackProp.run in the training cell of this section.
# Note that verboseFreq / recordFreq are rebound here (they were 50 in the
# genetic section).
lRate = 0.1

# One batch covers every column of xs, i.e. the whole data set at once.
batchSize = xs.shape[1]

# Run length and reporting cadence (the recorded output prints a line every
# 10000 epochs).
epochs = 100_000
verboseFreq = recordFreq = 10_000

In [6]:
# 10 -> 7 -> 7 -> 7 -> 7 -> 4 stack: four TanH layers followed by a Sigmoid
# layer, all created with the "xavier" option (presumably the
# weight-initialisation scheme — confirm in simplenn). Only the last layer
# receives the extra positional `True`; its meaning is defined by Layer and
# not visible here.
net = Network(
    [Layer(xs.shape[0], 7, TanH(), "xavier")]
    + [Layer(7, 7, TanH(), "xavier") for _ in range(3)]
    + [Layer(7, ys.shape[0], Sigmoid(), "xavier", True)]
)

In [7]:
# Loss object built from the full input matrix and target matrix. The class
# name suggests a negative log-likelihood; the exact formula lives in simplenn.
loss = NegLogLike(xs, ys)

In [8]:
# Run gradient back-propagation and rebind `net` to whatever the run returns.
# Every argument is positional, so the order below has to match the signature
# of BackProp.run (defined in simplenn, not visible in this notebook).
# `bp` is kept because the exploration cells read bp.networks afterwards.
bp = BackProp()
net = bp.run(
    net, loss,
    batchSize, lRate,
    epochs, verboseFreq, recordFreq,
)

Epoch 0: 27.791463150455247
Epoch 10000: 0.032948381998278944
Epoch 20000: 0.016062976965653827
Epoch 30000: 0.010574306038035561
Epoch 40000: 0.007865323699982364
Epoch 50000: 0.006253909031973549
Epoch 60000: 0.005186511366749021
Epoch 70000: 0.0044279551214476956
Epoch 80000: 0.0038614230671050196
Epoch 90000: 0.003422356908857351


In [9]:
# Network outputs, rounded to 2 decimals, for the first 9 columns of xs.
# NOTE(review): xs has 10 columns, so the last sample is not evaluated here —
# confirm that leaving it out is intentional.
np.round(net.forward(xs[:, :9]), 2)

array([[0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1., 1., 1., 1., 0.],
       [0., 0., 1., 1., 0., 0., 1., 1., 0.],
       [0., 1., 0., 1., 0., 1., 0., 1., 0.]])

### Network Exploration

In [11]:
# Sum of absolute weights (entrywise L1 norm) of every layer: one row per
# network stored in bp.networks, one column per layer.
# The frame stays in `df` and the Axes returned by .plot() goes to `ax`, so
# `df` really is a DataFrame (previously it was bound to the Axes object).
df = pd.DataFrame({
    f"Layer-{i}": pd.Series([np.abs(n.layers[i].W).sum() for n in bp.networks])
    for i in range(len(bp.networks[0].layers))
})
ax = df.plot(title='Weights Norm Evolution')
# Label both axes so the figure can be read on its own; the trailing ';'
# keeps the cell output empty, as before.
ax.set(xlabel='Index in bp.networks', ylabel='sum(|W|)');

In [12]:
## Backprop specific
# Sum of absolute delta_W entries of every layer (delta_W is presumably the
# weight gradient kept by back-propagation — confirm in simplenn): one row per
# network stored in bp.networks, one column per layer.
# The frame stays in `df` and the Axes returned by .plot() goes to `ax`, so
# `df` really is a DataFrame (previously it was bound to the Axes object).
df = pd.DataFrame({
    f"Layer-{i}": pd.Series([np.abs(n.layers[i].delta_W).sum() for n in bp.networks])
    for i in range(len(bp.networks[0].layers))
})
ax = df.plot(title='Gradient Norm Evolution')
# Label both axes so the figure can be read on its own; the trailing ';'
# keeps the cell output empty, as before.
ax.set(xlabel='Index in bp.networks', ylabel='sum(|delta_W|)');

In [13]:
# One histogram per layer of the flattened weight matrix W, taken from the
# last network in bp.networks. pd.concat aligns the columns on the Series
# index, so if the layers differ in size the shorter columns are padded with
# NaN.
net = bp.networks[-1]
df = pd.concat(
    [
        pd.Series(layer.W.flatten(), name=f"W{idx}")
        for idx, layer in enumerate(net.layers)
    ],
    axis=1
)
df.hist(bins=20)

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7f697d1ec410>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f697d131650>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7f697d0e3e50>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f697d0a2690>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7f697d056e90>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f697d0186d0>]],
      dtype=object)

In [14]:
# One histogram per layer of the flattened attribute A (presumably the layer
# activations from the latest forward pass — confirm in simplenn), taken from
# the last network in bp.networks. pd.concat aligns the columns on the Series
# index, so if the layers differ in size the shorter columns are padded with
# NaN.
net = bp.networks[-1]
df = pd.concat(
    [
        pd.Series(layer.A.flatten(), name=f"A{idx}")
        for idx, layer in enumerate(net.layers)
    ],
    axis=1
)
df.hist(bins=20)

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7f697ce6cf10>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f697ce39590>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7f697cdecd90>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f697cdad5d0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7f697cd61dd0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f697cd22610>]],
      dtype=object)

In [15]:
## Backprop specific
# One histogram per layer of the flattened delta_W (presumably the weight
# gradient — confirm in simplenn), taken from the last network in
# bp.networks. pd.concat aligns the columns on the Series index, so if the
# layers differ in size the shorter columns are padded with NaN.
net = bp.networks[-1]
df = pd.concat(
    [
        pd.Series(layer.delta_W.flatten(), name=f"delta_W{idx}")
        for idx, layer in enumerate(net.layers)
    ],
    axis=1
)
df.hist(bins=20)

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7f697d16df10>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f697cb3d510>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7f697caf2d10>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f697cab1550>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7f697ca68d50>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f697ca27590>]],
      dtype=object)

### NumExpr vs. NumPy

In [10]:
%%timeit
# Timed body: allocate a 10000 x 5000 random array, then apply cos to it ten
# times in a row through numexpr.
# NOTE(review): the allocation sits inside the timed body, so its cost is part
# of the reported time. The NumPy cell does the same, so the two figures are
# comparable, but neither is a pure cos() timing.
A = np.random.random((10000, 5000))
for _ in range(10):
    A = ne.evaluate("cos(A)")

2.46 s ± 183 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [11]:
%%timeit
# Timed body: allocate a 10000 x 5000 random array, then apply np.cos to it
# ten times in a row.
# NOTE(review): the allocation sits inside the timed body, so its cost is part
# of the reported time. The numexpr cell does the same, so the two figures are
# comparable, but neither is a pure cos() timing.
A = np.random.random((10000, 5000))
for _ in range(10):
    A = np.cos(A)

6.07 s ± 155 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
