# Imports

In [1]:
import numpy as np
import pandas as pd
import numexpr as ne

In [3]:
from simplenn.structures.network.activations import LRelu
from simplenn.structures.network.activations import Sigmoid
from simplenn.structures.network.activations import TanH
from simplenn.evaluation.loss import Square
from simplenn.evaluation.loss import NegLogLike
from simplenn.structures.network import Network
from simplenn.structures.network import Layer
from simplenn.optim.gradient import BackProp
from simplenn.optim.evolution import Genetic

In [4]:
%matplotlib

Using matplotlib backend: TkAgg


# Data

In [5]:
# Inputs: ten one-hot vectors of width 10. Before the transpose, row k carries
# its single 1 at position 9 - k, i.e. the matrix is a left-right flipped
# identity. `.copy()` materialises a C-ordered array so the trailing `.T`
# yields the same layout as transposing a literal. Each COLUMN of `xs` is one
# sample.
xs = np.fliplr(np.eye(10, dtype='float64')).copy().T

# Targets: the label of sample k is the 4-bit, most-significant-bit-first
# binary encoding of k (k = 0..9). After the transpose `ys` is 4 x 10, one
# column per sample.
ys = np.array(
    [[int(bit) for bit in format(k, '04b')] for k in range(10)],
    dtype='float64'
).T

# Experiments

### Genetic Training

In [6]:
# Settings for the genetic run. Every value is handed positionally to
# `genetic.run(...)` in the training cell; the group headings below are
# inferred from the variable names only -- confirm against simplenn's `Genetic`.
# NOTE(review): `verboseFreq` / `recordFreq` are rebound by the back-propagation
# settings cell further down, so re-run this cell before repeating the genetic
# run.

# Population and schedule
popSize, generations = 50, 500

# Selection
tournamentSize = 5
nElitism = 0

# Mutation
mutationProba = 0.5
mutationScale = 0.05
mutationRelative = False

# Reporting (the recorded log prints one line every 50 generations)
verboseFreq = recordFreq = 50

In [7]:
# Network trained by the genetic algorithm: two TanH layers of width 7 followed
# by a Sigmoid output layer, all using the "kaiming" initialiser string.
# Input / output widths come from the data: axis 0 of `xs` and of `ys`.
inputDim, outputDim = xs.shape[0], ys.shape[0]

net = Network(
    NegLogLike(),
    [
        Layer(inputDim, 7, TanH(), "kaiming"),
        Layer(7, 7, TanH(), "kaiming"),
        # NOTE(review): the meaning of the trailing positional `True` is not
        # visible here -- check the `Layer` signature.
        Layer(7, outputDim, Sigmoid(), "kaiming", True),
    ],
)

In [8]:
# Run the genetic optimiser. `genetic` is kept because later cells read
# `genetic.networks`; `net` is rebound to whatever `run` returns.
# All arguments are positional, so their order must not change.
# NOTE(review): `net` is both the input and the result, so re-running this cell
# passes the previously returned network back in instead of a fresh one.
genetic = Genetic()
net = genetic.run(
    net, xs, ys,
    popSize, tournamentSize, nElitism,
    mutationProba, mutationScale, mutationRelative,
    generations, verboseFreq, recordFreq,
)

Generation 0 (ep. 500): 26.429597722893945
Generation 50 (ep. 25500): 6.554460786007132
Generation 100 (ep. 50500): 0.8351360483330259
Generation 150 (ep. 75500): 0.12350923107577268
Generation 200 (ep. 100500): 0.01905290445226976
Generation 250 (ep. 125500): 0.002829211209728115
Generation 300 (ep. 150500): 0.0005005354316920893
Generation 350 (ep. 175500): 0.00012592383129020782
Generation 400 (ep. 200500): 2.5173264013696634e-05
Generation 450 (ep. 225500): 7.444172599665618e-06


In [9]:
# Show the network output rounded to two decimals, one column per sample.
# NOTE(review): `xs` has 10 columns but the slice keeps only the first 9, so
# the last sample is not displayed -- confirm this is intended.
np.round(net.forward(xs[:, :9]), decimals=2)

array([[0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1., 1., 1., 1., 0.],
       [0., 0., 1., 1., 0., 0., 1., 1., 0.],
       [0., 1., 0., 1., 0., 1., 0., 1., 0.]])

### Gradient Back-Propagation Training

In [10]:
# Back-propagation settings, passed positionally to `bp.run(...)` in the
# training cell.
# NOTE(review): `verboseFreq` / `recordFreq` overwrite the values set for the
# genetic run earlier in the notebook.
lRate = 0.1
batchSize = xs.shape[1]  # length of axis 1 of xs (one column per sample)
epochs = 100_000
verboseFreq = recordFreq = 10_000  # the recorded log prints every 10000 epochs

In [11]:
# Network trained by back-propagation: an input TanH layer, three identical
# 7 -> 7 TanH layers, and a Sigmoid output layer, all using the "xavier"
# initialiser string. Layers are still created in input-to-output order.
# This rebinds `net`, replacing the network from the genetic experiment.
net = Network(
    NegLogLike(),
    [
        Layer(xs.shape[0], 7, TanH(), "xavier"),
        *(Layer(7, 7, TanH(), "xavier") for _ in range(3)),
        # NOTE(review): the meaning of the trailing positional `True` is not
        # visible here -- check the `Layer` signature.
        Layer(7, ys.shape[0], Sigmoid(), "xavier", True),
    ],
)

In [12]:
# Train `net` with gradient back-propagation. `bp` is kept because later cells
# read `bp.networks`. The call is the last expression of the cell, so its return
# value (a Network, per the recorded output) is echoed.
# All arguments are positional, so their order must not change.
bp = BackProp()
bp.run(
    net, xs, ys,
    batchSize, lRate,
    epochs, verboseFreq, recordFreq,
)

Epoch 0: 31.978311364456587
Epoch 10000: 0.02754628881524296
Epoch 20000: 0.013539621468977782
Epoch 30000: 0.008953049449001412
Epoch 40000: 0.006679198893890351
Epoch 50000: 0.005322529758981614
Epoch 60000: 0.004421873922612738
Epoch 70000: 0.0037807018244523914
Epoch 80000: 0.0033011612520991458
Epoch 90000: 0.0029290722551631844


<simplenn.structures.network.network.Network at 0x7f51a41f0710>

In [13]:
# Show the network output rounded to two decimals, one column per sample.
# NOTE(review): `xs` has 10 columns but the slice keeps only the first 9, so
# the last sample is not displayed -- confirm this is intended.
np.round(net.forward(xs[:, :9]), decimals=2)

array([[0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1., 1., 1., 1., 0.],
       [0., 0., 1., 1., 0., 0., 1., 1., 0.],
       [0., 1., 0., 1., 0., 1., 0., 1., 0.]])

### Network exploration

In [14]:
# Per-layer weight magnitude across the networks stored in `genetic.networks`:
# one column per layer, one row per stored network, each entry being the sum of
# absolute values of that layer's `W`.
df = pd.DataFrame({
    f"Layer-{i}": pd.Series([np.abs(n.layers[i].W).sum() for n in genetic.networks])
    for i in range(len(genetic.networks[0].layers))
})

# Plot in a separate statement so `df` keeps naming the DataFrame; chaining
# `.plot(...)` onto the constructor bound `df` to the matplotlib Axes instead.
ax = df.plot(title='Weights Norm Evolution')
# Trailing `;` keeps the cell from echoing the return value of `set`.
ax.set(xlabel='index in genetic.networks', ylabel='sum(|W|)');

In [15]:
## Backprop specific
# Per-layer gradient magnitude across the networks stored in `bp.networks`:
# one column per layer, one row per stored network, each entry being the sum of
# absolute values of that layer's `delta_W`.
df = pd.DataFrame({
    f"Layer-{i}": pd.Series([np.abs(n.layers[i].delta_W).sum() for n in bp.networks])
    for i in range(len(bp.networks[0].layers))
})

# Plot in a separate statement so `df` keeps naming the DataFrame; chaining
# `.plot(...)` onto the constructor bound `df` to the matplotlib Axes instead.
ax = df.plot(title='Gradient Norm Evolution')
# Trailing `;` keeps the cell from echoing the return value of `set`.
ax.set(xlabel='index in bp.networks', ylabel='sum(|delta_W|)');

In [16]:
# Histogram of the flattened weights of every layer in the last network stored
# by the genetic run (one subplot per layer, columns named W0, W1, ...).
# Series of different lengths are aligned on their index by `pd.concat`.
# NOTE(review): this rebinds `net`, which until now named the network handed to
# the back-propagation run.
net = genetic.networks[-1]
df = pd.concat(
    [
        pd.Series(layer.W.flatten(), name=f"W{idx}")
        for idx, layer in enumerate(net.layers)
    ],
    axis=1
)
df.hist(bins=20)

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7f51a1332190>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f51a1206bd0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7f51a11c8f10>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f51a117dc10>]],
      dtype=object)

In [17]:
# Histogram of the flattened `A` attribute of every layer in the last network
# stored by the genetic run (one subplot per layer, columns named A0, A1, ...).
# `A` is presumably the layer's most recent activations -- confirm in `Layer`.
# Series of different lengths are aligned on their index by `pd.concat`.
net = genetic.networks[-1]
df = pd.concat(
    [
        pd.Series(layer.A.flatten(), name=f"A{idx}")
        for idx, layer in enumerate(net.layers)
    ],
    axis=1
)
df.hist(bins=20)

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7f51a1236110>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f51a1002910>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7f51a0fbfc50>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f51a0f76950>]],
      dtype=object)

In [18]:
## Backprop specific
# Histogram of the flattened `delta_W` of every layer in the last network
# stored by the back-propagation run (one subplot per layer, columns named
# delta_W0, delta_W1, ...). Series of different lengths are aligned on their
# index by `pd.concat`.
net = bp.networks[-1]
df = pd.concat(
    [
        pd.Series(layer.delta_W.flatten(), name=f"delta_W{idx}")
        for idx, layer in enumerate(net.layers)
    ],
    axis=1
)
df.hist(bins=20)

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7f51a1035590>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f51a0df8e90>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7f51a0db86d0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f51a0de9ed0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7f51a0dab710>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7f51a0d61f10>]],
      dtype=object)

### Numexpr vs. Numpy

In [10]:
%%timeit
# Timed body: ten chained element-wise cosines evaluated by numexpr on a
# 10000 x 5000 array of random floats.
# NOTE(review): the array is created inside the timed body, so the reported
# time also includes generating 50 million random numbers.
A = np.random.random((10000, 5000))
for _ in range(10):
    # The expression is a string; the `A` inside it refers to the variable
    # above, so that variable must keep this name.
    A = ne.evaluate("cos(A)")

2.46 s ± 183 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [11]:
%%timeit
# Timed body: ten chained element-wise cosines evaluated by NumPy on a
# 10000 x 5000 array of random floats (same workload as the numexpr cell).
# NOTE(review): the array is created inside the timed body, so the reported
# time also includes generating 50 million random numbers.
A = np.random.random((10000, 5000))
for _ in range(10):
    A = np.cos(A)

6.07 s ± 155 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
