# Neural networks with `autodiff`
This brief demo showcases an important use case of automatic differentiation:
neural networks. We implement two simple neural network models using
our `autodiff` package for a handwritten digit dataset.

In [1]:
import numpy as np
import autodiff.vector as vc

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

## Load the dataset
We use scikit-learn's `load_digits` dataset, a small MNIST-like collection of
handwritten digits, restricted to only two digits: 0 and 1. The images are
only 8 by 8 pixels, rather than the full 28 by 28 pixel images of MNIST
proper. These simplifications enable the demo to run quickly on a variety of
hardware.

In [2]:
# Keep only the two classes 0 and 1 and get the data as a (features, labels) pair.
X, y = load_digits(return_X_y=True, n_class=2)

# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    random_state=42,
)

## Neural Network I
Below, we implement a simple feed-forward neural network with two fully
connected layers:

* **First layer:** 5 Neurons, Linear Activation
* **Second layer:** output layer, 1 Neuron, Sigmoid activation

The model is trained with binary cross-entropy loss.

In [3]:
# Network I: two layers, linear activation on the first and sigmoid on the second.
d = 8 * 8  # input size: one value per pixel of an 8 by 8 image
l1 = 5     # neurons in the first layer
l2 = 1     # neurons in the output layer

# Flat vector of initial weights: the l1*d first-layer weights (standard normal
# draws scaled by 1/d) followed by the l2*l1 output-layer weights (scaled by 1/l1).
param_val = np.concatenate((
    np.random.randn(l1 * d) / d,
    np.random.randn(l2 * l1) / l1,
))


g = vc.vec_gen()

# One length-d weight vector per first-layer neuron, generated in order.
# NOTE(review): presumably each generate() call hands out fresh autodiff
# variables whose order matches the layout of param_val -- confirm against
# autodiff.vector.
t11, t12, t13, t14, t15 = (g.generate(d) for _ in range(5))

# Weight vector of the single output neuron (one weight per first-layer neuron).
t2 = g.generate(l1)

def nn(input_indexs, param_val, step_size=0.1, cnt=0):
    """Run one gradient-descent step of the two-layer network on a mini-batch.

    For every index in ``input_indexs`` the network (5 linear units followed
    by one sigmoid output) is built on the corresponding row of ``X_train``
    and the binary cross-entropy loss against ``y_train`` is differentiated
    at ``param_val``. The per-sample gradients are summed (not averaged),
    scaled by ``step_size`` and subtracted from ``param_val``.

    After the update, the prediction and loss of the *last* sample in the
    batch are evaluated at the new parameters and printed.

    Relies on the module-level names ``X_train``, ``y_train``, ``d``, ``l1``,
    ``l2``, ``t11`` .. ``t15`` and ``t2``.

    Args:
        input_indexs: Row indices into ``X_train`` / ``y_train`` forming the batch.
        param_val: Flat array holding the current value of every weight.
        step_size: Learning rate applied to the summed gradient.
        cnt: Iteration counter, used only in the printed progress line.

    Returns:
        The updated flat parameter array. For an empty batch, an unchanged
        copy of ``param_val`` is returned and nothing is printed.
    """
    # Without samples there is no gradient and no "last sample" to report on;
    # previously this case crashed with an unbound-variable error.
    if len(input_indexs) == 0:
        return np.array(param_val, dtype=float)

    first_layer_weights = (t11, t12, t13, t14, t15)
    L_ders = np.zeros((len(input_indexs), l1*d + l2*l1))
    for i, input_index in enumerate(input_indexs):
        # normalise the input
        input_ = X_train[input_index, :]/16

        # layer 1: one dot product per neuron, linear (identity) activation
        z1 = vc.concat([vc.dot(t, input_) for t in first_layer_weights])
        a1 = z1

        # layer 2: single output neuron with sigmoid activation
        z2 = vc.concat([vc.dot(t2, a1)])
        f = vc.sigmoid(z2)
        y = y_train[input_index]

        # binary cross-entropy loss for this sample
        L = -y * vc.log(f) - (1-y) * vc.log(1-f)
        # NOTE(review): quickderiv presumably returns the derivative of L with
        # respect to every parameter, evaluated at param_val -- confirm.
        L_der = L.quickderiv(param_val)
        L_ders[i,:] = L_der.flatten()

    # Descent step along the sum of the per-sample gradients.
    result = param_val - np.sum(L_ders, axis=0) * step_size

    # Progress report: last sample of the batch, evaluated at the new parameters.
    f_val = f.quickeval(result)
    L_val = L.quickeval(result)
    print(f'[iter: {cnt}]  label: {y}   prob: {f_val[0]:.4f}   loss: {L_val[0]:.4f}')
    return result

# Train for 20 steps; every step uses a fresh random mini-batch of
# 10 distinct rows of the training set.
for j in range(20):
    input_indexs = np.random.choice(len(X_train), size=10, replace=False)
    param_val = nn(input_indexs, param_val, step_size=0.1, cnt=j)

[iter: 0]  label: 1   prob: 0.7555   loss: 0.2804
[iter: 1]  label: 1   prob: 0.5642   loss: 0.5723
[iter: 2]  label: 0   prob: 0.2459   loss: 0.2822
[iter: 3]  label: 1   prob: 0.9441   loss: 0.0575
[iter: 4]  label: 1   prob: 0.8671   loss: 0.1426
[iter: 5]  label: 1   prob: 0.9789   loss: 0.0213
[iter: 6]  label: 0   prob: 0.0408   loss: 0.0417
[iter: 7]  label: 1   prob: 0.9998   loss: 0.0002
[iter: 8]  label: 0   prob: 0.0003   loss: 0.0003
[iter: 9]  label: 0   prob: 0.0155   loss: 0.0156
[iter: 10]  label: 1   prob: 0.9970   loss: 0.0030
[iter: 11]  label: 0   prob: 0.0826   loss: 0.0863
[iter: 12]  label: 1   prob: 0.9984   loss: 0.0016
[iter: 13]  label: 0   prob: 0.0014   loss: 0.0014
[iter: 14]  label: 0   prob: 0.0084   loss: 0.0084
[iter: 15]  label: 1   prob: 0.9929   loss: 0.0071
[iter: 16]  label: 1   prob: 0.9998   loss: 0.0002
[iter: 17]  label: 0   prob: 0.0005   loss: 0.0005
[iter: 18]  label: 1   prob: 0.9498   loss: 0.0515
[iter: 19]  label: 0   prob: 0.0019   los

## Neural Network II
This more complex neural network features three fully connected layers:
* **First layer:** 5 Neurons, ReLU Activation
* **Second layer:** 2 Neurons, Linear Activation
* **Third layer:** output layer, 1 Neuron, Sigmoid activation

The model is trained with binary cross-entropy loss.

In [4]:
# Network II: three layers (ReLU, linear, sigmoid).
d = 8 * 8  # input size: one value per pixel of an 8 by 8 image
l1 = 5     # neurons in the first layer
l2 = 2     # neurons in the second layer
l3 = 1     # neurons in the output layer

# Flat vector of initial weights, layer by layer: l1*d values scaled by 1/d,
# then l2*l1 values scaled by 1/l1, then l3*l2 values scaled by 1/l2.
param_val = np.concatenate((
    np.random.randn(l1 * d) / d,
    np.random.randn(l2 * l1) / l1,
    np.random.randn(l3 * l2) / l2,
))


g = vc.vec_gen()

# One length-d weight vector per first-layer neuron, generated in order.
# NOTE(review): presumably each generate() call hands out fresh autodiff
# variables whose order matches the layout of param_val -- confirm against
# autodiff.vector.
t11, t12, t13, t14, t15 = (g.generate(d) for _ in range(5))

# One length-l1 weight vector per second-layer neuron.
t21, t22 = (g.generate(l1) for _ in range(2))

# Weight vector of the single output neuron (one weight per second-layer neuron).
t3 = g.generate(l2)

def nn(input_indexs, param_val, step_size=0.1, cnt=0):
    """Run one gradient-descent step of the three-layer network on a mini-batch.

    For every index in ``input_indexs`` the network (5 ReLU units, 2 linear
    units, one sigmoid output) is built on the corresponding row of
    ``X_train`` and the binary cross-entropy loss against ``y_train`` is
    differentiated at ``param_val``. The per-sample gradients are summed (not
    averaged), scaled by ``step_size`` and subtracted from ``param_val``.

    After the update, the prediction and loss of the *last* sample in the
    batch are evaluated at the new parameters and printed.

    Relies on the module-level names ``X_train``, ``y_train``, ``d``, ``l1``,
    ``l2``, ``l3``, ``t11`` .. ``t15``, ``t21``, ``t22`` and ``t3``.

    Args:
        input_indexs: Row indices into ``X_train`` / ``y_train`` forming the batch.
        param_val: Flat array holding the current value of every weight.
        step_size: Learning rate applied to the summed gradient.
        cnt: Iteration counter, used only in the printed progress line.

    Returns:
        The updated flat parameter array. For an empty batch, an unchanged
        copy of ``param_val`` is returned and nothing is printed.
    """
    # Without samples there is no gradient and no "last sample" to report on;
    # previously this case crashed with an unbound-variable error.
    if len(input_indexs) == 0:
        return np.array(param_val, dtype=float)

    first_layer_weights = (t11, t12, t13, t14, t15)
    second_layer_weights = (t21, t22)
    L_ders = np.zeros((len(input_indexs), l1*d + l2*l1 + l3*l2))
    for i, input_index in enumerate(input_indexs):
        # normalise the input
        input_ = X_train[input_index, :]/16

        # layer 1: one dot product per neuron, ReLU activation
        z1 = vc.concat([vc.dot(t, input_) for t in first_layer_weights])
        a1 = vc.ReLU(z1)

        # layer 2: linear (identity) activation
        z2 = vc.concat([vc.dot(t, a1) for t in second_layer_weights])
        a2 = z2

        # layer 3: single output neuron with sigmoid activation
        z3 = vc.concat([vc.dot(t3, a2)])
        f = vc.sigmoid(z3)
        y = y_train[input_index]

        # binary cross-entropy loss for this sample
        L = -y * vc.log(f) - (1-y) * vc.log(1-f)
        # NOTE(review): quickderiv presumably returns the derivative of L with
        # respect to every parameter, evaluated at param_val -- confirm.
        L_der = L.quickderiv(param_val)
        L_ders[i,:] = L_der.flatten()

    # Descent step along the sum of the per-sample gradients.
    result = param_val - np.sum(L_ders, axis=0) * step_size

    # Progress report: last sample of the batch, evaluated at the new parameters.
    f_val = f.quickeval(result)
    L_val = L.quickeval(result)
    print(f'[iter: {cnt}]  label: {y}   prob: {f_val[0]:.4f}   loss: {L_val[0]:.4f}')
    return result

# Train for 20 steps; every step uses a fresh random mini-batch of
# 10 distinct rows of the training set.
for j in range(20):
    input_indexs = np.random.choice(len(X_train), size=10, replace=False)
    param_val = nn(input_indexs, param_val, step_size=0.1, cnt=j)

[iter: 0]  label: 0   prob: 0.5049   loss: 0.7029
[iter: 1]  label: 1   prob: 0.5180   loss: 0.6578
[iter: 2]  label: 1   prob: 0.5220   loss: 0.6501
[iter: 3]  label: 0   prob: 0.4929   loss: 0.6791
[iter: 4]  label: 0   prob: 0.4847   loss: 0.6630
[iter: 5]  label: 0   prob: 0.4784   loss: 0.6508
[iter: 6]  label: 0   prob: 0.4793   loss: 0.6526
[iter: 7]  label: 0   prob: 0.3659   loss: 0.4555
[iter: 8]  label: 0   prob: 0.0768   loss: 0.0800
[iter: 9]  label: 0   prob: 0.5338   loss: 0.7631
[iter: 10]  label: 1   prob: 0.9381   loss: 0.0639
[iter: 11]  label: 0   prob: 0.3799   loss: 0.4779
[iter: 12]  label: 0   prob: 0.1847   loss: 0.2042
[iter: 13]  label: 1   prob: 0.9938   loss: 0.0063
[iter: 14]  label: 0   prob: 0.0175   loss: 0.0177
[iter: 15]  label: 0   prob: 0.0092   loss: 0.0092
[iter: 16]  label: 0   prob: 0.0042   loss: 0.0042
[iter: 17]  label: 1   prob: 0.9961   loss: 0.0039
[iter: 18]  label: 1   prob: 0.9996   loss: 0.0004
[iter: 19]  label: 1   prob: 0.9984   los