In [1]:
import numpy as np
from core.nn import NN
from core.functional import softmax

In [2]:
import tensorflow as tf
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

In [3]:
x_train.shape

(60000, 28, 28)

In [4]:
new = x_train.reshape(x_train.shape[0], -1)
new.shape

(60000, 784)

In [5]:
net = NN(784, [8, 8, 10])
net.layer_sizes

[784, 8, 8, 10]

In [6]:
batch_size = 32
losses, accuracies = [], []

In [7]:
samp = np.random.randint(0, x_train.shape[0], size=batch_size)
samp

array([ 8253, 32564, 42227, 47872, 15114, 59269, 58689, 31692, 25265,
       10864, 20486, 59352, 18161, 55280, 40973, 59760,  5454, 21483,
       20321, 22635, 21521, 55442, 28628, 55004, 56627, 11463, 43721,
       42666, 37858, 14671,  3606,  9992])

In [8]:
print(x_train[samp].shape)
xs = x_train[samp].reshape(x_train[samp].shape[0], -1)
print(xs.shape)
ys = y_train[samp]
print(ys.shape)

(32, 28, 28)
(32, 784)
(32,)


In [9]:
predictions = [net(x) for x in xs]
ypred = [softmax(pred) for pred in predictions]
maxpred = [np.argmax(pred) for pred in predictions]
print(len(ypred))
print(ypred)
print(maxpred)

  data = 1 / (1 + np.exp(-x))


32
[[Value(data=0.09647424733998435, grad=0.0), Value(data=0.07650089184305804, grad=0.0), Value(data=0.13213091207437086, grad=0.0), Value(data=0.09179141863560793, grad=0.0), Value(data=0.13755864016115768, grad=0.0), Value(data=0.07079725571192777, grad=0.0), Value(data=0.0787153166343711, grad=0.0), Value(data=0.10945205608746922, grad=0.0), Value(data=0.12154740769734321, grad=0.0), Value(data=0.08503185381470975, grad=0.0)], [Value(data=0.09593323635337184, grad=0.0), Value(data=0.07417271467932238, grad=0.0), Value(data=0.12749859015965426, grad=0.0), Value(data=0.09420499104265996, grad=0.0), Value(data=0.1329870200118193, grad=0.0), Value(data=0.07918078534467071, grad=0.0), Value(data=0.08338490310173739, grad=0.0), Value(data=0.11271128918021954, grad=0.0), Value(data=0.1218318567576204, grad=0.0), Value(data=0.07809461336892433, grad=0.0)], [Value(data=0.08800512538534021, grad=0.0), Value(data=0.06764452512947862, grad=0.0), Value(data=0.13110469777372283, grad=0.0), Value

In [10]:
def encode_ys(ys: list):
    assert isinstance(ys[0], list) == False, "Make sure list is one-dimensional"
    encoded_ys = [[0] * 10 for _ in ys]
    for i in range(len(encoded_ys)):
        encoded_ys[i][ys[i]] = 1
    return encoded_ys

In [11]:
print(ys)
encoded_ys = encode_ys(ys)
print(encoded_ys)

[0 0 0 3 6 5 1 4 2 7 6 3 7 4 1 3 9 7 0 2 0 5 0 4 9 4 4 1 4 6 3 7]
[[1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 0

In [12]:
def nice_mse(ypred: list, encoded_ys: list):
    assert len(ypred) == len(encoded_ys)
    assert len(ypred[0]) == len(encoded_ys[0])
    n = len(ypred)
    m = len(ypred[0])
    loss = 0
    for i in range(n):
        loss += sum([(ypred[i][j] - encoded_ys[i][j])**2 for j in range(m)])
    return loss

In [13]:
def ugly_mse(ypred: list, ys: list):
    assert len(ypred) == len(ys)
    return sum([(ypred[i] - ys[i])**2 for i in range(len(ypred))])

In [14]:
print(ugly_mse(maxpred, ys))
print(nice_mse(ypred, encoded_ys))

283
Value(data=28.934044356207234, grad=0.0)


In [16]:
for k in range(10):
    # generate random sample
    samp = np.random.randint(0, x_train.shape[0], size=batch_size)
    xs = x_train[samp].reshape(x_train[samp].shape[0], -1)
    ys = y_train[samp]
    
    # forward pass
    predictions = [net(x) for x in xs]
    ypred = [softmax(pred) for pred in predictions]
    encoded_ys = encode_ys(ys)
    loss = nice_mse(ypred, encoded_ys)

    # backward pass
    net.zero_grad()
    loss.backward()
    
    # update
    for p in net.parameters:
        p.data += -0.1 * p.grad
    
    print(k, loss.data)
        
print(ypred)

  data = 1 / (1 + np.exp(-x))


0 29.597238843615823
1 29.475836529739492
2 28.758506673574022
3 28.82268646717103
4 29.389160137750046
5 29.154807243857327
6 29.193940991339158
7 28.89207652357036
8 29.06521727647835
9 28.964795879531415
[[Value(data=0.10121274732401904, grad=0.20242549464803808), Value(data=0.07008765695767634, grad=-1.8598246860846472), Value(data=0.12522909674500182, grad=0.25045819349000364), Value(data=0.09574201709879437, grad=0.19148403419758875), Value(data=0.13142030866870794, grad=0.26284061733741587), Value(data=0.0820628928157347, grad=0.1641257856314694), Value(data=0.08506288790822668, grad=0.17012577581645336), Value(data=0.11156333701765984, grad=0.2231266740353197), Value(data=0.11495574410285893, grad=0.22991148820571786), Value(data=0.08266331136132046, grad=0.16532662272264093)], [Value(data=0.10295177120445202, grad=0.20590354240890404), Value(data=0.06788753124687681, grad=0.13577506249375362), Value(data=0.1256937452155142, grad=0.2513874904310284), Value(data=0.09518017221452