In [38]:
import numpy as np
import pprint as pp

In [40]:
np.random.seed(2)

In [45]:
icats = np.random.randn(500,2) + np.array([0,3])
idogs = np.random.randn(600,2) + np.array([0, -3])
irats = np.random.randn(700,2) + np.array([3,3])

In [63]:
inputs = np.vstack([icats, idogs, irats])

In [90]:
elabels = np.array([0]*500 + [1]*600 + [2]*700)

In [62]:
oneHotLabels = np.zeros((len(elabels), 3))
for i in range(len(oneHotLabels)):
    oneHotLabels[i, elabels[i]] = 1

In [47]:
h_nodes = 4
o_nodes = 3

In [73]:
# Normal Distribution
wh_n = np.random.randn(2, h_nodes)
wh_n

array([[-0.22263398,  0.89484451, -1.42342295,  0.79503913],
       [-1.13442954,  1.13309232, -0.92986841, -0.6399618 ]])

In [74]:
# Random distribution
wh_u = np.random.rand(2, h_nodes)
wh_u

array([[0.48956129, 0.74137849, 0.64827354, 0.13606039],
       [0.71971312, 0.49730462, 0.62929536, 0.10778302]])

In [75]:
wo_n = np.random.randn(h_nodes, o_nodes)
wo_n

array([[ 1.38162947,  0.84126797,  0.13042701],
       [-0.42742182, -0.55765199,  1.63661091],
       [-0.2155254 ,  1.43530239,  1.48810999],
       [-0.20924528, -2.30751849, -0.26943416]])

In [76]:
wo_u = np.random.randn(h_nodes, o_nodes)
wo_u

array([[-0.99551769,  0.02383396, -0.64000418],
       [ 0.01760715,  1.49340464,  0.89976621],
       [-1.18647921,  0.11068749, -1.11833406],
       [ 2.22379007,  1.12493334, -1.12995345]])

In [95]:
bh = np.random.randn(h_nodes)
bo = np.random.randn(o_nodes)

In [77]:
#Sigmoid activation:
def sigmoidActivation(x):
    return 1/(1+np.exp(-x))

In [106]:
#Softmax activation:
def softmaxActivation(vec):
    expVec = np.exp(vec)
#     return expVec/sum(expVec)
    return expVec/expVec.sum(axis=1, keepdims=True)

In [107]:
def forwardPropagation(inputs, wh, wo):
    zh = np.dot(inputs, wh) + bh
    ah = sigmoidActivation(zh)
    
    zo = np.dot(ah,wo) + bo
    ao = softmaxActivation(zo)
    
    return zh, ah, zo, ao
    

In [108]:
zh, ah, zo, ao = forwardPropagation(inputs, wh_n, wo_n)

In [112]:
print('zh shape: ', zh.shape)
print('ah shape: ', ah.shape)
print('zo shape: ', zo.shape)
print('ao shape: ', ao.shape)

print(ao[0:3])
print(ao[0].sum())

zh shape:  (1800, 4)
ah shape:  (1800, 4)
zo shape:  (1800, 3)
ao shape:  (1800, 3)
[[0.039464   0.01439242 0.94614358]
 [0.05983229 0.00628863 0.93387908]
 [0.05255722 0.00970719 0.93773559]]
0.9999999999999999


In [119]:
def sigmoidDerivative(x):
    return sigmoidActivation(x)*(1 - sigmoidActivation(x))

In [135]:
def backPropagationValues(inputs,zh, ah, zo, ao):
    dcost_dzo = ao - oneHotLabels
    dzo_dwo = ah
    dcost_dwo = np.dot(dzo_dwo.T, dcost_dzo)
    dcost_dbo = ao - oneHotLabels
    
    dzo_dah = wo_n
    dcost_dah = np.dot(dcost_dzo , dzo_dah.T)
    dah_dzh = sigmoidDerivative(zh)
    dzh_dwh = inputs
    dcost_dwh = np.dot(dzh_dwh.T, dah_dzh * dcost_dah)
    dcost_dbh = dcost_dah * dah_dzh
    
    return dcost_dwo, dcost_dbo, dcost_dwh, dcost_dbh

In [136]:
dcost_dwo, dcost_dbo, dcost_dwh, dcost_dbh =backPropagationValues(inputs,zh, ah, zo, ao)


In [137]:
dcost_dwh.shape

(2, 4)

In [147]:
epochs = 500000
epochPrintRate = 5000
learning_rate = 0.01
epoch_loss = []
lr = 10e-4
for i in range(epochs):
    zh, ah, zo, ao = forwardPropagation(inputs, wh_n, wo_n)
    dcost_dwo, dcost_dbo, dcost_dwh, dcost_dbh =backPropagationValues(inputs,zh, ah, zo, ao)
    wo_n = wo_n - (learning_rate * dcost_dwo)
    bo   = bo - (learning_rate * dcost_dbo)
    wh_n = wh_n - (learning_rate * dcost_dwh)
    bh   = bh - (learning_rate * dcost_dbh)
    if i % epochPrintRate == 0:
        loss = np.sum(-oneHotLabels*np.log(ao))
        print('Loss: ', loss)
        
        
        
    

Loss:  0.24596705941951783
Loss:  0.23782719702400273
Loss:  0.23020469067931673
Loss:  0.22305180804305022
Loss:  0.21632650903699904
Loss:  0.20999162255897308
Loss:  0.2040141619640232
Loss:  0.19836475278628607
Loss:  0.19301715181261675
Loss:  0.18794784094568212
Loss:  0.18313568263772148
Loss:  0.17856162627992025
Loss:  0.17420845697249743
Loss:  0.17006057971039196
Loss:  0.16610383329726688
Loss:  0.1623253293202884
Loss:  0.15871331233694624
Loss:  0.15525703808599023
Loss:  0.15194666707020452
Loss:  0.14877317129547205
Loss:  0.14572825230796924
Loss:  0.14280426896519594
Loss:  0.13999417361897987
Loss:  0.1372914555901303
Loss:  0.1346900909811443
Loss:  0.13218449801361262
Loss:  0.12976949719385233
Loss:  0.12744027570893318
Loss:  0.12519235553810074
Loss:  0.12302156483534027
Loss:  0.12092401219817323
Loss:  0.11889606348882348
Loss:  0.11693432091727345
Loss:  0.11503560413278852
Loss:  0.11319693310256207
Loss:  0.1114155125834126
Loss:  0.10968871801634117
Loss: 

In [145]:
ep_ls = []
ep_ls.append([1,2])

In [146]:
ep_ls

[[1, 2]]

In [149]:
print(ao[-1])

[3.93581320e-06 4.28790551e-11 9.99996064e-01]
