In [750]:
import keras, numpy as np, matplotlib.pyplot as plt
from keras.datasets import mnist

(x_train,y_train),(x_test,y_test) = mnist.load_data()

x_train = x_train.astype('float')/255
x_test = x_test.astype('float')/255

x_train = np.reshape(x_train,[60000,784])
y_train = np.reshape(y_train,[60000,1])

x_test = np.reshape(x_test,[10000,784])
y_test = np.reshape(y_test,[10000,1])

z = np.zeros((60000,10))
for i in range(60000):
    z[i][y_train[i]] = 1
y_train = z

z = np.zeros((10000,10))
for i in range(10000):
    z[i][y_test[i]] = 1
y_test = z

del z

In [769]:
weights1 = np.random.rand(784,128) * 0.1
weights2 = np.random.rand(128,64) * 0.1
weights3 = np.random.rand(64,10) * 0.1
bias1 = np.zeros((1,128))
bias2 = np.zeros((1,64))
bias3 = np.zeros((1,10))

In [770]:
def normalize(x):
    num = (x - np.min(x,axis=1).reshape(len(x),1))
    den = (np.max(x,axis=1).reshape(len(x),1) - np.min(x,axis=1).reshape(len(x),1))
    return num/den

In [771]:
def softmax(x):
    X = normalize(x)
    e_x = np.exp(X) 
    soft = e_x / (np.sum(e_x,axis=1).reshape(len(x),1))
    return soft

In [772]:
def tanh(x):
    X = normalize(x)
    t = (np.exp(X)-np.exp(-X)) / (np.exp(X)+np.exp(-X))
    return t

In [773]:
def dtanh(x):
    t=tanh(x)
    dt=1-t**2
    return dt

In [774]:
def relu(X):
    x = normalize(X)
    return x * (x > 0)

In [775]:
def drelu(X):
    x = normalize(X)
    return 1. * (x > 0)

In [776]:
def update(x,y,w1,b1,w2,b2,w3,b3,lr):
    
    z1 = np.dot(x,w1) + b1
    a1 = np.array(relu(z1))
    
    z2 = np.dot(a1,w2) + b2
    a2 = np.array(relu(z2))
    
    z3 = np.dot(a2,w3) + b3
    a3 = np.array(softmax(z3))
    
    dz3 = a3-y
    dw3 = (1/len(x)) * np.dot(a2.T,dz3)
    db3 = (1/len(x)) * np.sum(dz3,axis=0,keepdims=True)
    
    dz2 = np.dot(dz3,w3.T) * drelu(z2)
    dw2 = (1/len(x)) * np.dot(a1.T,dz2)
    db2 = (1/len(x)) * np.sum(dz2,axis=0,keepdims=True)
    
    dz1 = np.dot(dz2,w2.T) * drelu(z1)
    dw1 = (1/len(x)) * np.dot(x.T,dz1)
    db1 = (1/len(x)) * np.sum(dz1,axis=0,keepdims=True)
    
    w1 -= lr*dw1
    b1 -= lr*db1
    w2 -= lr*dw2
    b2 -= lr*db2
    w3 -= lr*dw3
    b3 -= lr*db3
    
    return [w1,b1,w2,b2,w3,b3]

In [779]:
for j in range(600):
    x_new = np.array([x_train[i] for i in range((100*j),(100*(j+1)))]).reshape(100,784)
    y_new = np.array([y_train[i] for i in range((100*j),(100*(j+1)))]).reshape(100,10)
    [weights1,bias1,weights2,bias2,weights3,bias3] = update(x_new,y_new,weights1,bias1,weights2,bias2,weights3,bias3,0.001)

In [783]:
xt_new = np.array([x_test[i] for i in range(100)]).reshape(100,784)
yt_new = np.array([y_test[i] for i in range(100)]).reshape(100,10)

zt1 = np.dot(xt_new,weights1) + bias1
at1 = np.array(relu(zt1))

zt2 = np.dot(at1,weights2) + bias2
at2 = np.array(relu(zt2))

zt3 = np.dot(at2,weights3) + bias3
at3 = np.array(softmax(zt3))

preds = np.argmax(at3,axis=1).reshape(len(at3),1)
truth = np.argmax(yt_new,axis=1).reshape(len(yt_new),1)

c = np.equal(preds,truth)
num = np.squeeze(np.sum(c,axis=0))
den = len(preds)
acc = (num/den) * 100
print(acc,"\b%")

66.0 %
