In [0]:
import keras
from keras.datasets import mnist
from keras import backend
import numpy as np

In [0]:
learn_rate = 0.005
batch_size = 40
epoch = 15

In [0]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [0]:
x_train = x_train.reshape(x_train.shape[0], 28*28)
y_train = keras.utils.to_categorical(y_train, 10)

In [0]:

x_test = x_test.reshape(x_test.shape[0],28*28)
y_t = y_test
y_test = keras.utils.to_categorical(y_test,10)

In [0]:
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train = x_train / 255
x_test = x_test / 255

In [0]:
x_train = x_train.reshape(x_train.shape[0], -1).T
x_test = x_test.reshape(x_test.shape[0], -1).T
y_train = y_train.T
y_test = y_test.T

In [0]:
def sigmoid(x):
  return 1/(1+np.exp(-x))

In [0]:
def sigmoid_p(x):
  return sigmoid(x)*(1-sigmoid(x))

In [0]:
def cross_entropy(y, y_cap):
  return - np.mean(y * np.log(y_cap))

In [0]:
def mini_batch(x, y, batch_size):
  for i in range(0, x.shape[0] - batch_size + 1, batch_size):
    h = slice(i, i + batch_size)
    yield x[h], y[h]

In [0]:
def softmax(x):
  x = x - np.max(x)
  return np.exp(x) / np.sum(np.exp(x), axis = 0) 

In [0]:
def forward_prop(w,b,x):
  op = np.dot(w,np.transpose(x)) + b
  return sigmoid(op)

In [0]:
def train(x_train, y_train, x_test, y_test, epochs, learn_rate):
  neuron_x = x_train.shape[0]
  neuron_h = 64
  neuron_y = y_train.shape[0]
  
  w1= np.random.randn(neuron_h, neuron_x) * 0.01
  b1 = np.zeros(shape = (neuron_h, 1))
  w2 = np.random.randn(neuron_y, neuron_h) * 0.01
  b2 = np.zeros(shape = (neuron_y, 1))
  
  for k in range(epoch):
    for i in mini_batch(x_train.T, y_train.T, batch_size):
      x, y = i
      x = x.T
      y = y.T
      m = x.shape[0]
      
      z1 = np.dot(w1, x) + b1
      a1 = sigmoid(z1)
      z2 = np.dot(w2, a1) + b2
      a2 = softmax(z2)
      cost = cross_entropy(y, a2)
      dz2 = a2 - y
      dw2 = (1/m) * np.matmul(dz2, a1.T)
      db2 = (1/m) * np.sum(dz2, axis = 1, keepdims = True)
      da1 = np.dot(w2.T, dz2)
      dz1 = da1 * sigmoid_p(z1)
      dw1 = (1/m) * np.dot(dz1, x.T)
      db1 = (1/m) * np.sum(dz1, axis = 1, keepdims = True)
      
      w2 = w2 - learn_rate * dw2
      b2 = b2 - learn_rate * db2
      w1 = w1 - learn_rate * dw1
      b1 = b1 - learn_rate * db1
    print('Epoch: ', k)
    print('cost: ', cost)
    
  z1 = np.dot(w1, x_train) + b1
  a1 = sigmoid(z1)
  z2 = np.dot(w2, a1) + b2
  a2 = softmax(z2)
  
  z3 = np.dot(w1, x_test) + b1
  a3 = sigmoid(z3)
  z4 = np.dot(w2,a3) + b2
  a4 = softmax(z4)
  
  a4 = a4.T
  a2 = a2.T
  y_train = y_train.T
  y_test = y_test.T
  
  y_train_pred = np.zeros_like(y_train)
  y_train_pred[np.arange(len(a2)), a2.argmax(1)] = 1
  
  y_test_pred = np.zeros_like(y_test)
  y_test_pred[np.arange(len(a4)), a4.argmax(1)] = 1
  
  print('training accuracy: {}%'.format(100 - np.mean(np.abs(y_train_pred - y_train)) * 100))
  print('train accuracy: {}%'.format(100 - np.mean(np.abs(y_test_pred - y_test)) * 100))
  
  d = {
        'cost': cost,
        'Y_test_prediction': a4,
        'y_test_pred': a2
      }
  return d

In [42]:
classifier = train(x_train, y_train, x_test, y_test, 12, 0.1)

Epoch:  0
cost:  0.22986920765006083
Epoch:  1
cost:  0.22614659463552328
Epoch:  2
cost:  0.2102585839612373
Epoch:  3
cost:  0.17538583382750084
Epoch:  4
cost:  0.13721420318377342
Epoch:  5
cost:  0.10932592855305284
Epoch:  6
cost:  0.08960575082539725
Epoch:  7
cost:  0.07493444175776594
Epoch:  8
cost:  0.06388916808901333
Epoch:  9
cost:  0.05553582872024876
Epoch:  10
cost:  0.049132719537117675
Epoch:  11
cost:  0.044127865082804994
Epoch:  12
cost:  0.04013338404105651
Epoch:  13
cost:  0.03688059084113738
Epoch:  14
cost:  0.034181437197722604
training accuracy: 97.52766657620668%
train accuracy: 97.60800004005432%
