In [1]:
import numpy as np
import matplotlib.pyplot as plt
from keras.datasets import mnist

(x_train,y_train),(x_test,y_test) = mnist.load_data()
#preprocessing (normalization)

x_train = x_train.reshape(60000,784)/255
x_test = x_test.reshape(10000,784)/255

print(x_train.shape)
print(x_test.shape)

(60000, 784)
(10000, 784)


In [2]:
#input layer = 784 neurons
#hidden layer = 15 neurons
#output layer = 10 neurons

#bias
bias_input_to_hidden, reverse_bias_input_to_hidden   = [0] * 15 , [0] * 15
bias_hidden_to_output, reverse_bias_hidden_to_output = [0] * 10 , [0] * 10

#weight
weight_input_to_hidden, reverse_weight_input_to_hidden = [[0 for i in range(784)]for i in range(15)] , [[0 for i in range(784)]for i in range(15)]
weight_hidden_to_output, reverse_weight_hidden_to_output = [[0 for i in range(15)]for i in range(10)] , [[0 for i in range(15)]for i in range(10)]

#input , output
output_from_middle_layer, input_to_middle_layer = [0] * 15 , [0] * 15
output_from_output_layer , input_to_output_layer = [0] * 10 , [0] * 10

#test input , test output
test_output_from_middle_layer, test_input_to_middle_layer = [0] * 15 , [0] * 15
test_output_from_output_layer, test_input_to_output_layer = [0] * 10 , [0] * 10

In [3]:
#adding random numbers to bias and weights

for i in range(15):
  bias_input_to_hidden[i] = np.random.rand() * 0.1
for i in range(10):
  bias_hidden_to_output[i] = np.random.rand() * 0.1

for i in range(15):
  for j in range(784):
    weight_input_to_hidden[i][j] = np.random.randn()*0.1

for i in range(10):
  for j in range(15):
    weight_hidden_to_output[i][j] = np.random.randn()*0.1

In [4]:
#defining activation functions -> sigmoid , softmax

def sigmoid (x):
  exponent = 1 + np.exp(-x)
  return (1/exponent)

def diff_sigmoid(x):
  sig = sigmoid(x)
  return (1-sig) * sig

#softmax is used at output layer
def softmax(output_array):
  maxi = np.max(output_array) #finding maximum
  _exp = np.exp(output_array - maxi) #finding exponents by reducing maximum from the original array
  sum_exp = np.sum(_exp)
  answer_array = _exp / sum_exp #dividing exponent with sum of array
  return answer_array

#actual and expected are one hot encodings
def delta(actual_number , expected_number , output_from_output_layer, input_to_output_layer, weight_hidden_to_output):
  _sum = 0

  for i in range(10):
    op_diff = output_from_output_layer[i] - expected_number[i]
    weight = weight_hidden_to_output[i][actual_number]
    diff_sig = diff_sigmoid(input_to_output_layer[i])

    _sum += op_diff * weight * diff_sig

  return _sum

In [5]:
def back_propogation(output_from_middle_layer, output_from_output_layer, input_to_middle_layer, input_to_output_layer, expected_number, x_train, learning_rate):
  #getting global bias and weights

  global weight_input_to_hidden
  global weight_hidden_to_output
  global bias_input_to_hidden
  global bias_hidden_to_output

  #changing weight -> going from output to hidden

  for i in range(10):
    _diff = output_from_output_layer[i] - expected_number[i]
    _sig_diff = diff_sigmoid(input_to_output_layer[i])

    for j in range(15):
      reverse_weight_hidden_to_output[i][j]  = _diff * _sig_diff
      weight_hidden_to_output[i][j] -= learning_rate * reverse_weight_hidden_to_output[i][j] * output_from_middle_layer[j]
  
  #changing weight -> going from hidden to input

  for i in range(15):
    _delta = delta(i, expected_number, output_from_output_layer, input_to_output_layer, weight_hidden_to_output)
    _sig_diff = diff_sigmoid(input_to_middle_layer[i])
    for j in range(784):
      reverse_weight_input_to_hidden[i][j] = _delta * _sig_diff
      weight_input_to_hidden[i][j] -= learning_rate * reverse_weight_input_to_hidden[i][j] * x_train[j]
  
  #changing bias -> going from output to hidden

  for i in range(10):
    _diff = output_from_output_layer[i] - expected_number[i]
    _sig_diff = diff_sigmoid(input_to_output_layer[i])

    reverse_bias_hidden_to_output[i] = _diff * _sig_diff
    bias_hidden_to_output[i] -= learning_rate * reverse_bias_hidden_to_output[i]

  #changing bias -> going from hidden to input

  for i in range(15):
    _delta = delta(i, expected_number, output_from_output_layer, input_to_output_layer, weight_hidden_to_output)
    _sig_diff = diff_sigmoid(input_to_middle_layer[i])

    reverse_bias_input_to_hidden[i] = _delta * _sig_diff
    bias_input_to_hidden[i] -= learning_rate * reverse_bias_input_to_hidden[i]



In [6]:
#defining accuracy and error calculation functions

def accuracy(predicted_answer, actual_answer , switch):
  max_pred = np.argmax(predicted_answer, axis=1)
  max_train = np.argmax(actual_answer, axis=1)

  if switch == "train":
    return np.sum(max_pred == max_train) / 100 #batch size = 100
  elif switch == "test":
    return np.sum(max_pred == max_train) / 10000

def sum_of_squares_error(output_from_output_layer,expected_number):
  square = output_from_output_layer - expected_number
  return 0.5 * np.sum(square**2)

def make_round_num(n):
  rounds = [0] * n
  for i in range(n):
    rounds[i] = i
  
  return rounds


In [9]:
learning_rate = 0.3
epochs = 5
input_words = 5 #rounds  within epoch

In [10]:
all_train_accuracy = []
all_train_loss = []

for l in range(epochs):
    print("<---------------epoch no : "+str(l)+"---------------->")

    for k in range(input_words):

        train_prediction = []
        train_answer = []

        print("Round number : "+str(l*input_words+k))

        for j in range(100):
          #input to hidden 
          for i in range(15):

            input_to_middle_layer[i] = np.dot(x_train[k*100+j], weight_input_to_hidden[i]) + bias_input_to_hidden[i]
            output_from_middle_layer[i] = sigmoid(input_to_middle_layer[i])
            


          #hidden to output

          for i in range(10):
            input_to_output_layer[i] = np.dot(output_from_middle_layer, weight_hidden_to_output[i]) + bias_hidden_to_output[i]


          #softmaxing
          
          output_from_output_layer = softmax(input_to_output_layer)

          expected_num = [0]*10 #one hot encoding
          expected_num[y_train[k*100+j]] = expected_num[y_train[k*100+j]] + 1

          train_prediction.append(output_from_output_layer)
          train_answer.append(expected_num)
          back_propogation(output_from_middle_layer,output_from_output_layer, input_to_middle_layer, input_to_output_layer, expected_num, x_train[k*100+j], learning_rate)

        train_acc = accuracy(train_prediction, train_answer, "train")
        train_loss = sum_of_squares_error(output_from_output_layer, expected_num)

        print("train_accuracy = "+str(train_acc))
        print("train_loss     = "+str(train_loss))
            
        all_train_accuracy.append(train_acc)
        all_train_loss.append(train_loss)



          


<---------------epoch no : 0---------------->
Round number : 0
train_accuracy = 0.62
train_loss     = 0.22462521747506714
Round number : 1
train_accuracy = 0.65
train_loss     = 0.29335020260807837
Round number : 2
train_accuracy = 0.67
train_loss     = 0.10042751658328568
Round number : 3
train_accuracy = 0.72
train_loss     = 0.25251673693595605
Round number : 4
train_accuracy = 0.67
train_loss     = 0.3430819701192154
<---------------epoch no : 1---------------->
Round number : 5
train_accuracy = 0.77
train_loss     = 0.04702156243272173
Round number : 6
train_accuracy = 0.82
train_loss     = 0.14324323902644487
Round number : 7
train_accuracy = 0.82
train_loss     = 0.025274022436063772
Round number : 8
train_accuracy = 0.81
train_loss     = 0.1310414648654346
Round number : 9
train_accuracy = 0.83
train_loss     = 0.1448309016868208
<---------------epoch no : 2---------------->
Round number : 10
train_accuracy = 0.91
train_loss     = 0.019430675003945196
Round number : 11
train_ac

In [11]:
test_prediction = []
test_answer = []

for j in range(10000):

  for i in range(15):
    test_input_to_middle_layer[i] = np.dot(x_test[j], weight_input_to_hidden[i]) + bias_input_to_hidden[i]
    test_output_from_middle_layer[i] = sigmoid(test_input_to_middle_layer[i])

  for i in range(10):
    test_input_to_output_layer[i] = np.dot(test_output_from_middle_layer , weight_hidden_to_output[i]) + bias_hidden_to_output[i]

  test_output_from_output_layer = softmax(test_input_to_output_layer)
  expected_number = [0] * 10
  expected_number[y_test[j]]= expected_number[y_test[j]]+1

  test_prediction.append(test_output_from_output_layer)
  test_answer.append(expected_number)

test_acc = accuracy(test_prediction, test_answer, "test")
test_loss = sum_of_squares_error(test_output_from_output_layer , expected_number)

print("test_accuracy = "+str(test_acc))
print("test_loss     = "+str(test_loss))


test_accuracy = 0.8213
test_loss     = 0.0031564438795340334
