## Downloading the datasets and preprocessing the inputs

In [1]:
from keras.datasets import mnist
from keras.utils import np_utils

(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train = x_train.reshape(x_train.shape[0],28*28)
x_train = x_train.astype('float32')
x_train /=255
y_train = np_utils.to_categorical(y_train)
x_train1 = x_train[0:1000]
y_train1 = y_train[0:1000]

x_test = x_test.reshape(x_test.shape[0],28*28)
x_test = x_test.astype('float32')
x_test /= 255
y_test = np_utils.to_categorical(y_test)

2022-02-14 08:26:11.141594: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-02-14 08:26:11.141619: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
# x_train1.shape

In [3]:
# mini_batches = [x_train1[k:k+32] for k in range(0, 1000, 32)]
# (mini_batches[0].shape)

In [4]:
# head1, *tail1 = y_train
# head1.size
# head, *tail = x_train
# head1.shape

# Building the Neural Nets and Activation Layers

In [5]:
import numpy as np

class NeuralNets:
    def __init__(self,in_dim, out_dim):
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.w = np.random.randn(out_dim, in_dim) / np.sqrt(in_dim + out_dim)
        self.b = np.random.randn(out_dim,1) / np.sqrt(in_dim + out_dim)
        
    def propagate_forward(self, x_train):
        """ forward propagating the inputs """
        self.x_train = x_train
        return np.dot(self.w,x_train) + self.b
    
    def propagate_backward(self, output_loss, mini_batche_y, l_rate):
        dW = np.dot(output_loss, self.x_train.T)
        dB = np.array(np.sum(output_loss, axis=1).reshape((output_loss.shape[0],1)))
#         dB = output_loss
#         dB = np.tile(output_loss, (1,len(mini_batche_y[0])))
#         dB = np.sum(output_loss, axis = 0)
#         print(dB.shape)
        self.w -= (l_rate/len(mini_batche_y[1])) * dW
        self.b -= (l_rate/len(mini_batche_y[1])) * dB
        input_loss = np.dot(self.w.T, output_loss)
        return input_loss
    
class SquashingLayer:
    def __init__(self, squashing_func, squashing_derivative_func):
        self.squashing_func = squashing_func
        self.squashing_derivative_func = squashing_derivative_func
        
    def propagate_forward(self, x_train):
        self.x_train = x_train
        return self.squashing_func(self.x_train)
    
    def propagate_backward(self, output_loss,mini_batche_y, l_rate):
        return np.multiply(output_loss,self.squashing_derivative_func(self.x_train))


# Building Final Output Layers

In [6]:
class FinalSoftmaxLayer:
    def __init__(self, in_dim):
        self.in_dim = in_dim
    
    def propagate_forward(self, x_train):
        self.output = np.exp(x_train) / np.sum(np.exp(x_train), axis=0)
        return self.output
    
    def propagate_backward(self, predicted_output,mini_batche_y, l_rate):
        return predicted_output - mini_batche_y
        

# Activations Functions and its Derivatives

In [7]:
# Defintions of squashing functions and its derivatives

def relu_func(input_x):
    return np.maximum(input_x,0)

def relu_derivative_func(input_x):
    return np.array(input_x >= 0).astype('int')

def sigmoid_func(input_x):
    return 1.0 / (1.0 + np.exp(-input_x))

def sigmoid_derivative_func(z): 
    return sigmoid_func(z)*(1-sigmoid_func(z))


#     def softmax(self, x, derivative=False):
#         # Numerically stable with large exponentials
#         exps = np.exp(x - x.max())
#         if derivative:
#             return exps / np.sum(exps, axis=0) * (1 - exps / np.sum(exps, axis=0))
#         return exps / np.sum(exps, axis=0)

# def softmax_func(x_train):
#     output = np.exp(x_train) / np.sum(np.exp(x_train), axis=0)
#     return self.output

def softmax_func(x):
    exps = np.exp(x - x.max())
    return exps / np.sum(exps, axis=0)

def softmax_derivative_func(x):
    return 1
#     exps = np.exp(x - x.max())
#     return exps / np.sum(exps, axis=0) * (1 - exps / np.sum(exps, axis=0))

# Loss Functions and its derivatives

In [8]:
# definitions of Loss functions and its derivatives

def mean_squared_error(actual_output, predicted_output):
    return np.mean(np.power(actual_output - predicted_output, 2))

def derivative_mean_squared(actual_output, predicted_output):
    return 2 * (actual_output - predicted_output) / predicted_output.size

# def softmax_loss()
def compute_loss(X, Y):

    L_sum = np.sum(np.multiply(X, np.log(Y)))
    m = X.shape[1]
    L = -(1/m) * L_sum

    return L

def derivative_softmax_loss(actual_output, predicted_output):
    return predicted_output - actual_output
#     return 2 * (predicted_output - actual_output) / predicted_output.shape[0]

# Stacking layers and training the network

In [9]:
network = [
    NeuralNets(28*28, 128),
    SquashingLayer(relu_func, relu_derivative_func),
    NeuralNets(128,10),
    SquashingLayer(sigmoid_func, sigmoid_derivative_func),
    FinalSoftmaxLayer(10)
    
]

no_of_epochs = 40
l_rate = 3.0
mini_batch_size = 32
#training
samples = len(x_train1)
for epoch in range(no_of_epochs):
#     random.shuffle(x_train1)
    err = 0
    for k in range(0, samples, mini_batch_size):
        # forward propagation
        mini_batche_x = x_train1[k:k+mini_batch_size].T
        mini_batche_y = y_train1[k:k+mini_batch_size].T
        output = mini_batche_x
        for layer in network:
            output = layer.propagate_forward(output)
        
#         err += mean_squared_error(mini_batche_y, output)
        err += compute_loss(mini_batche_y, output)
        
#         error = derivative_softmax_loss(mini_batche_y, output)
        for layer in reversed(network):
                    error = layer.propagate_backward(output, mini_batche_y,l_rate)
        
        # calculate average error on all samples
    err /= samples
    print('epoch %d/%d   error=%f' % (epoch+1, no_of_epochs, err))
        
    

ValueError: operands could not be broadcast together with shapes (10,32) (128,32) 

In [None]:
# import numpy as np
# a=np.array([[1,2],[1,2]])
# b=np.sum(a, axis = 0)
# b
# nabla_B = [np.tile(np.zeros(b.shape), (1, self.mini_batch_size))
#                 for b in self.biases]