In [1]:
import numpy as np
import keras
from keras.datasets import fashion_mnist
from matplotlib import pyplot as plt

Using TensorFlow backend.


In [2]:
def sigmoid(a):
    s = 1/(1+np.exp(-a))
    return s

def derivative_sigmoid(a):
    ds = sigmoid(a) *(1-sigmoid (a))
    return ds

def tanh(a):
    t=(np.exp(a)-np.exp(-a))/(np.exp(a)+np.exp(-a))
    return t

def derivative_tanh(a):
    dt=1-tanh(a)**2
    return dt


def softmax(a):
    return np.exp(a) / np.sum(np.exp(a), axis=0) #expA (axis=0, keepdims=True)

In [3]:
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

In [4]:
fashion_mnist = keras.datasets.fashion_mnist
(x_train_orig, y_train_orig), (x_test_orig, y_test_orig) = fashion_mnist.load_data()

In [5]:
def initialize_parameters(layer_dimensions):

    #np.random.seed(0)
    parameters = {}
    L = len(layer_dimensions)            # number of layers in the network

    for k in range(1, L):
        
        parameters['w' + str(k)] = np.random.randn(layer_dimensions[k], layer_dimensions[k-1]) 
        parameters['b' + str(k)] = np.zeros((layer_dimensions[k], 1))
        
    return parameters

In [6]:
def agrregation_forward(h, w, b):
    
    a = np.dot(w, h) + b
    temp = (h,w,b)
    
    return a ,temp

In [7]:
def activation_forward(h_prev, w, b, activation):
        
    
    if activation == "sigmoid":

        a, linear_temp = agrregation_forward(h_prev, w, b)
        h = sigmoid(a)
    
    elif activation == "tanh":
        a, linear_temp = agrregation_forward(h_prev, w, b)
        h = tanh(a)
        
    elif activation == "softmax":
        a, linear_temp = agrregation_forward(h_prev, w, b)
        h = softmax(a)
    
    
    temp = (linear_temp, a)

    return h, temp

In [8]:
def forward_pass(x, parameters):

    temps = []
    h = x
    L = len(parameters) // 2                  # number of layers in the neural network
    
    for k in range(L-1):
        l = k+1
        h_prev = h 
        h,temp = activation_forward(h_prev, parameters['w'+str(l)], parameters['b'+str(l)], activation="sigmoid")
        temps.append(temp)
    
    
    hL,temp1 = activation_forward(h, parameters['w'+str(L)], parameters['b'+str(L)], activation="softmax")
    temps.append(temp1)
    
            
    return hL, temps

In [9]:
def cost_function(yhat, y):   
    m = y.shape[1] # no. of examples
  
    product_sum = np.sum((y *np.log(yhat)), axis = 0)
    cost = -1/m*np.sum(product_sum)
    
    return cost

In [10]:
def agrregation_backward(dL_da, temp):
    
    h_prev, w, b = temp 
    m = h_prev.shape[1]
    dL_dh_prev = np.dot(w.T, dL_da)
    
    dL_dw = 1/m*np.dot(dL_da, h_prev.T)
    dL_db = 1/m*np.sum(dL_da, axis=1, keepdims=True)
     

    return dL_dh_prev, dL_dw, dL_db

In [11]:
def activation_backward(dL_dh, temp, activation):

    linear_temp, a = temp
    
    if activation == "sigmoid":
        ds = derivative_sigmoid(a)
        dL_da = dL_dh * ds
       
        dL_dh_prev, dL_dw, dL_db = agrregation_backward(dL_da, linear_temp)    
        
    elif activation == "tanh":
        dt = derivative_tanh(a)
        dL_da = dL_dh * dt

        dL_dh_prev, dL_dw, dL_db = agrregation_backward(dL_da, linear_temp)    
    
    return dL_dh_prev, dL_dw, dL_db

In [12]:
def backward_pass(yhat, y, temps):
    
    grads = {}
    L = len(temps) # the number of layers
    m = y.shape[1]

# el = one hot vector
    el = y
    dL_dyhat = -(1/yhat)*el
    dL_daL  = -(el - yhat)
    current_temp = temps[L-1]
    linear_tempL,aL = current_temp
    
    hL_prev, wL, bL = linear_tempL
    m = hL_prev.shape[1]

    dL_dhL_prev = np.dot(wL.T, dL_daL)
    
    dL_dwL = 1/m*np.dot(dL_daL, hL_prev.T)
    dL_dbL = 1/m*np.sum(dL_daL, axis=1, keepdims=True)

    
    grads["dL_dh" + str(L-1)] = dL_dhL_prev
    grads["dL_dw" + str(L)]      = dL_dwL
    grads["dL_db" + str(L)] = dL_dbL
    
    # Loop from l=L-2 to l=0
    for l in reversed(range(L-1)):
        #print(l)
        current_temp = temps[l]
        dL_dh_prev, dL_dw, dL_db = activation_backward(grads["dL_dh" + str(l+1)], current_temp, "sigmoid")
        grads["dL_dh" + str(l)] = dL_dh_prev
        grads["dL_dw" + str(l + 1)] = dL_dw
        grads["dL_db" + str(l + 1)] = dL_db

    return grads

In [13]:
def parameter_update(parameters, grads, learning_rate):
    
    L = len(parameters) // 2 # number of layers in the neural network

    for l in range(L):
        parameters["w" + str(l+1)] = parameters["w" + str(l+1)]-learning_rate*grads["dL_dw" + str(l + 1)]
        parameters["b" + str(l+1)] = parameters["b" + str(l+1)]-learning_rate*grads["dL_db" + str(l + 1)]

    return parameters

In [None]:
def parameter_update(parameters, grads, learning_rate):
    
    L = len(parameters) // 2 # number of layers in the neural network

    for l in range(L):
        parameters["w" + str(l+1)] = parameters["w" + str(l+1)]-learning_rate*grads["dL_dw" + str(l + 1)]
        parameters["b" + str(l+1)] = parameters["b" + str(l+1)]-learning_rate*grads["dL_db" + str(l + 1)]

    return parameters

In [96]:
def predict(x, y, parameters):
    x =x.T
    y =y.T
    
    m = x.shape[1]
    n = len(parameters) // 2 # number of layers in the neural network
    p = np.zeros((1,m))
    
    # Forward propagation
    prob, temps = forward_pass(x, parameters)
    

    predicted_label = np.argmax(prob, axis=0)
    true_label = np.argmax(y, axis=0)
    
    Accuracy = np.sum(predicted_label == true_label)/m
        
#         if probas[0,i] > 0.5:
#             p[0,i] = 1
#         else:
#             p[0,i] = 0
    
    #print results
    #print ("predictions: " + str(p))
    #print ("true labels: " + str(y))
    print("Accuracy: "  + str(Accuracy))
        
    return

In [112]:
# index = 15
# plt.imshow(x_train_orig[index])
# print ("y = " + str(y_train_orig[index])+ ". It's a " + class_names[y_train_orig[index]] +  " picture.")

In [128]:
x_train = np.array([[1,2,3,4],[3,4,5,6],[5,6,7,8]])
y_train = np.array([[0,0,1],[1,0,0],[0,1,0]])
y_train.shape

(3, 3)

In [129]:
no_hidden_layers = 1 # no of hidden layers
no_neuron_hidden = 4 # no. of neurons in each hidden layers
no_neuron_output = 3 # # no. of neurons in each hidden layers

In [130]:
no_of_training_examples = np.shape(x_train)[0]
no_of_testing_examples = np.shape(x_test)[0]
size_input_layer = np.shape(x_train)[1]
size_hidden_layer = no_neuron_hidden
size_output_layer = no_neuron_output

In [131]:
def one_hot_vector_form(labels,size_output_layer):
    no_of_examples = labels.shape[0]
    one_hot_vector = np.zeros((no_of_examples , size_output_layer))
    for i in range(no_of_examples):
        one_hot_vector[i, labels[i]] = 1    
        y = one_hot_vector#.T
    return y  

In [132]:
layer_dimensions = [size_input_layer]+ [size_hidden_layer]*no_hidden_layers+ [size_output_layer]
layer_dimensions

[4, 4, 3]

In [133]:
def L_layer_network(x_train, y_train,layer_dimensions,learning_rate,num_epochs,batchsize,batch_type,print_cost=False):
    

    np.random.seed(1)
    costs = []          
    
    parameters = initialize_parameters(layer_dimensions)
    if batch_type == "SGD":
        batchsize =1
    elif batch_type ==  "Mini_batch":
        batchsize = batchsize
    elif batch_type == "Full_batch":
        batchsize = x_train.shape[0]
        
    total_examples = x_train.shape[0]
    num_steps = total_examples//batchsize
    #print(num_steps)
 
    for i in range(0, num_epochs):
        #print("***********epoch = ",i)
        par_update = 0
        for j in range(num_steps):
            
            start = j*batchsize
            end = start+batchsize
            x = x_train[start:end].T
            y = y_train[start:end].T 
            
            # print("x",x)
            # print("Y",y)
            
            #print("::::::::::::::::::Input_param",parameters)
       
            yhat, temps = forward_pass(x, parameters)
            cost = cost_function(yhat, y)
            grads = backward_pass(yhat,y,temps)
            print("param",grads)
            parameters = parameter_update(parameters, grads, learning_rate)
            #print("    update ",j+1)
            print("param",parameters)
            par_update += 1
        
        #print("par_updated ",par_update,"times")
        print("***********************************************************")
        
    #     #print("cost in iteration ",i," is =",cost)
                
    #         # Print the cost every 100 training example
    #     if print_cost and i % 100 == 0:
    #         print ("Cost after iteration %i: %f" %(i, cost))
    #     if print_cost and i % 100 == 0:
    #         costs.append(cost)
    # # plot the cost
    # plt.plot(np.squeeze(costs))
    # plt.ylabel('cost')
    # plt.xlabel('iterations (per hundreds)')
    # plt.title("Learning rate =" + str(learning_rate))
    # plt.show()
    
    return parameters

In [135]:
parameters = L_layer_network(x_train, y_train, layer_dimensions,0.0075,2,1,"Full_batch",print_cost = True)

param {'dL_dh1': array([[-1.18989369, -0.01633808,  0.95184263],
       [ 0.73167716,  0.81599063, -1.25277271],
       [ 0.49281199,  0.27042426, -0.6125784 ],
       [ 1.08114124, -0.49791067, -0.44018795]]), 'dL_dw2': array([[ 0.00017078,  0.00333458,  0.00059602,  0.00491406],
       [ 0.00067536,  0.02784868,  0.00280611,  0.01796557],
       [-0.00084614, -0.03118326, -0.00340212, -0.02287962]]), 'dL_db2': array([[-0.01773531],
       [ 0.05187744],
       [-0.03414213]]), 'dL_dh0': array([[ 0.05205141,  0.05685563, -0.03386186],
       [-0.28331533, -0.13158553,  0.09519219],
       [ 0.31973125,  0.08916812, -0.07726712],
       [-0.20919026, -0.03208832,  0.03727357]]), 'dL_dw1': array([[-0.00103781, -0.00256596, -0.0040941 , -0.00562225],
       [ 0.02693163,  0.06846938,  0.11000713,  0.15154489],
       [ 0.00227731,  0.00519836,  0.00811941,  0.01104046],
       [ 0.01419887,  0.04329694,  0.07239502,  0.1014931 ]]), 'dL_db1': array([[-0.00152815],
       [ 0.04153775],
  

In [102]:
predict(x_train,y_train,parameters)

Accuracy: 0.3


In [103]:
x_train.shape

(10, 784)

In [104]:
predict(x_test,y_test,parameters)

Accuracy: 0.2
