### Import necessary libraries

In [1]:
# numerical computation
import numpy as np
import h5py
import math

# model visualization
import matplotlib.pyplot as plt # for data visualization
import gradio as gr # build model user interface

# helper functions
from utils import * # helper function

### Load and normalize data

In [2]:
# Fetch the raw train/test arrays and the class labels from load_data()
# (presumably provided by the wildcard import from utils -- confirm).
(train_x_orig, train_y_orig,
 test_x_orig, test_y_orig, classes) = load_data()

# Keep the first axis (one entry per example), flatten everything else, then
# transpose so that each example becomes one column.
train_x_flatten = train_x_orig.reshape((train_x_orig.shape[0], -1)).T
test_x_flatten = test_x_orig.reshape((test_x_orig.shape[0], -1)).T

# Divide by 255 -- assumes 8-bit pixel intensities, which puts every feature
# in [0, 1]; TODO confirm against load_data().
train_x = train_x_flatten / 255.
test_x = test_x_flatten / 255.

### initialize weight and bias

In [3]:
def initialization_parameters(layer_dims):
    """Build the initial weights and biases of a fully connected network.

    Args:
        layer_dims: sequence of layer sizes. Entry 0 is the input size and
            entry l is the number of units in layer l.

    Returns:
        dict with, for every l in 1..len(layer_dims)-1:
          "W<l>": array of shape (layer_dims[l], layer_dims[l-1]) drawn from a
                  standard normal and divided by sqrt(layer_dims[l-1]);
          "b<l>": zero array of shape (layer_dims[l], 1).

    Note:
        NumPy's global random generator is reseeded with 1 on every call, so
        repeated calls with the same sizes return the same weights.
    """
    np.random.seed(1)
    params = {}

    # Walk consecutive (previous layer, current layer) size pairs.
    size_pairs = zip(layer_dims[:-1], layer_dims[1:])
    for idx, (fan_in, fan_out) in enumerate(size_pairs, start=1):
        w_key = "W{}".format(idx)
        b_key = "b{}".format(idx)

        # Scale by 1/sqrt(fan_in) so the weight magnitude shrinks with input width.
        params[w_key] = np.random.randn(fan_out, fan_in) / np.sqrt(fan_in)
        params[b_key] = np.zeros((fan_out, 1))

        # Sanity-check the shapes that were just created.
        assert params[w_key].shape == (fan_out, fan_in)
        assert params[b_key].shape == (fan_out, 1)

    return params

### Initialize the moment estimates for the Adam optimizer

In [6]:
def initialize_adam(parameters):
    """Create zero-filled Adam accumulators matching the shape of each parameter.

    Args:
        parameters: dict holding "W<l>" and "b<l>" arrays for l = 1..L, where
            L is taken to be len(parameters) // 2.

    Returns:
        (v, s): two dicts keyed "dW<l>" / "db<l>". Every entry is a zero array
        shaped like the corresponding parameter. v is returned first, s second.
    """
    n_layers = len(parameters) // 2
    first_moment, second_moment = {}, {}

    for idx in range(1, n_layers + 1):
        for prefix in ("W", "b"):
            param_key = prefix + str(idx)
            grad_key = "d" + param_key
            # Separate arrays per accumulator so they can be updated independently.
            first_moment[grad_key] = np.zeros_like(parameters[param_key])
            second_moment[grad_key] = np.zeros_like(parameters[param_key])

    return first_moment, second_moment

### generate random batches of data

In [11]:
def mini_batch_normalize(X, Y, mini_batch_size = 64, seed = 0):
    """Shuffle the examples and split them into consecutive mini-batches.

    Despite its name this function performs no normalization; it only shuffles
    the columns of X and Y with one shared permutation and slices the result.

    Args:
        X: 2-D array whose columns are examples (X.shape[1] examples).
        Y: array with one column per example, in the same order as X. Any
            number of label rows is accepted.
        mini_batch_size: number of examples per mini-batch; must be >= 1.
        seed: value used to reseed NumPy's global random generator, making the
            shuffle reproducible.

    Returns:
        List of (mini_batch_X, mini_batch_Y) tuples. Every batch holds
        mini_batch_size columns except possibly the last one, which holds the
        remainder. The list is empty when X has no columns.

    Raises:
        ValueError: if mini_batch_size is smaller than 1.
    """
    if mini_batch_size < 1:
        raise ValueError("mini_batch_size must be a positive integer")

    np.random.seed(seed)
    m = X.shape[1]  # number of examples

    # One permutation applied to both arrays keeps inputs and labels aligned.
    permutation = np.random.permutation(m)
    shuffled_X = X[:, permutation]
    shuffled_Y = Y[:, permutation].reshape((Y.shape[0], m))

    # A strided range yields every full batch and, when m is not a multiple of
    # mini_batch_size, the shorter final batch (slicing past the end is safe).
    return [
        (shuffled_X[:, start:start + mini_batch_size],
         shuffled_Y[:, start:start + mini_batch_size])
        for start in range(0, m, mini_batch_size)
    ]

In [59]:
def update_parameters_with_adam(parameters, grads, s,v, t, beta1=0.9, beta2=0.999,
                                learning_rate=0.01,epsilon = 1e-8):
    """Apply one Adam update to every weight and bias.

    Args:
        parameters: dict with "W<l>" / "b<l>" arrays, l = 1..L
            (L = len(parameters) // 2). Entries are replaced in place.
        grads: dict with the matching gradients "dW<l>" / "db<l>".
        s: dict of running averages of squared gradients (second moment),
            keyed like grads. Entries are replaced in place.
        v: dict of running averages of gradients (first moment), keyed like
            grads. Entries are replaced in place.
        t: number of Adam updates performed so far including this one. Must be
            >= 1, because t = 0 makes the bias-correction denominator zero.
        beta1: decay rate of the first-moment average.
        beta2: decay rate of the second-moment average.
        learning_rate: step size.
        epsilon: small constant added to the denominator to avoid division by
            zero.

    Returns:
        (parameters, v, s, v_corrected, s_corrected). Note that the return
        order is v then s, while the argument order is s then v.
    """
    L = len(parameters) // 2
    v_corrected = {}
    s_corrected = {}

    # The bias-correction factors depend only on t, so compute them once.
    v_correction = 1 - np.power(beta1, t)
    s_correction = 1 - np.power(beta2, t)

    for l in range(1, L + 1):
        for prefix in ("W", "b"):
            param_key = prefix + str(l)
            grad_key = "d" + param_key
            grad = grads[grad_key]

            # Exponentially weighted averages of the gradient and its square.
            v[grad_key] = beta1 * v[grad_key] + (1 - beta1) * grad
            s[grad_key] = beta2 * s[grad_key] + (1 - beta2) * np.power(grad, 2)

            # Undo the bias towards zero caused by zero-initialized averages.
            v_corrected[grad_key] = v[grad_key] / v_correction
            s_corrected[grad_key] = s[grad_key] / s_correction

            # Adam step: epsilon is added AFTER the square root, as in the
            # published algorithm; adding it inside the root inflates the
            # effective epsilon to sqrt(epsilon).
            parameters[param_key] = parameters[param_key] - learning_rate * (
                v_corrected[grad_key] / (np.sqrt(s_corrected[grad_key]) + epsilon)
            )

    return parameters, v, s, v_corrected, s_corrected

### Compute cost

In [54]:
def compute_training_cost(AL, Y, parameters, lambd):
    """Return the summed cross-entropy of a batch plus an L2 weight penalty.

    Args:
        AL: predicted probabilities, shaped like Y.
        Y: true labels; Y.shape[1] is used as the number of examples m.
        parameters: dict containing "W<l>" for l = 1..len(parameters) // 2.
        lambd: L2 regularization strength.

    Returns:
        A scalar: the cross-entropy summed over the batch (it is NOT divided
        by m here) plus lambd * sum(W**2) / (2 * m).
    """
    n_examples = Y.shape[1]
    n_layers = len(parameters) // 2

    # Sum of squared entries over every weight matrix.
    squared_weights = sum(
        np.sum(np.square(parameters["W" + str(idx)]))
        for idx in range(1, n_layers + 1)
    )
    l2_penalty = lambd * squared_weights / (2 * n_examples)

    # Cross-entropy written as two dot products, one per label value.
    positive_term = np.dot(Y, np.log(AL).T)
    negative_term = np.dot(1 - Y, np.log(1 - AL).T)
    cost = -positive_term - negative_term

    cost += l2_penalty
    return np.sum(cost)

In [None]:
def model(X, Y, layer_dims, batch_size = 64, beta1 = 0.9, beta2=0.999,
          learning_rate=0.0075, lambd=0.7, epochs=3000, epsilon=1e-8, print_cost=False, batch_normalize=True):
    """Train the network with Adam and L2 regularization.

    Args:
        X: input array with one column per example (X.shape[1] examples).
        Y: labels, one column per example.
        layer_dims: layer sizes passed to initialization_parameters.
        batch_size: mini-batch size used when batch_normalize is true.
        beta1, beta2, learning_rate, epsilon: Adam hyperparameters forwarded
            to update_parameters_with_adam.
        lambd: L2 regularization strength forwarded to the cost and to
            backward_layer.
        epochs: number of passes over the training data.
        print_cost: when true, print the cost periodically (every 1000 epochs
            in mini-batch mode, every 100 epochs in full-batch mode) and
            record it in the returned history every 100 epochs.
        batch_normalize: when true, train on shuffled mini-batches produced by
            mini_batch_normalize; otherwise use the whole set for each update.
            Despite the name, no batch normalization is applied.

    Returns:
        (parameters, costs): the trained parameter dict and the list of
        recorded per-example costs (empty when print_cost is false).
    """
    costs = []
    seed = 10
    m = X.shape[1]
    # Adam step counter. It is incremented before every update so the first
    # update uses t = 1, as the bias correction requires.
    t = 0

    parameters = initialization_parameters(layer_dims)
    v, s = initialize_adam(parameters)  # initialize_adam returns (v, s)

    for i in range(1, epochs + 1):

        if batch_normalize:
            # A new seed each epoch gives a different shuffle each epoch.
            seed += 1
            cost_total = 0

            for minibatch_X, minibatch_Y in mini_batch_normalize(X, Y, batch_size, seed):
                AL, caches = forward_layer(minibatch_X, parameters)
                cost_total += compute_training_cost(AL, minibatch_Y, parameters, lambd)
                grads = backward_layer(AL, minibatch_Y, caches, lambd)

                t += 1
                parameters, v, s, _, _ = update_parameters_with_adam(
                    parameters, grads, s, v, t, beta1, beta2, learning_rate, epsilon)

            # Average only after the whole epoch has been accumulated.
            cost = cost_total / m
            print_every = 1000
        else:
            AL, caches = forward_layer(X, parameters)
            cost = 1. / m * compute_training_cost(AL, Y, parameters, lambd)
            grads = backward_layer(AL, Y, caches, lambd)

            t += 1
            parameters, v, s, _, _ = update_parameters_with_adam(
                parameters, grads, s, v, t, beta1, beta2, learning_rate, epsilon)
            print_every = 100

        if print_cost and i % print_every == 0:
            print("cost after iteration {} {} ".format(i, cost))
        if print_cost and i % 100 == 0:
            costs.append(cost)

    return parameters , costs

In [None]:
# Layer sizes: 12288 inputs, hidden layers of 20, 7 and 5 units, 1 output unit.
layers_dims = [12288, 20, 7, 5, 1]

# Train the network; `cost` receives the cost history returned by model().
parameters, cost = model(
    train_x,
    train_y_orig,
    layers_dims,
    epochs=2500,
    print_cost=True,
)

# Run predict() on the training split, then on the test split.
p = predict(train_x, train_y_orig, parameters)
pred_test = predict(test_x, test_y_orig, parameters)

In [None]:
from PIL import Image

my_image = "my_image.jpg" # change this to the name of your image file
my_label_y = [1] # the true class of your image (1 -> cat, 0 -> non-cat)

# Side length the picture is resized to. num_px * num_px * 3 must match the
# input size the network was trained with.
num_px = 64
fname = my_image

# Open inside a context manager so the file handle is released, force three
# colour channels (RGBA or grayscale files would otherwise break the reshape
# below), and resize with num_px instead of a hard-coded size.
with Image.open(fname) as img:
    image = np.array(img.convert("RGB").resize((num_px, num_px)))
plt.imshow(image)

# Scale to [0, 1] and flatten to a single column, one row per pixel channel.
image = image / 255.
image = image.reshape((1, num_px * num_px * 3)).T

my_predicted_image = predict(image, my_label_y, parameters)
predicted_class = int(np.squeeze(my_predicted_image))

print ("y = " + str(np.squeeze(my_predicted_image)) + ", your L-layer model predicts a \"" + classes[predicted_class].decode("utf-8") +  "\" picture.")