# Logistic Regression with a Neural Network mindset

In [33]:
import numpy as np
import matplotlib.pyplot as plt
import h5py
import scipy
from PIL import Image
from scipy import ndimage

%matplotlib inline

In [34]:
def load_dataset():
    train_dataset = h5py.File('datasets/Tr.h5', "r")
    train_set_x_orig = np.array(train_dataset["images"][:]) 
    train_set_y_orig = np.array(train_dataset["labels"][:]) 

    test_dataset = h5py.File('datasets/Te.h5', "r")
    test_set_x_orig = np.array(test_dataset["images"][:])
    test_set_y_orig = np.array(test_dataset["labels"][:]) 

    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    X_train = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T / 255.0
    X_test = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T / 255.0

    Y_train = np.eye(5)[train_set_y_orig.flatten()].T 
    Y_test = np.eye(5)[test_set_y_orig.flatten()].T 

    return X_train, Y_train, X_test, Y_test

In [35]:
X_train, Y_train, X_test, Y_test = load_dataset()

In [36]:
def softmax(Z):
    exp_Z = np.exp(Z - np.max(Z, axis=0, keepdims=True)) 
    return exp_Z / np.sum(exp_Z, axis=0, keepdims=True)


In [37]:
def compute_cost(AL, Y):
    m = Y.shape[1]
    cost = -np.sum(Y * np.log(AL + 1e-8)) / m 
    return np.squeeze(cost)


In [38]:
def initialize_parameters(layer_dims):
    np.random.seed(1)
    parameters = {}
    L = len(layer_dims)

    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) * 0.01
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))

    return parameters


In [39]:
def linear_forward(A, W, b):
    Z = np.dot(W, A) + b
    return Z

def linear_activation_forward(A_prev, W, b, activation):
    Z = linear_forward(A_prev, W, b)

    if activation == 'relu':
        A = np.maximum(0, Z)
    elif activation == 'softmax':
        A = softmax(Z)

    return A, Z


In [40]:
def linear_backward(dZ, A_prev, W):
    m = A_prev.shape[1]
    dW = np.dot(dZ, A_prev.T) / m
    db = np.sum(dZ, axis=1, keepdims=True) / m
    dA_prev = np.dot(W.T, dZ)

    return dA_prev, dW, db

def linear_activation_backward(dA, Z, A_prev, W, activation):
    if activation == 'relu':
        dZ = np.array(dA, copy=True)
        dZ[Z <= 0] = 0
    elif activation == 'softmax':
        dZ = dA 

    dA_prev, dW, db = linear_backward(dZ, A_prev, W)

    return dA_prev, dW, db


In [41]:
def update_parameters(parameters, grads, learning_rate):
    L = len(parameters) // 2

    for l in range(1, L + 1):
        parameters['W' + str(l)] -= learning_rate * grads['dW' + str(l)]
        parameters['b' + str(l)] -= learning_rate * grads['db' + str(l)]

    return parameters


In [42]:
def model(X, Y, layer_dims, learning_rate=0.0075, num_iterations=3000):
    parameters = initialize_parameters(layer_dims)
    costs = []

    for i in range(num_iterations):
        A = X
        caches = []

        for l in range(1, len(layer_dims) - 1):
            A_prev = A
            A, Z = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], 'relu')
            caches.append((A_prev, Z, parameters['W' + str(l)], parameters['b' + str(l)]))

        AL, ZL = linear_activation_forward(A, parameters['W' + str(len(layer_dims) - 1)], 
                                           parameters['b' + str(len(layer_dims) - 1)], 'softmax')
        caches.append((A, ZL, parameters['W' + str(len(layer_dims) - 1)], 
                       parameters['b' + str(len(layer_dims) - 1)]))

        cost = compute_cost(AL, Y)
        costs.append(cost)

        grads = {}
        dA = AL - Y 

        for l in reversed(range(1, len(layer_dims))):
            A_prev, Z, W, b = caches[l-1]
            dA, dW, db = linear_activation_backward(dA, Z, A_prev, W, 'relu' if l != len(layer_dims) - 1 else 'softmax')
            grads['dW' + str(l)] = dW
            grads['db' + str(l)] = db

        parameters = update_parameters(parameters, grads, learning_rate)

        if i % 100 == 0:
            print(f"Cost after iteration {i}: {cost}")

    return parameters


In [43]:
def predict(X, parameters, layer_dims):
    A = X
    for l in range(1, len(layer_dims) - 1):
        A, _ = linear_activation_forward(A, parameters['W' + str(l)], parameters['b' + str(l)], 'relu')

    AL, _ = linear_activation_forward(A, parameters['W' + str(len(layer_dims) - 1)], 
                                      parameters['b' + str(len(layer_dims) - 1)], 'softmax')
    predictions = np.argmax(AL, axis=0)
    return predictions

layer_dims = [X_train.shape[0], 64, 32, 16, 5] 
parameters = model(X_train, Y_train, layer_dims, learning_rate=0.01, num_iterations=1000)

train_predictions = predict(X_train, parameters, layer_dims)
test_predictions = predict(X_test, parameters, layer_dims)

train_accuracy = np.mean(train_predictions == np.argmax(Y_train, axis=0)) * 100
test_accuracy = np.mean(test_predictions == np.argmax(Y_test, axis=0)) * 100

print(f"Train Accuracy: {train_accuracy:.2f}%")
print(f"Test Accuracy: {test_accuracy:.2f}%")


Cost after iteration 0: 1.6094419485160683
Cost after iteration 100: 1.6094390869035748
Cost after iteration 200: 1.609437500178019
Cost after iteration 300: 1.6094361924273595
Cost after iteration 400: 1.6094351149720338
Cost after iteration 500: 1.6094342434255346
Cost after iteration 600: 1.6094333901317834
Cost after iteration 700: 1.6094326130551977
Cost after iteration 800: 1.6094319558735575
Cost after iteration 900: 1.6094313543267271
Train Accuracy: 28.80%
Test Accuracy: 20.00%
