In [42]:
import numpy as np
import matplotlib.pyplot as plt
import os
from PIL import Image
import random

In [43]:
def sigmoid(x):
    """
    Compute the sigmoid of x

    Arguments:
    x -- A scalar or numpy array of any size.

    Return:
    s -- sigmoid(x)
    """
    s = 1/(1+np.exp(-x))
    return s

In [44]:
def relu(x):
    """
    Compute the relu of x

    Arguments:
    x -- A scalar or numpy array of any size.

    Return:
    s -- relu(x)
    """
    s = np.maximum(0,x)
    
    return s


In [102]:
# return train_X, train_Y, test_X, test_Y, val_X, val_Y
def load_dataset():
    scaled_train_dir = "scaled_chest_xray/train/"
    scaled_test_dir = "scaled_chest_xray/test/"

    scaled_train_pneu = os.listdir(os.path.join(scaled_train_dir, 'PNEUMONIA'))
    scaled_train_normal = os.listdir(os.path.join(scaled_train_dir, 'NORMAL'))
    scaled_test_pneu = os.listdir(os.path.join(scaled_test_dir, 'PNEUMONIA'))
    scaled_test_normal = os.listdir(os.path.join(scaled_test_dir, 'NORMAL'))

    scaled_train = [('PNEUMONIA/' + name, 1) for name in scaled_train_pneu] + [('NORMAL/' + name, 0) for name in scaled_train_normal]
    scaled_test = [('PNEUMONIA/' + name, 1) for name in scaled_test_pneu] + [('NORMAL/' + name, 0) for name in scaled_test_normal]

    random.seed(1)
    random.shuffle(scaled_train) 
    random.shuffle(scaled_test) 

    # data = np.array(list(Image.open("scaled_chest_xray/train/NORMAL/NORMAL-28501-0001.jpeg").getdata()))
    # print(len(data))
    # print(data)
    # print(type(data))
    
    test_X_list = [list(Image.open(scaled_test_dir + image_path).getdata().convert('L')) for image_path, i in scaled_test]
    test_X_list = [[float(value)/255 for value in image_data] for image_data in test_X_list]

    train_X_list = [list(Image.open(scaled_train_dir + image_path).getdata().convert('L')) for image_path, i in scaled_train]
    train_X_list = [[float(value)/255 for value in image_data] for image_data in train_X_list]

    train_X = np.array(train_X_list, dtype=float).T
    test_X = np.array(test_X_list, dtype=float).T
    train_Y = np.array([[float(i) for image_path, i in scaled_train]], dtype=float)
    test_Y = np.array([[float(i) for image_path, i in scaled_test]], dtype=float)

    return (train_X, train_Y, test_X, test_Y)

In [103]:
train_X, train_Y, test_X, test_Y = load_dataset()
#print(train_X[0:3])
#print(train_Y)
print(test_X)
print(test_Y)
print(train_X.shape)
print(train_Y.shape)
print(test_X.shape)
print(test_Y.shape)

[[0.         0.79607843 0.01960784 ... 0.1254902  0.68235294 0.        ]
 [0.         0.78431373 0.03529412 ... 0.1254902  0.70980392 0.        ]
 [0.         0.78039216 0.03137255 ... 0.1254902  0.69019608 0.00784314]
 ...
 [0.         0.05490196 0.06666667 ... 0.11372549 0.45098039 0.        ]
 [0.         0.08235294 0.06666667 ... 0.10588235 0.38823529 0.        ]
 [0.         0.10196078 0.06666667 ... 0.09803922 0.38431373 0.        ]]
[[1. 1. 0. 1. 1. 1. 0. 1. 0. 1. 1. 1. 1. 0. 1. 1. 1. 1. 0. 1. 1. 1. 1. 0.
  0. 1. 0. 0. 1. 1. 1. 0. 0. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 0. 0. 1. 0. 0.
  0. 0. 1. 1. 1. 0. 0. 0. 1. 1. 0. 1. 1. 1. 1. 1. 0. 1. 1. 0. 1. 1. 0. 0.
  0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 1. 1. 1. 1. 0. 1. 0. 1. 1. 1. 1. 0. 0. 0.
  1. 0. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 0. 1. 1. 0. 0. 1. 1. 0. 0. 1. 0. 1.
  1. 1. 0. 1. 1. 1. 0. 1. 0. 0. 1. 1. 1. 1. 0. 1. 0. 1. 1. 0. 1. 0. 1. 1.
  1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 0. 0. 1. 0. 1. 0. 1. 1. 1. 0. 0. 0. 0.
  1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 

In [104]:
def initialize_parameters(layer_dims):
    """
    Arguments:
    layer_dims -- python array (list) containing the dimensions of each layer in our network
    
    Returns:
    parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
                    W1 -- weight matrix of shape (layer_dims[l], layer_dims[l-1])
                    b1 -- bias vector of shape (layer_dims[l], 1)
                    Wl -- weight matrix of shape (layer_dims[l-1], layer_dims[l])
                    bl -- bias vector of shape (1, layer_dims[l])
                    
    Tips:
    - For example: the layer_dims for the "Planar Data classification model" would have been [2,2,1]. 
    This means W1's shape was (2,2), b1 was (1,2), W2 was (2,1) and b2 was (1,1). Now you have to generalize it!
    - In the for loop, use parameters['W' + str(l)] to access Wl, where l is the iterative integer.
    """
    
    np.random.seed(2)
    parameters = {}
    number_of_layers = len(layer_dims) # number of layers in the network

    for layer in range(1, number_of_layers):
        parameters['W' + str(layer)] = np.random.randn(layer_dims[layer], layer_dims[layer-1])*0.01
        parameters['b'  + str(layer)] = np.zeros((layer_dims[layer], 1))
        
        print("{}:{}".format('W' + str(layer), parameters['W' + str(layer)].shape))
        print("{}:{}".format('b' + str(layer), parameters['b' + str(layer)].shape))

        assert(parameters['W' + str(layer)].shape == (layer_dims[layer], layer_dims[layer-1]))
        assert(parameters['b'  + str(layer)].shape == (layer_dims[layer], 1))

        
    return parameters

In [105]:
def forward_propagation(X, parameters):
    """
    Argument:
    X -- input data of size (n_x, m)
    parameters -- python dictionary containing your parameters (output of initialization function)
    
    Returns:
    A2 -- The sigmoid output of the second activation
    cache -- a dictionary containing "Z1", "A1", "Z2" and "A2"
    """
    # Retrieve each parameter from the dictionary "parameters"
    # And implement Forward Propagation to calculate A2 (probabilities)

    ### START CODE HERE ### (≈ 8 lines of code)
    cache = {}
    number_of_layers = int(len(parameters)/2)-1
    lastA = X
    for layer in range(1, number_of_layers+1):
        Z = np.matmul(parameters['W'+ str(layer)], lastA) + parameters['b' + str(layer)]
        A = relu(Z)
        cache.update({'Z' + str(layer): Z})
        cache.update({'A' + str(layer): A})
        lastA = A 
    Z = np.matmul(parameters['W' + str(number_of_layers+1)], lastA) + parameters['b' + str(number_of_layers+1)]
    print(Z)
    A = sigmoid(Z)
    cache.update({'Z' + str(number_of_layers+1): Z})
    cache.update({'A' + str(number_of_layers+1): A})
    ### END CODE HERE ###
    
    assert(A.shape == (1, X.shape[1]))
    
    return A, cache

In [106]:
def compute_cost(A, Y):
    """
    Computes the cross-entropy cost given in equation (13)
    
    Arguments:
    A2 -- The sigmoid output of the second activation, of shape (1, number of examples)
    Y -- "true" labels vector of shape (1, number of examples)
    
    Returns:
    cost -- cross-entropy cost given equation (13)
    
    """
    
    m = Y.shape[1] # number of example

    # Compute the cross-entropy cost
    ### START CODE HERE ### (≈ 3 lines of code)
    #print(A)
    cost = -(1/m)* float(np.dot(Y, np.log(A.T)) + np.dot(1-Y,np.log(1-A.T)))
    ### END CODE HERE ###

    assert(isinstance(cost, float))

    return cost

In [107]:
def compute_cost_with_regularization(A, Y, parameters, lambd):
    """
    Implement the cost function with L2 regularization. See formula (2) above.
    
    Arguments:
    A3 -- post-activation, output of forward propagation, of shape (output size, number of examples)
    Y -- "true" labels vector, of shape (output size, number of examples)
    parameters -- python dictionary containing parameters of the model
    
    Returns:
    cost - value of the regularized loss function (formula (2))
    """
    m = Y.shape[1]
    
    # This gives you the cross-entropy part of the cost
    cross_entropy_cost = compute_cost(A, Y)

    ### START CODE HERE ### (approx. 2 line)
    number_of_layers = int(len(parameters)/2)-1

    sum_of_all_weights = 0.0
    for layer in range(1, number_of_layers+1):
        sum_of_all_weights = sum_of_all_weights + np.sum(np.square(parameters['W' + str(layer)]))

    L2_regularization_cost = lambd/(2*m) * sum_of_all_weights
    cost = cross_entropy_cost + L2_regularization_cost
    ### END CODER HERE ###

    return cost

In [108]:
def backward_propagation(parameters, cache, X, Y):
    """
    Implement the backward propagation using the instructions above.
    
    Arguments:
    parameters -- python dictionary containing our parameters 
    cache -- a dictionary containing "Z1", "A1", "Z2" and "A2".
    X -- input data of shape (2, number of examples)
    Y -- "true" labels vector of shape (1, number of examples)
    
    Returns:
    grads -- python dictionary containing your gradients with respect to different parameters
    """
    m = X.shape[1]
    
    # First, retrieve W1 and W2 from the dictionary "parameters".
    # Retrieve also A1 and A2 from dictionary "cache".
    # Backward propagation: calculate dW1, db1, dW2, db2. 

    ### START CODE HERE ### (≈ 3 (retrieve) + 6 (back prop) lines of code)
    grads = {}
    number_of_layers = int(len(parameters)/2)-1

    dZ = cache['A' + str(number_of_layers+1)] - Y
    dW = (1/m)*np.matmul(dZ,cache['A' + str(number_of_layers)].T)
    db = (1/m)*np.sum(dZ, axis=1, keepdims = True)
    grads.update({'dW' + str(number_of_layers+1): dW})
    grads.update({'db' + str(number_of_layers+1): db})

    last_dZ = dZ
    for layer in range(number_of_layers, 1, -1):
        dZ = np.matmul(parameters['W' + str(layer+1)].T,last_dZ)* np.heaviside(cache['A' + str(layer)], 1)
        dW = (1/m)*np.matmul(dZ,cache['A' + str(layer-1)].T)
        db = (1/m)*np.sum(dZ, axis=1, keepdims = True)
        grads.update({'dW' + str(layer): dW})
        grads.update({'db' + str(layer): db})
        last_dZ = dZ 
    
    dZ1 = np.matmul(parameters['W2'].T,last_dZ)* np.heaviside(cache['A1'], 1)
    dW1 = (1/m)*np.matmul(dZ1,X.T)
    db1 = (1/m)*np.sum(dZ1, axis=1, keepdims = True)
    grads.update({'dW1': dW1})
    grads.update({'db1': db1})
    
    return grads

In [109]:
def backward_propagation_with_regularization(parameters, cache, X, Y, lambd):
    """
    Implements the backward propagation of our baseline model to which we added an L2 regularization.
    
    Arguments:
    X -- input dataset, of shape (input size, number of examples)
    Y -- "true" labels vector, of shape (output size, number of examples)
    cache -- cache output from forward_propagation()
    lambd -- regularization hyperparameter, scalar
    
    Returns:
    gradients -- A dictionary with the gradients with respect to each parameter, activation and pre-activation variables
    """
    
    m = X.shape[1]
    grads = {}
    number_of_layers = int(len(parameters)/2)-1

    dZ = cache['A' + str(number_of_layers+1)] - Y
    dW = (1/m)*(np.matmul(dZ,cache['A' + str(number_of_layers)].T)+lambd*parameters['W' + str(number_of_layers+1)])
    db = (1/m)*np.sum(dZ, axis=1, keepdims = True)
    grads.update({'dW' + str(number_of_layers+1): dW})
    grads.update({'db' + str(number_of_layers+1): db})

    last_dZ = dZ
    for layer in range(number_of_layers, 1, -1):
        dZ = np.matmul(parameters['W' + str(layer+1)].T,last_dZ)* np.heaviside(cache['A' + str(layer)], 1)
        dW = (1/m)*(np.matmul(dZ,cache['A' + str(layer-1)].T)+lambd*parameters['W' + str(layer)])
        db = (1/m)*np.sum(dZ, axis=1, keepdims = True)    #(1,1)
        grads.update({'dW' + str(layer): dW})
        grads.update({'db' + str(layer): db})
        last_dZ = dZ 
    
    dZ1 = np.matmul(parameters['W2'].T,last_dZ)* np.heaviside(cache['A1'], 1)
    dW1 = (1/m)*(np.matmul(dZ1,X.T)+lambd*parameters['W1'])
    db1 = (1/m)*np.sum(dZ1, axis=1, keepdims = True)
    grads.update({'dW1': dW1})
    grads.update({'db1': db1})
    
    return grads

In [110]:
def update_parameters(parameters, grads, learning_rate):
    """
    Updates parameters using the gradient descent update rule given above
    
    Arguments:
    parameters -- python dictionary containing your parameters 
    grads -- python dictionary containing your gradients 
    
    Returns:
    parameters -- python dictionary containing your updated parameters 
    """
    # Retrieve each parameter from the dictionary "parameters" and "grads"
    # Update rule for each parameter
    number_of_layers = int(len(parameters)/2)-1
    for layer in range(1, number_of_layers+1):
        parameters['W'+str(layer)] = parameters['W'+str(layer)] - learning_rate*grads['dW'+str(layer)]
        parameters['b'+str(layer)] = parameters['b'+str(layer)] - learning_rate*grads['db'+str(layer)]
    
    # W1 = parameters["W1"]
    # b1 = parameters["b1"]
    # W2 = parameters["W2"]
    # b2 = parameters["b2"]

    # dW1 = grads["dW1"]
    # db1 = grads["db1"]
    # dW2 = grads["dW2"]
    # db2 = grads["db2"]

    # W1 = W1 - learning_rate*dW1
    # b1 = b1 - learning_rate*db1
    # W2 = W2 - learning_rate*dW2
    # b2 = b2 - learning_rate*db2
    
    # parameters = {"W1": W1,
    #               "b1": b1,
    #               "W2": W2,
    #               "b2": b2}
    


    return parameters

In [111]:
def nn_model(X, Y, layer_dims, learning_rate, lambd, num_iterations = 10000, print_cost=False):
    costs=[]
    regularization = False
    # Initialize parameters
    # Loop (gradient descent)
    # Print every 1000 th cost to console, e.g. using print("Cost after iteration {}: {:.2e}".format(i, cost))
    ### START CODE HERE ### (≈ 12 lines of code)
    parameters = initialize_parameters(layer_dims)
    #return parameters
    for i in range(num_iterations):
        A, cache = forward_propagation(X, parameters)
        cost = []
        if regularization:
            cost = compute_cost_with_regularization(A, Y, parameters, lambd)
        else:
            cost = compute_cost(A, Y)
        costs.append(cost)
        if print_cost == True:
            if (i+1) % 1 == 0:
                print("Cost after iteration {}: {:.2e}".format(i+1, cost))

        grads = {}
        if regularization:
            grads = backward_propagation_with_regularization(parameters, cache, X, Y, lambd)
        else:
            grads = backward_propagation(parameters, cache, X, Y)
        parameters = update_parameters(parameters, grads, learning_rate)
    ### END CODE HERE ###

    # Returns parameters learnt by the model. They can then be used to predict output
    return (parameters, costs)

In [112]:
def predict(parameters, X):
    """
    Using the learned parameters, predicts a class for each example in X
    
    Arguments:
    parameters -- python dictionary containing your parameters 
    X -- input data of size (n_x, m)
    
    Returns
    predictions -- vector of predictions of our model (red: 0 / blue: 1)
    """
    
    # Computes probabilities using forward propagation, and classifies to 0/1 using 0.5 as the threshold.
    ### START CODE HERE ### (≈ 2 lines of code)
    A2, cache = forward_propagation(X, parameters)
    predictions = np.heaviside(A2-0.5, 1)
    ### END CODE HERE ###
    
    return predictions

In [113]:
layer_dims = [train_X.shape[0], 100,train_Y.shape[0]]
learning_rate = 0.005
lambd = 0.2
number_of_iterations = 600

# Build a model with a n_h-dimensional hidden layer
parameters, costs = nn_model(train_X, train_Y, layer_dims, lambd, learning_rate, num_iterations = number_of_iterations, print_cost=True)

plt.plot(costs)
plt.ylabel('cost')
plt.xlabel('iterations: {}'.format(str(number_of_iterations)))
plt.title("alpha: {}; lambda: {}; layers: {}".format(str(learning_rate),str(lambd), str(layer_dims)))
plt.show()

W1:(100, 50176)
b1:(100, 1)
W2:(1, 100)
b2:(1, 1)
[[0.10503503 0.1045919  0.11057855 ... 0.08506437 0.09809489 0.10757789]]
Cost after iteration 1: 6.71e-01
[[3.14518017 3.18469824 3.2049382  ... 2.67046369 3.06805726 3.14217366]]
Cost after iteration 2: 8.22e-01
[[0.09481568 0.21971688 0.17749168 ... 0.15611727 0.09259702 0.13134133]]
Cost after iteration 3: 6.50e-01
[[2.94172096 3.15128274 3.1095065  ... 2.60495712 2.87254717 2.99171069]]
Cost after iteration 4: 7.85e-01
[[0.01722481 0.25127632 0.16303577 ... 0.15356788 0.04306498 0.08420284]]
Cost after iteration 5: 6.47e-01
[[2.90028095 3.27416497 3.17500394 ... 2.67204707 2.83711263 3.00144829]]
Cost after iteration 6: 7.80e-01
[[-0.02208058  0.3298368   0.19034298 ...  0.18948104  0.0187446
   0.07491302]]
Cost after iteration 7: 6.35e-01
[[2.77130623 3.30809044 3.15081902 ... 2.66520215 2.71620408 2.92410922]]
Cost after iteration 8: 7.58e-01
[[-0.07571886  0.38632834  0.19664796 ...  0.20600538 -0.02733253
   0.04910006]]
Cost 

KeyboardInterrupt: 

In [None]:
predictions = predict(parameters, train_X)
print ('Accuracy: %d' % float((np.dot(train_Y,predictions.T) + np.dot(1-train_Y,1-predictions.T))/float(train_Y.size)*100) + '%')

[[ 1.97421204  4.37614562 -1.709526   ...  6.12680574  2.86820416
   4.45814039]]
Accuracy: 92%


In [None]:
predictions = predict(parameters, test_X)
print ('Accuracy: %d' % float((np.dot(test_Y,predictions.T) + np.dot(1-test_Y,1-predictions.T))/float(test_Y.size)*100) + '%')

[[ 5.90446641e-01  2.89878590e+00  1.30834878e-02 -9.42141429e-01
   3.72173790e+00 -1.88474823e-01  2.10689003e+00  1.80423592e+00
   7.01022540e-02  2.60713790e+00 -2.33402221e-01  4.60745346e-01
   3.11786082e+00  4.89063452e+00  1.77590822e+00  2.77637214e+00
   1.20700822e+00  3.36644947e+00  3.88069316e-01  3.96091937e+00
   2.50724040e+00  6.47639775e-01  2.43277749e+00  1.45653676e+00
   1.31400629e+00  2.27692867e+00 -1.35523440e+00  3.09096632e-01
   3.61478693e+00  8.04580358e-01 -2.69985230e-01  2.81345164e+00
   4.08682136e+00  2.90429095e+00  2.04105225e+00  3.92847251e+00
  -1.92563916e-01  7.82747996e-01  2.03635226e+00 -4.53384975e-01
  -1.58618715e-01 -3.60861623e-01 -9.55404157e-02  3.00263787e+00
   1.45508188e+00 -7.44494112e-01  4.03141684e+00 -1.31229274e+00
   3.04574901e+00  2.26062873e+00 -7.72806456e-01 -1.21747801e+00
  -1.64314566e+00  5.04734366e+00  3.30109671e+00  3.02885382e+00
  -1.91507086e+00  3.16262820e+00  1.29007903e+00  2.99058097e+00
  -4.64802