In [541]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import copy
%matplotlib inline

np.random.seed(1)

In [917]:
# define activation functions for forward propagation and their respective derivatives for backward propagation

def sigmoid(x):
    return 1/(1 + np.exp(-x)), x

def sigmoid_backward(gradient, x):
    return gradient * (x * (1 - x))

def relu(x, alpha = 1):
    return np.maximum(alpha * x, x), x

def relu_backward(gradient, x, alpha = 1):
    x_relu = np.greater(x, 0).astype(int)
    x_relu[x == 0] = alpha
    return gradient * x_relu

In [918]:
# initialize parameters
# initializes weights and biases of an n-layer neural network
def initialize_parameters(layer_dims):
    parameters = {}
    layers = len(layer_dims)

    for layer in range(1, layers):
        parameters['W' + str(layer)] = np.random.randn(layer_dims[layer], layer_dims[layer - 1]) * 0.01
        parameters['b' + str(layer)] = np.zeros((layer_dims[layer], 1))

    return parameters

In [919]:
def linear_forward_prop(activations, weights, bias):
    pre_act = np.dot(weights, activations) + bias

    cache = (activations, weights, bias)

    return pre_act, cache

In [920]:
def act_forward_prop(activations, weights, bias, act_fun):
    pre_act, linear_cache = linear_forward_prop(activations, weights, bias)
    post_act, activation_cache = act_fun(pre_act)

    cache = (linear_cache, activation_cache)
    return post_act, cache

In [921]:
def forward_propagation(X, params):
    caches = []
    activations = X
    layers = len(params) // 2

    for layer in range(1, layers):
        activations, cache = act_forward_prop(activations, params['W' + str(layer)], params['b' + str(layer)], relu)
        caches.append(cache)

    output_activations, output_cache = act_forward_prop(activations, params['W' + str(layers)], params['b' + str(layers)], sigmoid)
    caches.append(output_cache)

    return output_activations, caches

In [922]:
def model_cost(output_activations, Y):
    size = Y.shape[1]

    cost = (-1 / size) * np.sum((Y * np.log(output_activations)) + ((1 - Y) * np.log(1 - output_activations)))
    cost = np.squeeze(cost)

    return cost

In [923]:
def linear_back_prop(gradient, cache):
    activations, weights, biases = cache
    size = activations.shape[1]

    weights_deriv = np.dot(gradient, activations.T) / size
    biases_deriv = np.sum(gradient, axis = 1, keepdims = True) / size
    activations_deriv = np.dot(weights.T, gradient)

    return activations_deriv, weights_deriv, biases_deriv

In [924]:
def act_back_prop(gradient, cache, act_fun):
    linear_cache, activation_cache = cache
    post_gradient = act_fun(gradient, activation_cache)
    cost_gradient, weights_deriv, biases_deriv = linear_back_prop(post_gradient, linear_cache)

    return cost_gradient, weights_deriv, biases_deriv

In [925]:
def back_propagation(output_activations, Y, caches):
    gradients = {}
    layers = len(caches)
    size = output_activations.shape[1]
    Y = Y.reshape(output_activations.shape)

    output_act_deriv = - (np.divide(Y, output_activations) - np.divide(1 - Y, 1 - output_activations))

    cache = caches[-1]
    gradient, weights_deriv, biases_deriv = act_back_prop(output_act_deriv, cache, sigmoid_backward)
    gradients['dA' + str(layers - 1)] = gradient
    gradients['dW' + str(layers)] = weights_deriv
    gradients['db' + str(layers)] = biases_deriv

    for layer in reversed(range(layers - 1)):
        cache = caches[layer]
        gradient, weights_deriv, biases_deriv = act_back_prop(gradient, cache, relu_backward)
        gradients['dA' + str(layer)] = gradient
        gradients['dW' + str(layer + 1)] = weights_deriv
        gradients['db' + str(layer + 1)] = biases_deriv

    return gradients

In [926]:
def update_params(params, grads, learning_rate):
    parameters = copy.deepcopy(params)
    layers = len(parameters) // 2

    for layer in range(layers):
        parameters['W' + str(layer + 1)] - (learning_rate * grads['dW' + str(layer + 1)])
        parameters['b' + str(layer + 1)] - (learning_rate * grads['db' + str(layer + 1)])

    return parameters

In [927]:
def train(X, Y, layers_dims, learning_rate = 0.005, iterations = 50000):
    np.random.seed(2024)
    costs = []

    parameters = initialize_parameters(layers_dims)

    for i in range(iterations):
        output_activations, caches = forward_propagation(X, parameters)
        gradients = back_propagation(output_activations, Y, caches)

        if i % 100 == 0 or i == iterations:
            cost = model_cost(output_activations, Y)
            costs.append(cost)

    return parameters, costs

In [928]:
def predict(X, parameters):
    predictions, caches = forward_propagation(X, parameters)

    return predictions

In [929]:
def standardize(data):
    mean = np.mean(data, axis = 0)
    std = np.std(data, axis = 0)

    return (data - mean) / std

In [930]:
df = pd.read_csv('/path/to/data')
df.drop(columns=['id'], inplace=True)
# shuffle dataset
df = df.sample(frac=1, random_state=2024)
df.head()

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
72,M,17.2,24.52,114.2,929.4,0.1071,0.183,0.1692,0.07944,0.1927,...,23.32,33.82,151.6,1681.0,0.1585,0.7394,0.6566,0.1899,0.3313,0.1339
516,M,18.31,20.58,120.8,1052.0,0.1068,0.1248,0.1569,0.09451,0.186,...,21.86,26.2,142.2,1493.0,0.1492,0.2536,0.3759,0.151,0.3074,0.07863
469,B,11.62,18.18,76.38,408.8,0.1175,0.1483,0.102,0.05564,0.1957,...,13.36,25.4,88.14,528.1,0.178,0.2878,0.3186,0.1416,0.266,0.0927
69,B,12.78,16.49,81.37,502.5,0.09831,0.05234,0.03653,0.02864,0.159,...,13.46,19.76,85.67,554.9,0.1296,0.07061,0.1039,0.05882,0.2383,0.0641
331,B,12.98,19.35,84.52,514.0,0.09579,0.1125,0.07107,0.0295,0.1761,...,14.42,21.95,99.21,634.3,0.1288,0.3253,0.3439,0.09858,0.3596,0.09166


In [931]:
X, y = standardize(df.loc[:, df.columns != 'diagnosis'].values), np.where(df['diagnosis'] == 'M', 1, 0)
# set test sample size
test_size = 0.3
test_samples = int(len(df) * test_size)

train_X = X[test_samples:].reshape(X[test_samples:].shape[0], -1).T
train_y = y[test_samples:].reshape((1, y[test_samples:].shape[0]))
test_X = X[:test_samples].reshape(X[:test_samples].shape[0], -1).T
test_y = y[:test_samples].reshape((1, y[:test_samples].shape[0]))

In [932]:
### constants ###

input_layer_num = X.shape[1]
output_layer_num = 1
learning_rate = 0.00075

layers_dims = [input_layer_num, 20, output_layer_num]

In [946]:
parameters, costs = train(train_X, train_y, layers_dims, learning_rate)
train_predictions = np.where(predict(train_X, parameters) > 0.5, 1, 0)
test_predictions = np.where(predict(test_X, parameters) > 0.5, 1, 0)

In [950]:
train_accuracy = ((np.dot(train_y, train_predictions.T) + np.dot(1 - train_y, 1 - train_predictions.T)) / train_y.size * 100).squeeze()
test_accuracy = ((np.dot(test_y, test_predictions.T) + np.dot(1 - test_y, 1 - test_predictions.T)) / test_y.size * 100).squeeze()
print('Train accuracy: ' + str(train_accuracy))
print('Test accuracy: ' + str(test_accuracy))

Train accuracy: 94.73684210526315
Test accuracy: 92.3529411764706
