In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
def init_params(layer_dims, seed=3):
    """Create the weight matrices and bias vectors for every layer.

    Parameters
    ----------
    layer_dims : sequence of int
        Unit counts per layer; ``layer_dims[0]`` is the input width and each
        following entry is the width of one layer.
    seed : int or None, default 3
        Value passed to ``np.random.seed`` before drawing the weights. The
        default reproduces the previously hard-coded seed. Pass ``None`` to
        leave NumPy's global random state untouched (for callers that seed
        it themselves).

    Returns
    -------
    dict
        ``'W<l>'`` of shape ``(layer_dims[l], layer_dims[l - 1])`` drawn from
        a standard normal and scaled by 0.01, and ``'b<l>'`` of shape
        ``(layer_dims[l], 1)`` filled with zeros, for ``l = 1 .. len(layer_dims) - 1``.
    """
    # Seeding mutates NumPy's *global* RNG, so it is skipped when seed is None.
    if seed is not None:
        np.random.seed(seed)

    params = {}
    for l in range(1, len(layer_dims)):
        fan_in, fan_out = layer_dims[l - 1], layer_dims[l]
        params['W' + str(l)] = np.random.randn(fan_out, fan_in) * 0.01
        params['b' + str(l)] = np.zeros((fan_out, 1))

    # Print initialized parameters
    print("Initialized parameters:", params.keys())
    return params

## Linear hypothesis: $Z = W X + b$ (matrix product)
### $W$ = weight matrix, $X$ = input matrix (one example per column), $b$ = bias vector

In [3]:
def sigmoid(Z):
    """Element-wise logistic function, evaluated without overflow.

    Parameters
    ----------
    Z : array_like or scalar
        Pre-activation values.

    Returns
    -------
    A : numpy.ndarray or NumPy scalar
        ``1 / (1 + exp(-Z))`` for every element of ``Z``.
    cache : same object as ``Z``
        The untouched input, returned so the backward pass can reuse it.
    """
    Z_arr = np.asarray(Z)

    # exp(-|Z|) always lies in (0, 1], so it cannot overflow the way
    # exp(-Z) does for large negative Z.
    decay = np.exp(-np.abs(Z_arr))

    # Both branches are algebraically 1 / (1 + exp(-Z)):
    #   Z >= 0 : 1 / (1 + exp(-Z))
    #   Z <  0 : exp(Z) / (1 + exp(Z))
    A = np.where(Z_arr >= 0, 1 / (1 + decay), decay / (1 + decay))

    # np.where yields a 0-d array for scalar input; unwrap it to a scalar.
    if A.ndim == 0:
        A = A[()]

    cache = Z
    return A, cache

In [4]:
def forward_prop(X, params):
    """Run the input through every layer, applying a sigmoid after each one.

    Parameters
    ----------
    X : array
        Network input; it is used as the activation fed to layer 1.
    params : dict
        Holds ``'W<l>'`` and ``'b<l>'`` for each layer ``l``; the layer count
        is taken as ``len(params) // 2``.

    Returns
    -------
    A : array
        Activation produced by the last layer.
    caches : list
        One ``((A_prev, W, b), Z)`` tuple per layer, in forward order.
    """
    num_layers = len(params) // 2  # two entries (W, b) per layer
    caches = []
    activation = X

    for idx in range(1, num_layers + 1):
        weights = params['W' + str(idx)]
        bias = params['b' + str(idx)]
        layer_input = activation

        pre_activation = np.dot(weights, layer_input) + bias
        activation, activation_cache = sigmoid(pre_activation)

        caches.append(((layer_input, weights, bias), activation_cache))

    return activation, caches

In [5]:
def cost_function(A, Y, eps=1e-15):
    """Mean binary cross-entropy between predictions ``A`` and labels ``Y``.

    Parameters
    ----------
    A : array
        Predicted probabilities, same shape as ``Y``.
    Y : array
        Labels; ``Y.shape[1]`` is used as the number of examples.
    eps : float, default 1e-15
        Predictions are clipped to ``[eps, 1 - eps]`` before taking logs.
        Without this, a prediction of exactly 0 or 1 produces ``log(0)`` and
        the cost becomes NaN or infinite.

    Returns
    -------
    float
        ``-(1/m) * sum(Y * log(A) + (1 - Y) * log(1 - A))``.
    """
    m = Y.shape[1]

    # Keep both log arguments strictly positive.
    A_safe = np.clip(A, eps, 1 - eps)

    cost = (-1/m) * np.sum(Y * np.log(A_safe) + (1 - Y) * np.log(1 - A_safe))

    return cost

In [6]:
def one_layer_backward(dA, cache):
    """Back-propagate through one sigmoid layer.

    Parameters
    ----------
    dA : array
        Gradient of the cost with respect to this layer's activation.
    cache : tuple
        ``((A_prev, W, b), Z)`` as stored for this layer by the forward pass.

    Returns
    -------
    dA_prev : array
        Gradient with respect to the layer's input ``A_prev``.
    dW : array
        Gradient with respect to ``W``, averaged over ``A_prev.shape[1]`` examples.
    db : array
        Gradient with respect to ``b``, averaged the same way, kept as a column.
    """
    linear_cache, Z = cache
    A_prev, W, _ = linear_cache  # the bias itself is not needed for the gradients
    m = A_prev.shape[1]  # Number of examples

    # Evaluate the activation once and reuse it: sigmoid'(Z) = s * (1 - s).
    s, _ = sigmoid(Z)
    dZ = dA * s * (1 - s)

    dW = (1/m) * np.dot(dZ, A_prev.T)
    db = (1/m) * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)

    return dA_prev, dW, db

In [7]:
def backprop(AL, Y, caches, eps=1e-15):
    """Compute gradients for every layer from the output activation back.

    Parameters
    ----------
    AL : array
        Output-layer activation (predicted probabilities).
    Y : array
        Labels; reshaped to ``AL.shape`` before use.
    caches : list
        Per-layer caches in forward order, as consumed by ``one_layer_backward``.
    eps : float, default 1e-15
        ``AL`` is clipped to ``[eps, 1 - eps]`` before the divisions below so
        that a saturated prediction (exactly 0 or 1) cannot cause a division
        by zero and push inf/NaN into the gradients.

    Returns
    -------
    dict
        For each layer ``l`` in ``1 .. len(caches)``: ``'dW<l>'`` and
        ``'db<l>'`` are the parameter gradients of layer ``l``. ``'dA<l>'`` is
        the ``dA_prev`` value returned for layer ``l``, i.e. the gradient with
        respect to that layer's *input*.
    """
    grads = {}
    L = len(caches)
    Y = Y.reshape(AL.shape)

    # Derivative of the cross-entropy cost with respect to AL, with the
    # denominators kept away from zero.
    AL_safe = np.clip(AL, eps, 1 - eps)
    dAL = -(np.divide(Y, AL_safe) - np.divide(1 - Y, 1 - AL_safe))

    # Output layer.
    current_cache = caches[L - 1]
    grads['dA' + str(L)], grads['dW' + str(L)], grads['db' + str(L)] = one_layer_backward(dAL, current_cache)

    # Remaining layers, last to first. Layer l + 1 receives the gradient that
    # was stored under 'dA<l + 2>' by the layer after it.
    for l in reversed(range(L - 1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = one_layer_backward(grads["dA" + str(l + 2)], current_cache)

        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads


In [8]:
def update_parameters(parameters, grads, learning_rate):
    """Take one gradient-descent step on every weight and bias.

    Each ``'W<l>'`` / ``'b<l>'`` entry of ``parameters`` is decreased in place
    by ``learning_rate`` times the matching ``'dW<l>'`` / ``'db<l>'`` entry of
    ``grads``. The layer count is taken as ``len(parameters) // 2``.

    Returns the same ``parameters`` dict that was passed in.
    """
    num_layers = len(parameters) // 2

    for idx in range(1, num_layers + 1):
        layer = str(idx)
        # Weights first, then bias, for each layer.
        for prefix in ('W', 'b'):
            parameters[prefix + layer] -= learning_rate * grads['d' + prefix + layer]

    return parameters


In [9]:
def train(X, Y, layer_dims, epochs, lr):
    """Fit the network with full-batch gradient descent.

    Parameters
    ----------
    X, Y : arrays
        Inputs and labels, passed unchanged to the forward pass, the cost and
        the backward pass on every epoch.
    layer_dims : sequence of int
        Layer widths handed to ``init_params``.
    epochs : int
        Number of forward/backward/update iterations.
    lr : float
        Learning rate handed to ``update_parameters``.

    Returns
    -------
    params : dict
        Parameters after the final update.
    cost_history : list
        Cost measured at the start of each epoch (before that epoch's update).
    """
    cost_history = []
    params = init_params(layer_dims)

    for _ in range(epochs):
        Y_hat, caches = forward_prop(X, params)

        # Record the cost for the current parameters before changing them.
        cost_history.append(cost_function(Y_hat, Y))

        params = update_parameters(params, backprop(Y_hat, Y, caches), lr)

    return params, cost_history

In [10]:
def predict(X, params):
    """Return hard 0/1 predictions for ``X``.

    Runs the forward pass and labels an output as 1 when it is strictly
    greater than 0.5, otherwise 0. The result is an integer array.
    """
    probabilities = forward_prop(X, params)[0]  # caches are not needed here
    return (probabilities > 0.5).astype(int)

In [11]:
def accuracy(Y_hat, Y):
    """Percentage (0-100) of entries where ``Y_hat`` equals ``Y``."""
    matches = (Y_hat == Y)
    return 100 * np.mean(matches)

## Testing the Neural Network on a heart disease dataset

In [12]:
import pandas as pd
# Read the heart-disease table from the working directory and preview
# its first rows as the cell output.
df = pd.read_csv(filepath_or_buffer="heart-disease.csv")
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [13]:
from sklearn.model_selection import train_test_split

# Features are every column except the label; the label is the "target" column.
X = df.drop(columns=["target"])
y = df["target"]

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=3,
)

In [28]:
import numpy as np

# The network code works on (features, examples) arrays and (1, examples)
# label rows, so the split outputs are transposed / reshaped here.
# The results get NEW names on purpose: overwriting X_train / X_test with
# their transposes makes this cell fail when it is run a second time,
# because the second run transposes them back.
X_train_T = np.asarray(X_train).T
y_train_row = np.asarray(y_train).reshape(1, -1)
X_test_T = np.asarray(X_test).T
y_test_row = np.asarray(y_test).reshape(1, -1)

# Input width comes from the data so it cannot drift out of sync with the
# feature columns; two sigmoid layers (64, 32) feed one output unit.
layer_dims = [X_train_T.shape[0], 64, 32, 1]

params, cost_history = train(X_train_T, y_train_row, layer_dims, epochs=1000, lr=0.01)

predictions = predict(X_test_T, params)

test_accuracy = accuracy(predictions, y_test_row)

print(f"Test Accuracy: {test_accuracy}%")

Initialized parameters: dict_keys(['W1', 'b1', 'W2', 'b2', 'W3', 'b3'])
Test Accuracy: 65.57377049180327%
