In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

In [2]:
np.random.seed(42)

In [3]:
data = load_breast_cancer()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [4]:
X_train.shape, y_train.shape

((455, 30), (455,))

In [5]:
def layer_sizes(X, y, hidden_layers=4):
    n_i = X.shape[1]                # features
    n_h = hidden_layers
    n_o = 1 if y.ndim == 1 else y.shape[1]
    return n_i, n_h, n_o

In [6]:
n_i, n_h, n_o = layer_sizes(X_train, y_train)
n_i, n_h, n_o

(30, 4, 1)

In [7]:
def initialize_parameters(n_i, n_h, n_o, seed=42):
    rng = np.random.default_rng(seed)
    W1 = rng.normal(0, 0.01, size=(n_h, n_i))
    b1 = np.zeros((n_h, 1))
    W2 = rng.normal(0, 0.01, size=(n_o, n_h))
    b2 = np.zeros((n_o, 1))
    return {"W1": W1, "b1": b1, "W2": W2, "b2": b2}

In [8]:
parameters = initialize_parameters(n_i, n_h, n_o)
parameters

{'W1': array([[ 0.00304717, -0.01039984,  0.00750451,  0.00940565, -0.01951035,
         -0.0130218 ,  0.0012784 , -0.00316243, -0.00016801, -0.00853044,
          0.00879398,  0.00777792,  0.00066031,  0.01127241,  0.00467509,
         -0.00859292,  0.00368751, -0.00958883,  0.0087845 , -0.00049926,
         -0.00184862, -0.0068093 ,  0.01222541, -0.00154529, -0.00428328,
         -0.00352134,  0.00532309,  0.00365444,  0.00412733,  0.00430821],
        [ 0.02141648, -0.00406415, -0.00512243, -0.00813773,  0.00615979,
          0.01128972, -0.00113947, -0.00840156, -0.00824481,  0.00650593,
          0.00743254,  0.00543154, -0.0066551 ,  0.00232161,  0.00116686,
          0.00218689,  0.00871429,  0.00223596,  0.00678914,  0.00067579,
          0.00289119,  0.00631288, -0.01457156, -0.00319671, -0.00470373,
         -0.00638878, -0.00275142,  0.01494941, -0.00865831,  0.00968278],
        [-0.0168287 , -0.00334885,  0.00162753,  0.00586222,  0.00711227,
          0.00793347, -0.00348

In [9]:
def forward_propagation(X, parameters):
    W1, b1 = parameters["W1"], parameters["b1"]
    W2, b2 = parameters["W2"], parameters["b2"]

    Z1 = W1 @ X + b1             # (n_h, m)
    A1 = np.tanh(Z1)             # (n_h, m)
    Z2 = W2 @ A1 + b2            # (n_o, m)
    A2 = 1 / (1 + np.exp(-Z2))   # (n_o, m)

    cache = {"Z1": Z1, "A1": A1, "Z2": Z2, "A2": A2}
    return A2, cache

In [10]:
A2, cache = forward_propagation(X_train.T, parameters)

In [13]:
y_train.shape

(455,)

In [14]:
def compute_cost(A2, Y):
    m = Y.shape[0]                          # note: axis 1
    eps = 1e-12
    logprobs = Y*np.log(A2+eps) + (1-Y)*np.log(1-A2+eps)
    cost = -np.sum(logprobs) / m
    return float(cost)

In [15]:
cost = compute_cost(A2, y_train)
cost

0.6963495949974844

In [16]:
def backward_propagation(parameters, cache, X, Y):
    m = X.shape[1]

    W2 = parameters["W2"]
    A1, A2 = cache["A1"], cache["A2"]

    dZ2 = A2 - Y                          # (n_o, m)
    dW2 = (dZ2 @ A1.T) / m                # (n_o, n_h)
    db2 = np.sum(dZ2, axis=1, keepdims=True) / m  # (n_o, 1)

    dZ1 = (W2.T @ dZ2) * (1 - A1**2)      # (n_h, m)
    dW1 = (dZ1 @ X.T) / m                 # (n_h, n_i)
    db1 = np.sum(dZ1, axis=1, keepdims=True) / m  # (n_h, 1)

    return {"dW1": dW1, "db1": db1, "dW2": dW2, "db2": db2}

In [17]:
grads = backward_propagation(parameters, cache, X_train.T, y_train)
print ("dW1 = "+ str(grads["dW1"]))
print ("db1 = "+ str(grads["db1"]))
print ("dW2 = "+ str(grads["dW2"]))
print ("db2 = "+ str(grads["db2"]))

dW1 = [[ 6.66756201e-05  1.43315371e-04  4.23464822e-04  1.79027938e-03
   7.44933334e-07  6.30567937e-07  3.26551901e-07  9.42789203e-08
   1.43173007e-06  5.52117826e-07  2.03905513e-06  1.13749559e-05
   1.39542986e-05  1.12436479e-04  8.61254214e-08  2.11448452e-07
   2.63030598e-07  5.92568394e-08  1.94981257e-07  4.20207323e-08
   7.41073461e-05  1.87052952e-04  4.74981649e-04  2.20932852e-03
   1.09331067e-06  1.38766890e-06  1.19423369e-06  3.18619656e-07
   2.18989439e-06  7.15403169e-07]
 [-1.92195886e-06 -4.20144111e-06 -1.21827433e-05 -4.84244475e-05
  -2.33147110e-08 -1.94942640e-08 -9.02896761e-09 -2.35060427e-09
  -4.43265365e-08 -1.72386878e-08 -6.21693509e-08 -3.53305890e-07
  -4.24194916e-07 -3.27036311e-06 -2.73293580e-09 -6.34334237e-09
  -7.34393620e-09 -1.55766840e-09 -6.25818542e-09 -1.32747174e-09
  -2.14095264e-06 -5.49734687e-06 -1.36899579e-05 -6.00595100e-05
  -3.39308155e-08 -4.11694006e-08 -3.19285494e-08 -7.75009981e-09
  -6.73610503e-08 -2.21043055e-08]


In [18]:

def update_parameters(parameters, grads, learning_rate = 1.2):

    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    dW1 = grads["dW1"]
    db1 = grads["db1"]
    dW2 = grads["dW2"]
    db2 = grads["db2"]

    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    
    return parameters

In [19]:
def nn_model(X, Y, n_h, num_iterations=10000, print_cost=False):

    Xc = X.T
    Yc = Y.reshape(1, -1)                 

    n_i, _, n_o = layer_sizes(X, Y)
    parameters = initialize_parameters(n_i, n_h, n_o)

    for i in range(num_iterations):
        A2, cache = forward_propagation(Xc, parameters)
        cost = compute_cost(A2, Yc)
        grads = backward_propagation(parameters, cache, Xc, Yc)
        parameters = update_parameters(parameters, grads, learning_rate=0.1)

        if print_cost and i % 1000 == 0:
            print(f"Cost after iteration {i}: {cost:.6f}")

    return parameters


In [20]:
n_i, n_h, n_o = layer_sizes(X_train, y_train, hidden_layers=4)
params = nn_model(X_train, y_train, n_h=4, num_iterations=10000, print_cost=True)

Cost after iteration 0: 316.839066
Cost after iteration 1000: 300.690231
Cost after iteration 2000: 300.690204
Cost after iteration 3000: 300.690195
Cost after iteration 4000: 300.690190
Cost after iteration 5000: 300.690187
Cost after iteration 6000: 300.690185
Cost after iteration 7000: 300.690184
Cost after iteration 8000: 300.690183
Cost after iteration 9000: 300.690182


In [21]:
params

{'W1': array([[ 0.00343702, -0.0095054 ,  0.00996195,  0.01840186, -0.01950509,
         -0.01301758,  0.00127989, -0.00316211, -0.00015822, -0.00852659,
          0.00880634,  0.00785358,  0.00074471,  0.01188286,  0.00467568,
         -0.00859167,  0.0036888 , -0.00958859,  0.00878583, -0.00049899,
         -0.00140994, -0.00561349,  0.01501718,  0.00987765, -0.00427558,
         -0.00351268,  0.00532864,  0.00365556,  0.00414217,  0.00431312],
        [ 0.0210825 , -0.00480754, -0.00722666, -0.0157078 ,  0.00615512,
          0.01128611, -0.00114055, -0.00840179, -0.00825334,  0.00650256,
          0.00742181,  0.00536507, -0.00672855,  0.00180087,  0.00116635,
          0.0021859 ,  0.00871336,  0.00223579,  0.00678796,  0.00067556,
          0.00251538,  0.00530996, -0.01696292, -0.01279863, -0.00471045,
         -0.00639588, -0.00275536,  0.01494863, -0.00867124,  0.00967854],
        [-0.01659882, -0.00288868,  0.00292957,  0.00045254,  0.00711408,
          0.00793128, -0.00349

In [None]:
def predict(parameters, X):

    A2, _ = forward_propagation(X, parameters)
    predictions = (A2 > 0.5)
    
    return predictions