In [76]:
#point d
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
from sklearn.datasets import load_breast_cancer

#importing wisconsin brest cancer dataset
data = load_breast_cancer()
X = data.data
z = data.target

#splitting the dataset into training and testing
X_train, X_test, z_train, z_test = train_test_split(X, z, test_size=0.2)

z_train = z_train.reshape(-1,)
z_test = z_test.reshape(-1,)

#transformation
def transformation(z, num_classes = 2):
    z = np.eye(num_classes)[z]
    return z

#inverse
def inverse_transformation(z):
    z = np.argmax(z, axis = 1)
    return z


In [52]:
#define RELU function as the activation function for the hidden layer
def relu(x):
    return np.maximum(0,x)

#define softmax as the activation function for the output layer
def softmax(a):
    expA = np.exp(a)
    return expA / expA.sum(axis=1, keepdims=True)

In [53]:
#initialization of weight and biases for a flexible number of hidden layers including the output layer
def initialize_parameters(layer_dims):
    parameters = {}
    for i in range(1, len(layer_dims)):
        parameters[f'W{i}'] = np.random.randn(layer_dims[i-1], layer_dims[i]) * 0.01
        parameters[f'b{i}'] = np.zeros((layer_dims[i], 1))
    return parameters

In [64]:
#We now write the feed forward pass
def feed_forward(X, parameters):
    cache = {'A0': X}
    for i in range(1, len(parameters)//2 + 1):
        Z = cache[f'A{i-1}'] @ parameters[f'W{i}'] + parameters[f'b{i}'].T
        A = relu(Z) if i < len(parameters)//2 else softmax(Z) #if we are at the output layer, we use softmax
        cache[f'Z{i}'] = Z
        cache[f'A{i}'] = A
    return cache

def feed_forward_hidden(X, parameters):
    cache = {'A0': X}
    for i in range(1, len(parameters)//2):  # s'arrête avant la couche de sortie
        Z = cache[f'A{i-1}'] @ parameters[f'W{i}'] + parameters[f'b{i}']
        A = relu(Z)
        cache[f'Z{i}'] = Z
        cache[f'A{i}'] = A
    return cache

In [55]:
#we now define the cost/loss function. We use the cross-entropy because it is a classification problem
def cross_entropy(prediction, target):
    epsilon = 1e-12 #added to avoid log(0)
    return -(target*np.log(prediction+epsilon) + (1-target)*np.log(1-prediction+epsilon))

def d_cross_entropy(prediction, target):   #attention: retrouver dérivée
    epsilon = 1e-12 #added to avoid log(0)
    return -(target/(prediction+epsilon) - (1-target)/(1-prediction+epsilon))

In [71]:
def backpropagation(X, Y, cache, parameters):
    m = Y.shape[0]
    gradients = {}
    output_error = d_cross_entropy(cache[f'A{len(parameters)//2}'], transformation(Y))
    for i in range(len(parameters)//2, 0, -1):
        gradients[f'dW{i}'] = 1/m * cache[f'A{i-1}'].T@ output_error
        gradients[f'db{i}'] = 1/m * np.sum(output_error, axis=0, keepdims=True)
        output_error = np.dot(output_error, parameters[f'W{i}'].T) * (cache[f'Z{i-1}'] > 0) if i > 1 else None
    return gradients

def backpropagation_update_parameters(parameters, gradients, learning_rate):
    for i in range(1, len(parameters)//2 + 1):
        parameters[f'W{i}'] -= learning_rate * gradients[f'dW{i}']
        parameters[f'b{i}'] -= learning_rate * gradients[f'db{i}'].T
    return parameters

In [78]:
#inizializzo dimensioni layers
n_inputs, n_features = X_train.shape
hid = 7 #number of neurons in the hidden layers
out = 2 #number of neurons in the output layer
layer_dims = [n_features, hid, out] #for one hidden layer
#layer_dims = [n_features, hid, hid, out] for two hidden layers

#inizializzo parametri
parameters = initialize_parameters(layer_dims)

#initialization of hyperparameters
learning_rate = 1e-3
epochs = 10000

#training
for i in range(epochs):
    cache = feed_forward(X_train, parameters)
    gradients = backpropagation(X_train, z_train, cache, parameters)
    parameters = backpropagation_update_parameters(parameters, gradients, learning_rate)
    if i % 100 == 0:
        print(f'Cost at iteration {i}', cross_entropy(cache[f'A{len(parameters)//2}'], transformation(z_train)).mean())

#testing
cache = feed_forward(X_test, parameters)
predictions = cache[f'A{len(parameters)//2}']
predictions = np.round(predictions)
accuracy = accuracy_score(z_test, inverse_transformation(predictions))
print('Accuracy on test set:', accuracy)




Cost at iteration 0 0.7037857884801421
Cost at iteration 100 0.6794227612621286
Cost at iteration 200 0.6706375069964506
Cost at iteration 300 0.6647091089988795
Cost at iteration 400 0.6607046686842254
Cost at iteration 500 0.6579995658900458
Cost at iteration 600 0.6561729566248524
Cost at iteration 700 0.65494037653712
Cost at iteration 800 0.6541092914856429
Cost at iteration 900 0.6535493669179505
Cost at iteration 1000 0.6531724175324418
Cost at iteration 1100 0.6529188265719973
Cost at iteration 1200 0.6527483303901718
Cost at iteration 1300 0.6526337633485467
Cost at iteration 1400 0.6525568146072902
Cost at iteration 1500 0.6525051527392971
Cost at iteration 1600 0.6524704796747817
Cost at iteration 1700 0.6524472153038182
Cost at iteration 1800 0.6524316094488577
Cost at iteration 1900 0.6524211430275649
Cost at iteration 2000 0.6524141246323396
Cost at iteration 2100 0.6524094189930358
Cost at iteration 2200 0.6524062643451913
Cost at iteration 2300 0.6524041496728118
Cost a