In [26]:
import numpy as np
import pandas as pd
from tqdm import tqdm

In [27]:
data = np.load("mnist.npz") # loading dataset

# assign dataset
x_train = data["x_train"] # shape : (60000, 784)
y_train = data["y_train"] # shape : (60000,)

x_test = data["x_test"]
y_test = data["y_test"]

# data normalization and flattening 
x_train = x_train / 255.0
x_train = x_train.reshape(-1, 28*28)

x_test = x_test / 255.0
x_test = x_test.reshape(-1, 28*28)

#one hotting for have 10 classes
y_train_onehot = np.eye(10)[y_train]
y_test_onehot = np.eye(10)[y_test]

In [28]:
def initialization(X, hidden): # function initialisation of wheights of the ANN
	output=10
	n_input = X.shape[1]

	#first layer
	W1 = np.random.randn(hidden, n_input) * np.sqrt(1. / n_input)
	b1 = np.zeros((1, hidden))

	#second layer
	W2 = np.random.randn(output, hidden) * np.sqrt(1. / hidden)
	b2 = np.zeros((1, output))

	weights = {
		"W1" : W1,
		"b1" : b1,
		"W2" : W2,
		"b2" : b2
	}
	
	return weights

In [29]:
def sigmoid(z):
	return 1 / (1 + np.exp(-z))
def softmax(z):
	exp_z = np.exp(z - np.max(z, axis=1, keepdims=True))
	return exp_z / np.sum(exp_z, axis=1, keepdims=True)

def forward_propagation(X, weights): # forward propagation function
	
	W1 = weights["W1"]
	b1 = weights["b1"]
	W2 = weights["W2"]
	b2 = weights["b2"]

	#layer 0
	A0 = X # shape : (60000, 784)

	#first layer
	Z1 = A0 @ W1.T + b1 # shape : (60000, 784) @ (784, 10) + (1,10) = (60000, 10)
	A1 = sigmoid(Z1)

	#second layer
	Z2 = A1 @ W2.T + b2   # shape : (60000, 10) @ (10, 10) + (1,10) = (60000, 10)
	A2 = softmax(Z2)

	activations = {
		"A1" : A1,
		"A2" : A2
	}

	return activations

In [30]:
def cross_entropy_loss(activations, Y): # loss function
	A = activations["A2"]
	m = Y.shape[0]
	
	# Pour éviter log(0)
	epsilon = 1e-9
	
	loss = -np.sum(Y * np.log(A + epsilon)) / m
	
	return loss

In [31]:
def gradient(X, Y, activations, weights):

	A1 = activations["A1"]
	A2 = activations["A2"]

	W2 = weights["W2"]

	m = X.shape[0]

										# d mean the partial derivative
	dZ2 = (A2 - Y) / m 						 # = (dLoss / dA2) * (dA2 / dZ2) easy derivative, cross entropy then softmax
	dW2 = A1.T @ dZ2						 # = (dLoss / dA2) * (dA2 / dZ2) * (dZ2 / dW2)
	db2 = np.sum(dZ2, axis=0, keepdims=True) # = (dLoss / dA2) * (dA2 / dZ2) * (dZ2 / db2)

	dA1 = dZ2 @ W2.T 						 # = (dLoss / dA2) * (dA2 / dZ2) * (dZ2 / dA1)
	dZ1 = dA1 * (A1 * (1 - A1)) 			 # = (dLoss / dA2) * (dA2 / dZ2) * (dZ2 / dA1) * (dA1 / dZ1)
	dW1 = X.T @ dZ1 						 # = (dLoss / dA2) * (dA2 / dZ2) * (dZ2 / dA1) * (dA1 / dZ1) * (dZ1 / dW1)
	db1 = np.sum(dZ1, axis=0, keepdims=True) # = (dLoss / dA2) * (dA2 / dZ2) * (dZ2 / dA1) * (dA1 / dZ1) * (dZ1 / db1)
	
	gradients = {
		"dW1" : dW1.T,
		"db1" : db1,
		"dW2" : dW2.T,
		"db2" : db2
	}


	return gradients

In [32]:
def update(weights, gradients, learning_rate): # function for updating the weight of the model.
	
	weights["W1"]-=learning_rate*gradients["dW1"]
	weights["b1"]-=learning_rate*gradients["db1"]

	weights["W2"]-=learning_rate*gradients["dW2"]
	weights["b2"]-=learning_rate*gradients["db2"]

	return weights

In [36]:
def accuracy_score(y_true, y_pred):
    return np.mean(y_true == y_pred)

In [37]:
class Model:
	def __init__(self, X, Y,hidden=10):
		self.X = X
		self.Y = Y
		self.weights = initialization(X,hidden)

	def fit(self, n_iter=1000, learning_rate=0.1):
		cost = []
		for _ in tqdm(range(n_iter)):
			activations = forward_propagation(self.X, self.weights)
			gradients = gradient(self.X,self.Y,activations,self.weights)
			self.weights = update(self.weights,gradients,learning_rate)
			cost.append(cross_entropy_loss(activations,self.Y))
		return cost
		

	def predict(self, X):
		activations = forward_propagation(X, self.weights)
		return np.argmax(activations["A2"], axis=1)  # retourne les classes prédites (0-9)

	def score(self, X, y):
		y_pred = self.predict(X)
		return np.mean(y_pred == y)


In [38]:
newModel = Model(x_train,y_train_onehot)

In [39]:
newModel.fit(100)

100%|██████████| 100/100 [01:44<00:00,  1.04s/it]


[2.378046570597063,
 2.3704909354559884,
 2.363450813584647,
 2.356864226402914,
 2.3506779692708415,
 2.344846120461924,
 2.339328888343941,
 2.3340917008637985,
 2.3291044695187724,
 2.3243409808635613,
 2.319778383696701,
 2.3153967505101773,
 2.311178698638606,
 2.3071090608321114,
 2.303174597588153,
 2.299363745192577,
 2.295666394496222,
 2.2920736962570043,
 2.2885778895405484,
 2.2851721502463858,
 2.2818504573239293,
 2.278607474661747,
 2.275438446974186,
 2.272339108274622,
 2.269305601723027,
 2.2663344097789153,
 2.2634222936920283,
 2.260566241434911,
 2.2577634232351222,
 2.2550111539090643,
 2.2523068612413284,
 2.2496480596975954,
 2.2470323288082974,
 2.2444572956153417,
 2.241920620635047,
 2.2394199868555327,
 2.2369530913539672,
 2.234517639185538,
 2.2321113392589798,
 2.22973190196988,
 2.2273770384103884,
 2.225044461010273,
 2.222731885488545,
 2.220437034006629,
 2.2181576394146307,
 2.215891450472856,
 2.2136362379147103,
 2.2113898011971482,
 2.209149975764

In [40]:
acc = newModel.score(x_test, y_test)       # si vous utilisez la méthode de classe

print(f"Précision sur le test : {acc * 100:.2f}%")

Précision sur le test : 42.76%
