# Assi #3 

## Maher mohsen 20200415  S1
## Bemwa malak  20200116  S1

In [1]:
import numpy as np

## Data preprocessing

In [2]:
from sklearn.datasets import fetch_openml
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import pandas as pd
from abc import ABC, abstractmethod
import warnings
warnings.filterwarnings(action="ignore")

### 1. Loading minst data set

In [3]:
mnist = fetch_openml('mnist_784')

In [4]:
X = mnist.data.astype(float)
y = mnist.target.astype(int)

### 2. Split the data

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### 3. Standarization 

In [6]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)

### 4. one hot encoding

In [7]:
y_train_onehot = np.eye(len(np.unique(y)))[y_train.values.reshape(-1)]
y_test_onehot  = np.eye(len(np.unique(y)))[y_test.values.reshape(-1)]
scaler = StandardScaler()
X_test = scaler.fit_transform(X_test)

## 5. Nueral network implementation

In [35]:
class NN:
    def __init__(self, X, y, num_of_layers, size_of_layers):
        
        size_of_input = X.shape[1]

        size_of_layers.insert(0, size_of_input)
        self.size_of_layers = size_of_layers
        
        num_of_layers = len(size_of_layers) 
        self.num_of_layers = num_of_layers
        #weights initialize
        #weight matrix for every layer each vector represent one node

        self.weights = [np.random.randn(size_of_layers[i], size_of_layers[i-1]) for i in range(1, num_of_layers)]
        self.biases = [np.zeros((size_of_layers[i], 1)) for i in range(1, num_of_layers)]
        
    def sigmoid(self,z):
        return 1 / (1+ np.exp(-z))
    
    def sigmoid_prime(self,a):
        return a * (1 - a)
    
    def mse(self,y, y_pred):
        return np.mean((y - y_pred) ** 2)
    
    def forward(self, X):
        self.outputs = []
        self.activations = [X]
            
        for i in range(self.num_of_layers - 1):
            z = np.dot(self.weights[i], self.activations[i]) + self.biases[i] #get net input for each node in the layer
            output = self.sigmoid(z)
            self.outputs.append(output) 
            self.activations.append(output) #append the output in activations to feedforward next layer
        return self.activations[-1]
    
    def backward(self, X, y, eta):
        n = X.shape[0]
        
        #Calculate gradient for output layer (dL/da)
        error = (self.activations[-1] - y) / n
        delta = error * self.sigmoid_prime(self.outputs[-1])
        
        #Update weights in output layer
        self.weights[-1] -= eta * np.dot(delta, self.activations[-2].T)
        self.biases[-1]  -= eta * np.sum(delta, axis = 1,keepdims = True)
        
        #backprop till and update all layers' weights
        for i in range(self.num_of_layers - 3, -1, -1):
            error = np.dot(self.weights[i + 1].T, delta)
            delta = error * self.sigmoid_prime(self.outputs[i])
            
            #update Weights 
            self.weights[i] -= eta * np.dot(delta, self.activations[i].T)
            self.biases[i]  -= eta * np.sum(delta, axis = 1, keepdims = True)
            
    def train(self, X, y, epochs, eta):
        for epoch in range(epochs):
            output = self.forward(X)
            self.backward(X, y, eta)
            
            if epoch % 100 == 0:
                loss = self.mse(y, output)
                print(f"Epoch {epoch}: loss = {loss}")
                
    def predict(self, X):
        return self.forward(X)

### 6.Testing NN class

In [10]:
def softmax(z):
    exp_z = np.exp(z)
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

In [9]:
nn = NN(X_train, y_train_onehot, num_of_layers = 2, size_of_layers = [2, 10])

In [10]:
nn.train(X_train.T, y_train_onehot.T, epochs=1000, eta=0.1)

Epoch 0: loss = 0.37230836813331447
Epoch 100: loss = 0.08576912594460147
Epoch 200: loss = 0.0818102914053868
Epoch 300: loss = 0.07936286093797296
Epoch 400: loss = 0.07746701526622053
Epoch 500: loss = 0.07583351455817713
Epoch 600: loss = 0.07447031705231309
Epoch 700: loss = 0.07345016472469755
Epoch 800: loss = 0.07265121129555052
Epoch 900: loss = 0.07202165491026256


In [11]:
# Make predictions on the test set
predictions = nn.predict(X_test.T)
predicted_labels = softmax(predictions)

# Calculate accuracy
accuracy = np.mean(predicted_labels == y_test)
print("Accuracy:", accuracy * 100, '%')

Accuracy: 38.707142857142856 %


## 7. Testing

### Architecture #1 [2 layers 1 hidden and 1 output]

![Image Title](archi1.png)

#### Design the neural network

In [15]:
archi_1 = NN(X_train, y_train,num_of_layers = 2, size_of_layers = [2, 10])

#### Training

In [16]:
archi_1.train(X_train.T, y_train_onehot.T, epochs=1000, eta=0.1)

Epoch 0: loss = 0.31005271580906196
Epoch 100: loss = 0.08796086690231483
Epoch 200: loss = 0.082776575032702
Epoch 300: loss = 0.07753233724439461
Epoch 400: loss = 0.07390045780519038
Epoch 500: loss = 0.07208622071509892
Epoch 600: loss = 0.07104580742979892
Epoch 700: loss = 0.0703268214826149
Epoch 800: loss = 0.06979386427217543
Epoch 900: loss = 0.0693892574884638


#### Testing and accuracy

In [17]:
# Make predictions on the test set
predictions_archi1 = archi_1.predict(X_test.T)
predicted_labels_archi_1 = np.argmax(predictions_archi1, axis=0)

# Calculate accuracy
accuracy_archi_1 = np.mean(predicted_labels_archi_1 == y_test)
print("Accuracy:", accuracy_archi_1 * 100, '%')

Accuracy: 38.75714285714286 %


### Architecture #2 [3 layers 2 hidden and 1 output , # of neurons in first layer < # of neurons in second layer]

![Image Title](archi2.png)

#### Design the neural network

In [20]:
archi_2 = NN(X_train, y_train, num_of_layers = 6, size_of_layers = [16, 20, 24, 16, 10])


#### Training

In [23]:
archi_2.train(X_train.T, y_train_onehot.T, epochs=10000, eta=0.1)

Epoch 0: loss = 0.07969738712439782
Epoch 100: loss = 0.06289644820094316
Epoch 200: loss = 0.05035454522090553
Epoch 300: loss = 0.039370927495596106
Epoch 400: loss = 0.033150326016212144
Epoch 500: loss = 0.029275843056786755
Epoch 600: loss = 0.026696652365618523
Epoch 700: loss = 0.024824868945244792
Epoch 800: loss = 0.02338751032462437
Epoch 900: loss = 0.02225116550667232
Epoch 1000: loss = 0.02130226357421026
Epoch 1100: loss = 0.020529227404638514
Epoch 1200: loss = 0.019859544132210537
Epoch 1300: loss = 0.019274457338047606
Epoch 1400: loss = 0.018767617328164024
Epoch 1500: loss = 0.01830843035756627
Epoch 1600: loss = 0.017876743380677188
Epoch 1700: loss = 0.01749976001808616
Epoch 1800: loss = 0.01715759270693522
Epoch 1900: loss = 0.016848976077197285
Epoch 2000: loss = 0.016584048646400215
Epoch 2100: loss = 0.01633308680521238
Epoch 2200: loss = 0.0160998380894252
Epoch 2300: loss = 0.015878055178682664
Epoch 2400: loss = 0.015667524033127433
Epoch 2500: loss = 0.015

#### Testing and accuracy

In [30]:
import tensorflow as tf

In [55]:
# Make predictions on the test set
predictions_archi_2 = archi_2.predict(X_train.T)
softmax_probs = tf.nn.softmax(predictions_archi_2.T)

# Get predicted classes
predicted_classes = np.argmax(softmax_probs, axis=1)

# # Calculate accuracy
print(predicted_classes)
accuracy_archi_2 = np.mean(predicted_classes == y_train)
print("Accuracy:", accuracy_archi_2 * 100, '%')

[5 4 8 ... 1 0 0]
Accuracy: 93.50892857142857 %


Accuracy: 87.9857142857143 %


### Architecture #2 [3 layers 2 hidden and 1 output , # of neurons in first layer > # of neurons in second layer]

![Image Title](archi3.png)

#### Design the neural network

In [9]:
archi_3 = NN(X_train, y_train, num_of_layers = 3, size_of_layers = [3, 2, 10])

#### Training

In [10]:
archi_3.train(X_train.T, y_train_onehot.T, epochs=1000, eta=0.1)

Epoch 0: loss = 0.272844156759178
Epoch 100: loss = 0.08785938343905943
Epoch 200: loss = 0.08493492792778222
Epoch 300: loss = 0.08150011092464554
Epoch 400: loss = 0.0783043072606244
Epoch 500: loss = 0.07599735130537534
Epoch 600: loss = 0.07397083635549492
Epoch 700: loss = 0.07208439173023358
Epoch 800: loss = 0.07076196643169586
Epoch 900: loss = 0.06971070500519923


#### Testing and accuracy

In [13]:
# Make predictions on the test set
predictions_archi_3 = archi_3.predict(X_test.T)
predicted_labels_archi_3 = np.argmax(predictions_archi_3, axis=0)

# Calculate accuracy
accuracy_archi_3 = np.mean(predicted_labels_archi_3 == y_test)
print("Accuracy:", accuracy_archi_3 * 100, '%')

Accuracy: 41.75 %
