In [543]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.metrics import *
from sklearn.model_selection import train_test_split
import imgaug.augmenters as iaa

In [544]:
(x, y), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train, x_val, y_train, y_val = train_test_split(x,y, test_size=0.1, random_state=42)

y_ohe=pd.get_dummies(y_train).values
# y_test_ohe=pd.get_dummies(y_test).values
# y_val_ohe=pd.get_dummies(y_val).values

In [545]:
x_train=x_train.astype('float32')/255.0
x_test=x_test.astype('float32')/255.0
x_val=x_val.astype('float32')/255.0

In [546]:
seq = iaa.Sequential([
    iaa.Crop(px=(0, 10)),
    iaa.Fliplr(0.5),
    iaa.GaussianBlur(sigma=(0, 1.0))
])

x_aug = seq.augment_images(x_train)
x_test_aug = seq.augment_images(x_test)
x_val_aug = seq.augment_images(x_val)


In [547]:
x_aug=x_aug.reshape(x_aug.shape[0],x_aug.shape[1]*x_aug.shape[2])
x_test_aug=x_test_aug.reshape(x_test_aug.shape[0],x_test_aug.shape[1]*x_test_aug.shape[2])
x_val_aug=x_val_aug.reshape(x_val_aug.shape[0],x_val_aug.shape[1]*x_val_aug.shape[2])

In [548]:
def initialize_parameters(n_in, n_h, n_out):

    W1=np.random.randn(n_in,n_h) * np.sqrt(2 / (n_in + n_h)) 
    b1=np.random.randn(1, n_h) * 0.01#np.sqrt(2 / (1 + n_h))
    W2=np.random.randn(n_h,n_h) * np.sqrt(2 / (n_h + n_h)) 
    b2=np.random.randn(1, n_h) * 0.01#np.sqrt(2 / (1 + n_h))
    W3=np.random.randn(n_h,n_out) * np.sqrt(2 / (n_h + n_out)) 
    b3=np.random.randn(1, n_out) * 0.01#np.sqrt(2 / (1 + n_out))

    parameters = {"W1": W1,"b1": b1,"W2": W2,"b2": b2,"W3": W3,"b3": b3}
    
    return parameters


In [549]:
def relu(x):
        return np.maximum(0, x)

In [550]:
def softmax(x):
    e_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return e_x / np.sum(e_x, axis=1, keepdims=True)

In [551]:
def reluDeriv(x):
        return x > 0 

In [552]:
def forward_propagation(X, parameters):
    W1=parameters["W1"]
    b1=parameters["b1"]
    W2=parameters["W2"]
    b2=parameters["b2"]  
    W3=parameters["W3"]
    b3=parameters["b3"]   
    
    Z1=np.dot(X,W1)+b1
    A1=relu(Z1)
    Z2=np.dot(A1,W2)+b2
    A2=relu(Z2)
    Z3=np.dot(A2,W3)+b3
    A3=softmax(Z3)
    
    values = {"Z1": Z1,"A1": A1,"Z2": Z2,"A2": A2, "Z3": Z3,"A3": A3}
    
    return values

In [553]:
def backward_propagation(parameters, values, X, Y):

    m = X.shape[1]
    W2=parameters["W2"]
    W3=parameters["W3"]
    Z1=values["Z1"]
    Z2=values["Z2"]
    A1=values["A1"]
    A2=values["A2"]
    A3=values["A3"]

    dZ3 = A3-Y
    dW3 = np.dot(A2.T,dZ3)/m
    db3 = np.sum(dZ3,axis=0,keepdims=True)/m

    dZ2 = np.multiply(np.dot(dZ3,W3.T),reluDeriv(Z2))
    dW2 = np.dot(A1.T,dZ2)/m
    db2 = np.sum(dZ2,axis=0,keepdims=True)/m

    dZ1 = np.multiply(np.dot(dZ2,W2.T),reluDeriv(Z1))
    dW1 = np.dot(X.T,dZ1)/m
    db1 = np.sum(dZ1,axis=0,keepdims=True)/m
    
    gradients = {"dW1": dW1,"db1": db1,"dW2": dW2,"db2": db2,"dW3": dW3,"db3": db3}
    
    return gradients

In [554]:
def update_parameters(parameters, gradients, alpha):
    parameters["W1"]-=(alpha * gradients["dW1"])
    parameters["b1"]-=(alpha * gradients["db1"])
    parameters["W2"]-=(alpha * gradients["dW2"])
    parameters["b2"]-=(alpha * gradients["db2"])
    parameters["W3"]-=(alpha * gradients["dW3"])
    parameters["b3"]-=(alpha * gradients["db3"])
    return parameters

In [555]:
def nn_model(X, Y, epoch, alpha):
    parameters=initialize_parameters(X.shape[1],100,Y.shape[1])
    for i in range(epoch):
        values = forward_propagation(X, parameters)
        grads = backward_propagation(parameters, values, X, Y)
        parameters = update_parameters(parameters, grads, alpha)
        # if i % 10 == 0:
            # acc = accuracy_score(np.argmax(Y, axis=1),  np.argmax(values["A3"], axis=1))
            # print(f"Epoch {i}, Accuracy: {acc}")
    return parameters

In [556]:
final=nn_model(x_aug,y_ohe,300,0.005)

In [None]:
print("Training Accuracy: ")
vals=forward_propagation(x_aug, final)
y_true = np.argmax(vals["A3"], axis=1)
report1 = classification_report(y_train, y_true)
print(report1)

print("Testing Accuracy: ")
vals=forward_propagation(x_test_aug, final)
y_true = np.argmax(vals["A3"], axis=1)
report2 = classification_report(y_test, y_true)
print(report2)

print("Validation Accuracy: ")
vals=forward_propagation(x_val_aug, final)
y_true = np.argmax(vals["A3"], axis=1)
report3 = classification_report(y_val, y_true)
print(report3)

Training Accuracy: 
              precision    recall  f1-score   support

           0       0.56      0.79      0.65      5299
           1       0.73      0.95      0.82      6088
           2       0.51      0.70      0.59      5386
           3       0.54      0.31      0.40      5542
           4       0.62      0.73      0.67      5262
           5       0.43      0.27      0.33      4870
           6       0.71      0.49      0.58      5338
           7       0.49      0.83      0.62      5632
           8       0.79      0.25      0.38      5266
           9       0.65      0.45      0.53      5317

    accuracy                           0.59     54000
   macro avg       0.60      0.58      0.56     54000
weighted avg       0.60      0.59      0.56     54000

Testing Accuracy: 
              precision    recall  f1-score   support

           0       0.55      0.80      0.65       980
           1       0.75      0.95      0.84      1135
           2       0.52      0.69      