In [1]:
import numpy as np
import requests, gzip, os, hashlib
# Shortcut for from matplotlib import pyplot as PLT \/
%matplotlib inline

In [2]:
# fetch and decode data
path = 'data/'
def fetch(url):
    fp = os.path.join(path, hashlib.md5(url.encode('utf-8')).hexdigest())
    if os.path.isfile(fp):
        with open(fp, "rb") as f:
            data = f.read()
    else:
        with open(fp, "wb") as f:
            data = requests.get(url).content
            f.write(data)
    return np.frombuffer(gzip.decompress(data), dtype=np.uint8).copy()

X = fetch("http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz")[0x10:].reshape((-1, 28, 28))
Y = fetch("http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz")[8:]
X_test = fetch("http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz")[0x10:].reshape((-1, 28*28))
Y_test = fetch("http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz")[8:]

In [3]:
# Validation Split
rand = np.arange(60000)
np.random.shuffle(rand)
train_no = rand[:50000]

val_no = np.setdiff1d(rand,train_no)

X_train,X_val=X[train_no,:,:],X[val_no,:,:]
Y_train,Y_val=Y[train_no],Y[val_no]

In [4]:
def init(x,y):
    layer = np.random.uniform(-1.,1.,size=(x,y))/np.sqrt(x*y)
    return layer.astype(np.float32)

np.random.seed(42)
l1 = init(28*28,128)
l2 = init(128,10)

In [5]:
def sigmoid(x):
    return 1/(np.exp(-x)+1)

def d_sigmoid(x):
    return (np.exp(-x))/((np.exp(-x)+1)**2)

In [6]:
def softmax(x):
    exponents=np.exp(x)
    return exponents/np.sum(exponents)

def d_softmax(x):
    exp_element=np.exp(x-x.max())
    return exp_element/np.sum(exp_element,axis=0)*(1-exp_element/np.sum(exp_element,axis=0))

In [7]:
output_of_layer_2=np.array([12,34,-67,23,0,134,76,24,78,-98])
a=softmax(output_of_layer_2)
a,sum(a)

(array([1.03770332e-053, 3.72007598e-044, 5.09107081e-088, 6.21315959e-049,
        6.37586958e-059, 1.00000000e+000, 6.47023493e-026, 1.68891188e-048,
        4.78089288e-025, 1.75258947e-101]),
 1.0)

In [8]:
x = np.argmax(a) # Returns index of maximum value(most likely solution)
x,output_of_layer_2[x]

(5, 134)

In [9]:
# Redefine to prevent overflow
def softmax(x):
    exp_element=np.exp(x-x.max())
    return exp_element/np.sum(exp_element,axis=0)

#derivative of softmax
def d_softmax(x):
    exp_element=np.exp(x-x.max())
    return exp_element/np.sum(exp_element,axis=0)*(1-exp_element/np.sum(exp_element,axis=0))

In [11]:
def forward_backward_pass(x,y):
    targets = np.zeros((len(y),10), np.float32)
    targets[range(targets.shape[0]),y] = 1
 
    
    x_l1=x.dot(l1)
    x_sigmoid=sigmoid(x_l1)
    x_l2=x_sigmoid.dot(l2)
    out=softmax(x_l2)
   
 
    error=2*(out-targets)/out.shape[0]*d_softmax(x_l2)
    update_l2=x_sigmoid.T@error
    
    
    error=((l2).dot(error.T)).T*d_sigmoid(x_l1)
    update_l1=x.T@error

    return out,update_l1,update_l2 

In [12]:
epochs=10000
lr=0.001
batch=128

losses,accuracies,val_accuracies=[],[],[]

for i in range(epochs):
    sample=np.random.randint(0,X_train.shape[0],size=(batch))
    x=X_train[sample].reshape((-1,28*28))
    y=Y_train[sample]
 

    out,update_l1,update_l2=forward_backward_pass(x,y)
  
    category=np.argmax(out,axis=1)
    accuracy=(category==y).mean()
    accuracies.append(accuracy)
    
    loss=((category-y)**2).mean()
    losses.append(loss.item())
    
    l1=l1-lr*update_l1
    l2=l2-lr*update_l2
    
    if(i%20==0):    
        X_val=X_val.reshape((-1,28*28))
        val_out=np.argmax(softmax(sigmoid(X_val.dot(l1)).dot(l2)),axis=1)
        val_acc=(val_out==Y_val).mean()
        val_accuracies.append(val_acc.item())
    if(i%500==0): print(f'For {i}th epoch: train accuracy: {accuracy:.3f} | validation accuracy:{val_acc:.3f}')



For 0th epoch: train accuracy: 0.062 | validation accuracy:0.070
For 500th epoch: train accuracy: 0.688 | validation accuracy:0.650
For 1000th epoch: train accuracy: 0.805 | validation accuracy:0.737
For 1500th epoch: train accuracy: 0.805 | validation accuracy:0.767
For 2000th epoch: train accuracy: 0.773 | validation accuracy:0.781
For 2500th epoch: train accuracy: 0.742 | validation accuracy:0.790
For 3000th epoch: train accuracy: 0.805 | validation accuracy:0.796
For 3500th epoch: train accuracy: 0.812 | validation accuracy:0.803
For 4000th epoch: train accuracy: 0.867 | validation accuracy:0.808
For 4500th epoch: train accuracy: 0.875 | validation accuracy:0.812
For 5000th epoch: train accuracy: 0.844 | validation accuracy:0.815
For 5500th epoch: train accuracy: 0.812 | validation accuracy:0.817
For 6000th epoch: train accuracy: 0.828 | validation accuracy:0.818
For 6500th epoch: train accuracy: 0.820 | validation accuracy:0.820
For 7000th epoch: train accuracy: 0.875 | validation

In [14]:
test_out=np.argmax(softmax(sigmoid(X_test.dot(l1)).dot(l2)),axis=1)
test_acc=(test_out==Y_test).mean().item()
print(f'Test accuracy = {test_acc*100:.2f}%')


Test accuracy = 83.83%
