In [1]:
import pandas as pd
import numpy as np

In [2]:
x = pd.read_csv("./fashion-mnist_test.csv")
X_ = np.array(x)
X = X_[:,1:]
X = X/255.0
y = X_[:,0]

In [3]:
X.shape,y.shape

((10000, 784), (10000,))

In [4]:
X_train = X[:8000,:]
X_val = X[8000:,:]
y_train = y[:8000]
y_val = y[8000:]

In [5]:
IMG_SIZE = 28*28
H1_SIZE = 256
H2_SIZE = 64
OUT_SIZE = 10
BATCH_SIZE = 256
EPOCH = 50
ALPHA = 0.001

In [6]:
def accuracy(pred,y):
    return ( 100.0* np.sum(pred==y) / y.shape[0])

In [7]:
def initial_weights():
    np.random.seed(0)
    model = {}
    model['W1'] = np.random.randn(IMG_SIZE,H1_SIZE)/ np.sqrt(IMG_SIZE)
    model['B1'] = np.zeros((1,H1_SIZE))
    model['W2'] = np.random.randn(H1_SIZE,H2_SIZE)/ np.sqrt(H1_SIZE)
    model['B2'] = np.zeros((1,H2_SIZE))
    model['W3'] = np.random.randn(H2_SIZE,OUT_SIZE)/ np.sqrt(H2_SIZE)
    model['B3'] = np.zeros((1,OUT_SIZE))
    return model

In [8]:
def forward_prop(model,x):
    z1 = x.dot(model['W1']) + model['B1']
    a1 = np.tanh(z1)
    z2 = a1.dot(model['W2']) + model['B2']
    a2 = np.tanh(z2)
    z3 = a2.dot(model['W3']) + model['B3']
    h_x = np.exp(z3)
    y_out = h_x/ np.sum(h_x, axis=1, keepdims=True)
    return a1, a2, y_out

In [9]:
def back_prop(model, x ,a1 , a2, y, y_out):
    delta4 = y_out
    delta4[range(y.shape[0]), y] -= 1
    dw3 = (a2.T).dot(delta4)
    db3 = np.sum(delta4, axis = 0)
    delta3 = (1 - np.square(a2))*delta4.dot(model['W3'].T)
    dw2 = (a1.T).dot(delta3)
    db2 = np.sum(delta3, axis = 0)
    delta2 = (1 - np.square(a1))*delta3.dot(model['W2'].T)
    dw1 = (x.T).dot(delta2)
    db1 = np.sum(delta2, axis = 0)
    
    model['W1'] += -ALPHA*dw1
    model['B1'] += -ALPHA*db1
    model['W2'] += -ALPHA*dw2
    model['B2'] += -ALPHA*db2
    model['W3'] += -ALPHA*dw3
    model['B3'] += -ALPHA*db3
    
    return model

In [10]:
def loss(model, p, y):
    correct_logprobs = -np.log(p[range(y.shape[0]),y])
    l = np.sum(correct_logprobs)
    
    return 1.0/y.shape[0] * l

In [11]:
def predict(y_out):
    return np.argmax(y_out, axis = 1)

In [12]:
def main():
    model = initial_weights()
    for ix in range(EPOCH):
        print ("\nEpoch : %d" %(ix+1))
        count = 0
        while (count+BATCH_SIZE) < y_train.shape[0]:
            batch_data = X_train[count:(count+BATCH_SIZE),:]
            batch_labels = y_train[count:(count+BATCH_SIZE),]
            count += BATCH_SIZE
            
            a1, a2 , p = forward_prop(model, batch_data)
            model = back_prop(model,batch_data,a1,a2,batch_labels,p)
        
        _,_, p = forward_prop(model, X_train)
        print ('training_loss : % .3f' % (loss(model,p,y_train)))
        _,_,p = forward_prop(model, X_val)
        pred = predict(p)
        print ('val_accuracy : % .3f' % (accuracy(pred,y_val)))
        print ('val_loss : % .3f' % loss(model,p,y_val))
    print("*************Completed***********")

In [13]:
main()


Epoch : 1
training_loss :  0.766
val_accuracy :  71.350
val_loss :  0.758

Epoch : 2
training_loss :  0.662
val_accuracy :  75.850
val_loss :  0.658

Epoch : 3
training_loss :  0.580
val_accuracy :  78.750
val_loss :  0.577

Epoch : 4
training_loss :  0.562
val_accuracy :  78.700
val_loss :  0.549

Epoch : 5
training_loss :  0.536
val_accuracy :  79.300
val_loss :  0.541

Epoch : 6
training_loss :  0.521
val_accuracy :  80.450
val_loss :  0.519

Epoch : 7
training_loss :  0.497
val_accuracy :  81.150
val_loss :  0.501

Epoch : 8
training_loss :  0.501
val_accuracy :  80.850
val_loss :  0.505

Epoch : 9
training_loss :  0.506
val_accuracy :  80.300
val_loss :  0.518

Epoch : 10
training_loss :  0.458
val_accuracy :  81.750
val_loss :  0.475

Epoch : 11
training_loss :  0.447
val_accuracy :  82.500
val_loss :  0.464

Epoch : 12
training_loss :  0.450
val_accuracy :  82.450
val_loss :  0.467

Epoch : 13
training_loss :  0.442
val_accuracy :  82.150
val_loss :  0.470

Epoch : 14
training_