In [92]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import random
RANDOM_SEED = 42

np.random.seed(RANDOM_SEED)


In [93]:
train = pd.read_csv(r"C:\Users\User\Downloads\Week-4-Project\Week-4-Project\fashion-mnist_train.csv")
test = pd.read_csv(r"C:\Users\User\Downloads\Week-4-Project\Week-4-Project\fashion-mnist_test.csv")

Data Transformation

In [94]:
X_train = np.array(train.drop('label',axis=1)).T/255
X_test =np.array(test.drop('label',axis=1)).T/255
X_train = X_train.astype(int)
X_test = X_test.astype(int)

In [95]:
Y_train = np.zeros((10,60000)).astype(int)
y = np.array(train['label'])
for i in range(60000):
  clas = y[i]
  Y_train[:,i][clas] = 1
Y_train.shape

(10, 60000)

In [96]:
Y_test = np.zeros((10,10000)).astype(int)
y1 = np.array(test['label'])
for i in range(10000):
  clas = y1[i]
  Y_train[:,i][clas] = 1
Y_train.shape

(10, 60000)

In [97]:
print("shape of X_train :", X_train.shape, X_train.dtype)
print("shape of Y_train :", Y_train.shape, Y_train.dtype)
print("shape of X_test :", X_test.shape, X_test.dtype)
print("shape of Y_test :", Y_test.shape, Y_test.dtype)

shape of X_train : (784, 60000) int32
shape of Y_train : (10, 60000) int32
shape of X_test : (784, 10000) int32
shape of Y_test : (10, 10000) int32


In [109]:
Y_train[1]

array([0, 1, 0, ..., 0, 0, 0])

Model Components

In [98]:
#Initalising Parameters
def initialise_parameters():
    W1 = np.random.rand(10, 784) - 0.5
    b1 = np.random.rand(10, 1) - 0.5
    W2 = np.random.rand(10, 10) - 0.5
    b2 = np.random.rand(10, 1) - 0.5
    return W1, b1, W2, b2

In [99]:
#Defining required functions

def ReLU(Z):
    return np.maximum(Z, 0)

def softmax(Z):
    A = np.exp(Z) / sum(np.exp(Z))
    return A

def ReLU_deriv(Z):
    return Z > 0

In [100]:
#Defining Model Components

def forward_prop(W1, b1, W2, b2, X):
    
    Z1 = W1.dot(X) + b1
    A1 = ReLU(Z1)
    Z2 = W2.dot(A1) + b2
    A2 = softmax(Z2)

    return Z1, A1, Z2, A2


def cost_function(A2, Y):
    m = Y.shape[1]
    
    cost = -(1/m)*np.sum(Y*np.log(A2))
    
    #cost = -(1/m)*np.sum(np.sum(y*np.log(a2, 0), 1))
    
    return cost

def backward_prop(x, y, W1, W2, Z1, A1, Z2, A2): #Z1, A1, Z2, A2, W1, W2, X, Y
    
    m = x.shape[1]
    
    dz2 = A2 - y
    dw2 = (1/m)*np.dot(dz2, A1.T)
    db2 = (1/m)*np.sum(dz2, axis = 1, keepdims = True)
    
    dz1 = (1/m)*np.dot(W2.T, dz2)*ReLU_deriv(A1)
    dw1 = (1/m)*np.dot(dz1, x.T)
    db1 = (1/m)*np.sum(dz1, axis = 1, keepdims = True)
    
    return dw1, db1, dw2, db2
     

In [101]:
#Updating 

def updateparams(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * db1    
    W2 = W2 - alpha * dW2  
    b2 = b2 - alpha * db2    
    return W1, b1, W2, b2

Model Code

In [102]:
def get_predictions(A2):
    return np.argmax(A2, 0)

def get_accuracy(predictions, Y):
    return np.sum(predictions == Y) / Y.size

def gradient_descent(X, Y, iterations, alpha):
    W1, b1, W2, b2 = initialise_parameters()
    improvement = []
    for i in range(iterations):
        Z1, A1, Z2, A2 = forward_prop(W1, b1, W2, b2, X)
        dW1, db1, dW2, db2 = backward_prop(X, Y, W1, W2, Z1, A1, Z2, A2)
        W1, b1, W2, b2 = updateparams(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)
        if i % 10 == 0:
            print("Iteration: ", i)
            predictions = get_predictions(A2)
            print("The accurary is:  ", (get_accuracy(predictions, Y)*100), "%")
            acc = get_accuracy(predictions, Y) *100
            improvement.append(acc)
    return W1, b1, W2, b2

In [105]:
W1, b1, W2, b2 = gradient_descent(X_train, Y_train, 500, 10)

Iteration:  0
The accurary is:   3.7144999999999997 %
Iteration:  10
The accurary is:   19.487333333333336 %
Iteration:  20
The accurary is:   3.0246666666666666 %
Iteration:  30
The accurary is:   43.35183333333333 %
Iteration:  40
The accurary is:   51.478500000000004 %
Iteration:  50
The accurary is:   4.2604999999999995 %
Iteration:  60
The accurary is:   0.004333333333333333 %
Iteration:  70
The accurary is:   43.75966666666667 %
Iteration:  80
The accurary is:   24.15266666666667 %
Iteration:  90
The accurary is:   0.7501666666666666 %
Iteration:  100
The accurary is:   0.0 %
Iteration:  110
The accurary is:   4.079166666666667 %
Iteration:  120
The accurary is:   0.9375 %
Iteration:  130
The accurary is:   0.7661666666666667 %
Iteration:  140
The accurary is:   5.183999999999999 %
Iteration:  150
The accurary is:   0.0 %
Iteration:  160
The accurary is:   0.022000000000000002 %
Iteration:  170
The accurary is:   5.831666666666667 %
Iteration:  180
The accurary is:   4.6516666666

  A = np.exp(Z) / sum(np.exp(Z))
  A = np.exp(Z) / sum(np.exp(Z))


Iteration:  270
The accurary is:   88.4985 %
Iteration:  280
The accurary is:   88.4985 %
Iteration:  290
The accurary is:   88.4985 %
Iteration:  300
The accurary is:   88.4985 %
Iteration:  310
The accurary is:   88.4985 %
Iteration:  320
The accurary is:   88.4985 %
Iteration:  330
The accurary is:   88.4985 %
Iteration:  340
The accurary is:   88.4985 %
Iteration:  350
The accurary is:   88.4985 %
Iteration:  360
The accurary is:   88.4985 %
Iteration:  370
The accurary is:   88.4985 %
Iteration:  380
The accurary is:   88.4985 %
Iteration:  390
The accurary is:   88.4985 %
Iteration:  400
The accurary is:   88.4985 %
Iteration:  410
The accurary is:   88.4985 %
Iteration:  420
The accurary is:   88.4985 %
Iteration:  430
The accurary is:   88.4985 %
Iteration:  440
The accurary is:   88.4985 %
Iteration:  450
The accurary is:   88.4985 %
Iteration:  460
The accurary is:   88.4985 %
Iteration:  470
The accurary is:   88.4985 %
Iteration:  480
The accurary is:   88.4985 %
Iteration:

The model has 88% accuracy on Train set.