In [34]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import copy

In [35]:
# Read the CSV into a DataFrame; the relative path is resolved against the
# notebook's current working directory.
df = pd.read_csv("fashion-mnist_test.csv")

In [36]:
# Keep only numeric columns and drop any row containing a missing value.
df_numeric = df.select_dtypes(include=[np.number])
df_numeric = df_numeric.dropna()

# Separate the feature matrix from the 'label' column.
X = df_numeric.drop('label', axis=1).values
Y = df_numeric['label'].values

# Scale features by 255 (presumably 8-bit pixel intensities -> [0, 1]; confirm against the data).
X = X/255.00

# A fixed random_state makes the 70/30 split, and therefore the training run and
# the reported accuracies, reproducible under "Restart Kernel -> Run All".
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

# The network code expects one example per column: features become
# (n_features, m) and labels become a single row of shape (1, m).
X_train = X_train.T
X_test = X_test.T
Y_train = Y_train.reshape(1, -1)
Y_test = Y_test.reshape(1, -1)

print("X_train shape:", X_train.shape)
print("Y_train shape:", Y_train.shape)
print("X_test shape:", X_test.shape)
print("Y_test shape:", Y_test.shape)

X_train shape: (784, 7000)
Y_train shape: (1, 7000)
X_test shape: (784, 3000)
Y_test shape: (1, 3000)


In [37]:
# Units per layer, input layer first: 784 inputs, hidden layers of 128 and 64
# units, and 10 output units (presumably one per class label - confirm labels are 0-9).
layers_dims = [784,128,64,10]

In [38]:

def relu(x):
    """Apply the rectified linear unit element-wise.

    Args:
        x: Array of pre-activation values.

    Returns:
        A tuple ``(A, cache)``: ``A`` is ``max(0, x)`` taken element-wise and
        ``cache`` is the input ``x`` itself, kept for the backward pass.
    """
    activated = np.maximum(0, x)
    return activated, x

def softmax(x):
    """Compute a softmax along axis 0, so every column of the result sums to 1.

    The column maximum is subtracted before exponentiating; this leaves the
    result unchanged mathematically but keeps ``np.exp`` from overflowing.

    Args:
        x: Array of scores.

    Returns:
        Array of the same shape as ``x`` holding the normalised exponentials.
    """
    shifted = x - np.max(x, axis=0, keepdims=True)
    exponentials = np.exp(shifted)
    column_totals = np.sum(exponentials, axis=0, keepdims=True)
    return exponentials / column_totals

def relu_backward(dA, cache):
    """Back-propagate a gradient through a ReLU activation.

    Args:
        dA: Gradient with respect to the ReLU output.
        cache: The pre-activation array that was fed to the ReLU.

    Returns:
        An array shaped and typed like ``cache`` that equals ``dA`` where the
        pre-activation was strictly positive and 0 everywhere else.
    """
    active = cache > 0
    grad_Z = np.zeros_like(cache)
    # Only units that were active in the forward pass let the gradient through.
    grad_Z[active] = dA[active]
    return grad_Z


In [39]:
def initialize_parameters_deep(layer_dims, seed=3):
    """Create He-initialised weights and zero biases for a fully connected network.

    Args:
        layer_dims: Sequence of layer sizes, input layer first.
        seed: Value passed to ``np.random.seed`` before drawing weights. The
            default of 3 reproduces the previously hard-coded behaviour. Pass
            ``None`` to leave NumPy's global random state untouched.

    Returns:
        Dict with, for each layer ``l`` in ``1..len(layer_dims)-1``:
        ``'W<l>'`` of shape ``(layer_dims[l], layer_dims[l-1])`` and
        ``'b<l>'`` of shape ``(layer_dims[l], 1)``.
    """
    # NOTE: seeding mutates NumPy's *global* RNG, which affects any later
    # np.random call made by the caller.
    if seed is not None:
        np.random.seed(seed)
    parameters = {}
    L = len(layer_dims)
    for l in range(1, L):
        fan_in = layer_dims[l-1]
        # He initialisation: standard normal scaled by sqrt(2 / fan_in).
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], fan_in) * np.sqrt(2 / fan_in)
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))
    return parameters


In [40]:
def linear_forward(A, W, b):
    """Compute the affine part of a layer, ``Z = W . A + b``.

    Args:
        A: Activations coming from the previous layer (or the input data).
        W: Weight matrix of the layer.
        b: Bias of the layer, added to the product via broadcasting.

    Returns:
        A tuple ``(Z, cache)`` where ``cache`` is ``(A, W, b)``, stored so the
        backward pass can reuse the inputs.
    """
    linear_cache = (A, W, b)
    pre_activation = np.dot(W, A) + b
    return pre_activation, linear_cache

In [41]:
def linear_activation_forward(A_prev, W, b, activation):
    """Run one layer forward: the affine step followed by its activation.

    Args:
        A_prev: Activations from the previous layer (or the input data).
        W: Weight matrix of this layer.
        b: Bias of this layer.
        activation: Either ``"relu"`` or ``"softmax"``.

    Returns:
        A tuple ``(A, cache)`` where ``A`` is the layer output and ``cache`` is
        ``(linear_cache, activation_cache)`` for use in the backward pass.

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
    """
    # Validate first: previously an unknown name skipped both branches and
    # surfaced later as an opaque UnboundLocalError.
    if activation not in ("relu", "softmax"):
        raise ValueError(f"Unsupported activation: {activation!r}")

    # The affine step is identical for both activations.
    Z, linear_cache = linear_forward(A_prev, W, b)
    if activation == "relu":
        A, activation_cache = relu(Z)
    else:
        A = softmax(Z)
        activation_cache = Z
    cache = (linear_cache, activation_cache)
    return A, cache

In [42]:
def L_model_forward(X, parameters):
    """Forward pass through the whole network.

    Every layer except the last uses a ReLU activation; the last layer uses
    softmax.

    Args:
        X: Input data fed to the first layer.
        parameters: Dict holding ``'W1'``, ``'b1'``, ..., ``'WL'``, ``'bL'``.

    Returns:
        A tuple ``(AL, caches)``: the output of the final layer and a list with
        one cache per layer, in layer order.
    """
    # Each layer contributes one W and one b entry.
    num_layers = len(parameters) // 2
    caches = []
    A_curr = X

    for idx in range(1, num_layers):
        A_curr, layer_cache = linear_activation_forward(
            A_curr, parameters[f"W{idx}"], parameters[f"b{idx}"], activation="relu"
        )
        caches.append(layer_cache)

    AL, output_cache = linear_activation_forward(
        A_curr, parameters[f"W{num_layers}"], parameters[f"b{num_layers}"], activation="softmax"
    )
    caches.append(output_cache)
    return AL, caches

In [43]:
def compute_cost(AL, Y, eps=1e-12):
    """Mean cross-entropy cost for integer class labels.

    Args:
        AL: Predicted probabilities, indexed as ``AL[class, example]``.
        Y: Integer class labels with shape ``(1, m)``; only row 0 is read.
        eps: Lower bound applied to the selected probabilities before taking
            the log, so a probability that underflowed to 0 cannot produce
            ``-inf``. Values at or above ``eps`` are unaffected.

    Returns:
        The average negative log-probability of the true class, squeezed to a
        scalar.
    """
    m = Y.shape[1]
    # Pick, for each example (column), the probability assigned to its true class.
    true_class_probs = AL[Y[0, :], np.arange(m)]
    log_probs = np.log(np.clip(true_class_probs, eps, None))
    cost = -np.sum(log_probs) / m
    cost = np.squeeze(cost)
    return cost

In [44]:
def linear_backward(dZ, cache):
    """Back-propagate through the affine step of one layer.

    Args:
        dZ: Gradient of the cost with respect to this layer's pre-activation.
        cache: The ``(A_prev, W, b)`` tuple saved during the forward pass.

    Returns:
        A tuple ``(dA_prev, dW, db)``: gradients with respect to the previous
        layer's activations, this layer's weights, and this layer's bias.
        ``dW`` and ``db`` are averaged over the ``A_prev.shape[1]`` examples.
    """
    A_prev, W, _bias = cache
    inv_m = 1 / A_prev.shape[1]

    grad_W = inv_m * np.dot(dZ, A_prev.T)
    grad_b = inv_m * np.sum(dZ, axis=1, keepdims=True)
    grad_A_prev = np.dot(W.T, dZ)
    return grad_A_prev, grad_W, grad_b

In [45]:
def linear_activation_backward(dA, cache, activation):
    """Back-propagate through one layer (activation, then affine step).

    Args:
        dA: For ``"relu"``, the gradient with respect to the layer's output.
            For ``"softmax"``, the value is used directly as the gradient with
            respect to the pre-activation, so the caller must pass dZ.
        cache: The ``(linear_cache, activation_cache)`` tuple from the forward pass.
        activation: Either ``"relu"`` or ``"softmax"``.

    Returns:
        A tuple ``(dA_prev, dW, db)`` as produced by ``linear_backward``.

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
    """
    # Validate first: previously an unknown name skipped both branches and
    # surfaced later as an opaque UnboundLocalError.
    if activation not in ("relu", "softmax"):
        raise ValueError(f"Unsupported activation: {activation!r}")

    linear_cache, activation_cache = cache
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
    else:
        # Softmax: the incoming value is already the pre-activation gradient.
        dZ = dA
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db

In [46]:
def L_model_backward(AL, Y, caches):
    """Backward pass through the whole network.

    Args:
        AL: Output of the final (softmax) layer, indexed as ``AL[class, example]``.
        Y: Integer class labels with shape ``(1, m)``; only row 0 is read.
        caches: List of per-layer caches returned by the forward pass.

    Returns:
        Dict of gradients keyed ``'dA<l>'`` for ``l`` in ``0..L-1`` and
        ``'dW<l>'`` / ``'db<l>'`` for ``l`` in ``1..L``, where ``L`` is
        ``len(caches)``.
    """
    grads = {}
    L = len(caches)
    m = AL.shape[1]

    # Output-layer pre-activation gradient: the predicted probabilities with 1
    # subtracted at each example's true class. Each (row, column) pair is hit
    # exactly once because the column indices are distinct, so one vectorised
    # update is equivalent to a per-example loop.
    dZL = AL.copy()
    dZL[Y[0, :m], np.arange(m)] -= 1

    dA_prev_temp, dW_temp, db_temp = linear_activation_backward(dZL, caches[L-1], activation="softmax")
    grads["dA" + str(L-1)] = dA_prev_temp
    grads["dW" + str(L)]   = dW_temp
    grads["db" + str(L)]   = db_temp

    # Walk the remaining (ReLU) layers from last to first.
    for l in reversed(range(L-1)):
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l+1)], current_cache, activation="relu")
        grads["dA" + str(l)]     = dA_prev_temp
        grads["dW" + str(l+1)]   = dW_temp
        grads["db" + str(l+1)]   = db_temp

    return grads
        

In [47]:
def update_parameters(params, grads, learning_rate):
    """Apply one gradient-descent step and return the updated parameters.

    Args:
        params: Dict holding ``'W1'``, ``'b1'``, ..., ``'WL'``, ``'bL'``.
        grads: Dict holding the matching ``'dW<l>'`` and ``'db<l>'`` gradients.
        learning_rate: Step size multiplying each gradient.

    Returns:
        A new dict with ``W<l> - learning_rate * dW<l>`` and
        ``b<l> - learning_rate * db<l>`` for each layer. ``params`` and the
        arrays it holds are left unmodified.
    """
    # A shallow copy is enough: each W/b entry is rebound to a freshly computed
    # array below, so the caller's arrays are never written to. This avoids
    # deep-copying every weight matrix on every training iteration.
    parameters = dict(params)
    L = len(parameters) // 2
    for l in range(1, L + 1):
        parameters["W" + str(l)] = parameters["W" + str(l)] - learning_rate * grads["dW" + str(l)]
        parameters["b" + str(l)] = parameters["b" + str(l)] - learning_rate * grads["db" + str(l)]
    return parameters

     

In [48]:
def L_layer_model(X, Y, layers_dims, learning_rate = 0.0075, num_iterations = 1000, print_cost=False):
    """Train the network with full-batch gradient descent.

    Args:
        X: Training inputs, one example per column.
        Y: Integer class labels with shape ``(1, m)``.
        layers_dims: Sequence of layer sizes, input layer first.
        learning_rate: Step size for each gradient-descent update.
        num_iterations: Number of full-batch update steps.
        print_cost: When True, print the cost every 100 iterations.

    Returns:
        A tuple ``(parameters, costs)``: the trained parameter dict and the
        list of costs sampled every 100 iterations. The cost history is
        recorded whether or not ``print_cost`` is set.
    """
    # NOTE: initialize_parameters_deep seeds NumPy's global RNG itself, so this
    # call does not influence the initial weights.
    np.random.seed(1)
    costs = []
    parameters = initialize_parameters_deep(layers_dims)
    for i in range(num_iterations):
        AL, caches = L_model_forward(X, parameters)
        cost = compute_cost(AL, Y)
        grads = L_model_backward(AL, Y, caches)
        parameters = update_parameters(parameters, grads, learning_rate)
        if i % 100 == 0:
            # Record independently of printing so callers always get a history.
            costs.append(cost)
            if print_cost:
                print(f"Cost after iteration {i}: {cost}")

    return parameters, costs

In [49]:
# Train on the training split for 2000 iterations, printing the cost every 100.
layers_dims = [784, 128, 64, 10]  # same layer sizes as assigned earlier in the notebook
parameters, costs = L_layer_model(X_train, Y_train, layers_dims,learning_rate=0.0075, num_iterations=2000, print_cost=True)


Cost after iteration 0: 2.396536481375951
Cost after iteration 100: 1.4649942077675357
Cost after iteration 200: 1.0948729786452815
Cost after iteration 300: 0.9165322364721422
Cost after iteration 400: 0.8180115854951153
Cost after iteration 500: 0.7531976335553506
Cost after iteration 600: 0.7060486650179494
Cost after iteration 700: 0.6692273475351636
Cost after iteration 800: 0.6395433920961912
Cost after iteration 900: 0.6149793311470708
Cost after iteration 1000: 0.5943096277146485
Cost after iteration 1100: 0.5765465325521487
Cost after iteration 1200: 0.5609438416098197
Cost after iteration 1300: 0.5470892024558881
Cost after iteration 1400: 0.5348893296888304
Cost after iteration 1500: 0.5239729377727883
Cost after iteration 1600: 0.5140995644195416
Cost after iteration 1700: 0.5051052036893914
Cost after iteration 1800: 0.4968793480152223
Cost after iteration 1900: 0.48931789914666324


In [50]:
def predict(X, parameters):
    """Return the predicted class index for every example in ``X``.

    Args:
        X: Input data, passed unchanged to the forward pass.
        parameters: Trained parameter dict.

    Returns:
        1-D array holding, for each column of the network output, the row
        index with the highest value.
    """
    class_scores, _caches = L_model_forward(X, parameters)
    return np.argmax(class_scores, axis=0)

# Predicted class indices for both splits.
Y_pred_train = predict(X_train, parameters)
Y_pred_test = predict(X_test, parameters)

# Accuracy in percent: the share of predictions equal to the true labels,
# which live in row 0 of the (1, m) label arrays.
train_acc = 100 * np.mean(Y_pred_train == Y_train[0])
test_acc = 100 * np.mean(Y_pred_test == Y_test[0])

print(f"Train Accuracy: {train_acc:.2f}%")
print(f"Test Accuracy: {test_acc:.2f}%")


Train Accuracy: 84.10%
Test Accuracy: 82.40%
