<a href="https://colab.research.google.com/github/manasdeshpande125/da6401_assignment1/blob/main/DL_ASG1_Q2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Importing Necessary Libraries**

In [1]:
from keras.datasets import fashion_mnist
import numpy as np
from  matplotlib import pyplot as plt
import time
import math
from sklearn.model_selection import train_test_split
import wandb

**Loading the Dataset**

In [2]:
dataset= fashion_mnist.load_data()
(X_train_and_validation, y_train_and_validation), (X_test, y_test) = dataset
X_train, X_validation, y_train, y_validation = train_test_split(X_train_and_validation, y_train_and_validation, test_size=0.1, random_state=42)
X_train = (X_train/255.0).astype(np.float32)
X_validation = (X_validation/255.0).astype(np.float32)
X_test = (X_test/255.0).astype(np.float32)

print("Train Dataset Shape: ", X_train.shape)
print("Train Target Vector Shape: ", y_train.shape)
print("Test Dataset Shape:", X_test.shape)
print("Test Target Vector Shape", y_test.shape)
print("Validation Dataset Shape:", X_validation.shape)
print("Validation Target Vector Shape", y_validation.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Train Dataset Shape:  (54000, 28, 28)
Train Target Vector Shape:  (54000,)
Test Dataset Shape: (10000, 28, 28)
Test Target Vector Shape (10000,)
Validation Dataset Shape: (6000, 28, 28)
Validation Target Vector Shape (6000,)


In [3]:
X_train = np.array(X_train.reshape(X_train.shape[0], 784,1))
X_test = np.array(X_test.reshape(X_test.shape[0], 784,1))
X_validation = np.array(X_validation.reshape(X_validation.shape[0], 784,1))

**Initialization Functions**

In [4]:
def layer_init(arr,n1,n2,init_type):
    np.random.seed(10)
    if init_type=="random":
        arr.append(np.random.randn(n1,n2)*0.1)
    elif init_type=="xavier":
        arr.append(np.random.randn(n1,n2)*np.sqrt(2/(n1+n2)))
    return arr

def param(num_input_nodes, num_hidden_layers, hidden_layer_size, out_num, init_type):
    W = []
    B = []

    layers = [num_input_nodes]  # Input layer
    layers.extend([hidden_layer_size] * num_hidden_layers)  # Dynamic hidden layers
    layers.append(out_num)  # Output layer

    for i in range(len(layers) - 1):
        W = layer_init(W, layers[i + 1], layers[i], init_type)
        B = layer_init(B, layers[i + 1], 1, init_type)

    return W, B

**Activation Functions**

In [5]:
#Activation function
def activation(activation_function):
    if activation_function == 'sigmoid':
        return sigmoid
    if activation_function == 'tanh':
        return tanh
    if activation_function == 'ReLU':
        return relu

def sigmoid(x, derivative = False):
    if derivative:
        return sigmoid(x)*(1-sigmoid(x))
    return 1/(1 + np.exp(-x))

def tanh(x, derivative = False):
    if derivative:
        return 1 - tanh(x)**2
    return (np.exp(x)-np.exp(-x))/(np.exp(x)+np.exp(-x))

def relu(x, derivative = False):
    if derivative:
        return (x>0)*1
    return x*(x>0)

def softmax(x,derivative = False):
    if derivative:
        return softmax(x)*(1- softmax(x))
    return np.exp(x)/np.sum(np.exp(x), axis = 0)

def one_hot(y, num_output_nodes):
    v = np.zeros((num_output_nodes, len(y)))
    for i,j in enumerate(y):
        v[j,i] = 1
    return v


def softmax1(x,derivative = False):
    if derivative:
        return softmax1(x)*(1- softmax1(x))
    x = np.array(x)
    x -= np.max(x, axis=0, keepdims=True)  # Normalize values to avoid large exponentials
    exp_x = np.exp(x)
    return exp_x / (np.sum(exp_x, axis=0, keepdims=True) + 1e-10)  # Prevent divide by zero

**Feed Forward Propagation**

In [6]:
def forward(x, W, B, activation_type):
    h = []
    a = []
    sigma = activation(activation_type)  #activation
    h.append(x)   #h0 = x
    a.append(np.dot(W[0], h[0]) + B[0])
    for i in range(len(W)-1):
        h.append(sigma(a[-1]))
        a.append(np.dot(W[i+1], h[-1]) + B[i+1])
    y_hat = softmax1(a[-1])

    return y_hat, h, a

**Call to feed forward**

In [7]:
num_inputs_nodes = 784
num_hidden_layers=3
hidden_layer_size = 64
num_output_nodes = 10
init_type = "random"
activation_type = "sigmoid"
x = np.squeeze(X_train[5, :]).reshape(784,1)
#print(x.shape)
W_initial, B_initial = param(num_inputs_nodes, num_hidden_layers, hidden_layer_size, 10, init_type)

y_hat, h, a = forward(x, W_initial, B_initial, activation_type)

In [8]:
print(y_hat)
print(y_train[5])

[[0.16627612]
 [0.08341476]
 [0.093154  ]
 [0.06220563]
 [0.1199608 ]
 [0.08469965]
 [0.08855028]
 [0.09151501]
 [0.04824781]
 [0.16197594]]
9


In [9]:
for i in range(10):
  y_hat, h, a = forward(np.squeeze(X_train[i, :]).reshape(784,1) , W_initial, B_initial, activation_type)
  print("************Output for image ",i, "*************")
  print(y_hat)

************Output for image  0 *************
[[0.16535935]
 [0.08452172]
 [0.09344616]
 [0.06179034]
 [0.12004742]
 [0.08473359]
 [0.08982494]
 [0.09174448]
 [0.04782148]
 [0.16071052]]
************Output for image  1 *************
[[0.16535467]
 [0.08539501]
 [0.09257386]
 [0.06163838]
 [0.11990898]
 [0.08419185]
 [0.08860838]
 [0.09172437]
 [0.04835878]
 [0.16224571]]
************Output for image  2 *************
[[0.16597428]
 [0.08345734]
 [0.09324991]
 [0.06222515]
 [0.11955948]
 [0.08483726]
 [0.08872647]
 [0.09137965]
 [0.04827091]
 [0.16231956]]
************Output for image  3 *************
[[0.16496699]
 [0.08489217]
 [0.09222594]
 [0.06161226]
 [0.12038857]
 [0.08509912]
 [0.08907748]
 [0.09219849]
 [0.04801234]
 [0.16152664]]
************Output for image  4 *************
[[0.16610213]
 [0.08388913]
 [0.09348495]
 [0.06194124]
 [0.11947023]
 [0.08455027]
 [0.08910457]
 [0.09165282]
 [0.04809485]
 [0.16170982]]
************Output for image  5 *************
[[0.16627612]
 [0.0