<a href="https://colab.research.google.com/github/subikkshas/DA6401/blob/main/Q7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from keras.datasets import fashion_mnist
import numpy as np
from  matplotlib import pyplot as plt
import time
import math
from sklearn.model_selection import train_test_split
from tqdm.auto import tqdm

In [None]:
import wandb
!wandb login

[34m[1mwandb[0m: Currently logged in as: [33msubikksha[0m ([33msubikksha-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
# Load Fashion-MNIST and hold out 10% of the official training split for
# validation (fixed random_state so the split is reproducible).
dataset = fashion_mnist.load_data()
(X_train_and_validation, y_train_and_validation), (X_test, y_test) = dataset
X_train, X_validation, y_train, y_validation = train_test_split(
    X_train_and_validation,
    y_train_and_validation,
    test_size=0.1,
    random_state=42,
)

# Rescale every split by 1/255 and store it as float32.
X_train = np.divide(X_train, 255.0).astype(np.float32)
X_validation = np.divide(X_validation, 255.0).astype(np.float32)
X_test = np.divide(X_test, 255.0).astype(np.float32)

# Report the shape of every split (labels are printed exactly as before).
for _label, _split in (
    ("Train Dataset Shape: ", X_train),
    ("Train Target Vector Shape: ", y_train),
    ("Test Dataset Shape:", X_test),
    ("Test Target Vector Shape", y_test),
    ("Validation Dataset Shape:", X_validation),
    ("Validation Target Vector Shape", y_validation),
):
    print(_label, _split.shape)

Train Dataset Shape:  (54000, 28, 28)
Train Target Vector Shape:  (54000,)
Test Dataset Shape: (10000, 28, 28)
Test Target Vector Shape (10000,)
Validation Dataset Shape: (6000, 28, 28)
Validation Target Vector Shape (6000,)


In [None]:
# Flatten every image into a 784x1 column vector: (n_samples, 784, 1).
X_train = X_train.reshape(len(X_train), 784, 1).copy()
X_test = X_test.reshape(len(X_test), 784, 1).copy()
X_validation = X_validation.reshape(len(X_validation), 784, 1).copy()

In [None]:
def layer_init(arr,n1,n2,init_type):
    """Append one freshly initialised ``(n1, n2)`` matrix to ``arr`` and return ``arr``.

    Args:
        arr: list that receives the new matrix (mutated in place).
        n1: number of rows of the new matrix.
        n2: number of columns of the new matrix.
        init_type: ``"random"`` (standard normal scaled by 0.1) or
            ``"xavier"`` (standard normal scaled by ``sqrt(2 / (n1 + n2))``).

    Returns:
        The same list object ``arr`` with one more element.

    Raises:
        ValueError: if ``init_type`` is not one of the supported names.
            Previously an unknown name silently appended nothing, which only
            surfaced later as an index error on the parameter lists.
    """
    if init_type=="random":
        scale = 0.1
    elif init_type=="xavier":
        scale = np.sqrt(2/(n1+n2))
    else:
        raise ValueError(
            f"Unknown init_type {init_type!r}; expected 'random' or 'xavier'"
        )

    # NOTE: the global NumPy RNG is reseeded on every call, so two calls with
    # the same shape and init_type return identical matrices. Kept as-is so
    # existing runs stay reproducible.
    np.random.seed(10)
    arr.append(np.random.randn(n1,n2)*scale)
    return arr

def param(layers,init_type):
    """Build the weight and bias lists for a fully connected network.

    ``layers`` lists the node count of every layer, input first. For each
    consecutive pair ``(layers[i], layers[i+1])`` one weight matrix of shape
    ``(layers[i+1], layers[i])`` and one bias column of shape
    ``(layers[i+1], 1)`` are created through ``layer_init``.

    Returns:
        ``(W, B)``: two lists with ``len(layers) - 1`` entries each.
    """
    weights, biases = [], []
    for idx in range(1, len(layers)):
        fan_in, fan_out = layers[idx-1], layers[idx]
        weights = layer_init(weights, fan_out, fan_in, init_type)
        biases = layer_init(biases, fan_out, 1, init_type)
    return weights, biases

#Activation function
def activation(activation_function):
    """Return the activation callable registered under ``activation_function``.

    Args:
        activation_function: one of ``'sigmoid'``, ``'tanh'`` or ``'ReLU'``
            (names are case-sensitive).

    Returns:
        The matching function; each accepts ``(x, derivative=False)``.

    Raises:
        ValueError: for any other name. Previously ``None`` was returned,
            which only failed later with "'NoneType' object is not callable".
    """
    if activation_function == 'sigmoid':
        return sigmoid
    if activation_function == 'tanh':
        return tanh
    if activation_function == 'ReLU':
        return relu
    raise ValueError(
        f"Unknown activation {activation_function!r}; "
        "expected 'sigmoid', 'tanh' or 'ReLU'"
    )

def sigmoid(x, derivative = False):
    """Logistic function ``1 / (1 + exp(-x))``, applied element-wise.

    With ``derivative=True`` the element-wise derivative ``s * (1 - s)`` is
    returned instead, where ``s`` is the logistic value at ``x``.
    """
    activated = 1/(1 + np.exp(-x))
    if not derivative:
        return activated
    return activated*(1-activated)

def tanh(x, derivative = False):
    """Hyperbolic tangent, applied element-wise.

    Uses ``np.tanh`` rather than the explicit ``(e^x - e^-x) / (e^x + e^-x)``
    quotient: for large ``|x|`` that quotient evaluates to ``inf / inf`` and
    returns NaN, whereas ``np.tanh`` saturates cleanly at +/-1.

    With ``derivative=True`` the element-wise derivative ``1 - tanh(x)**2`` is
    returned instead.
    """
    value = np.tanh(x)
    if derivative:
        return 1 - value**2
    return value

def relu(x, derivative = False):
    """Rectified linear unit, applied element-wise.

    Returns ``x`` where ``x > 0`` and zero elsewhere; with ``derivative=True``
    returns the 0/1 indicator of ``x > 0`` instead.
    """
    is_positive = x > 0
    return is_positive*1 if derivative else x*is_positive

def softmax(x,derivative = False):
    """Column-wise softmax: normalises along axis 0.

    The per-column maximum is subtracted before exponentiating. This leaves
    the result mathematically unchanged but prevents ``exp`` from overflowing
    to ``inf`` (and the quotient from becoming NaN) for large logits.

    With ``derivative=True`` this returns ``s * (1 - s)``, i.e. only the
    diagonal of the softmax Jacobian, not the full Jacobian.
    """
    shifted = x - np.max(x, axis = 0, keepdims = True)
    exps = np.exp(shifted)
    probs = exps/np.sum(exps, axis = 0)
    if derivative:
        return probs*(1 - probs)
    return probs

def one_hot(y, num_output_nodes):
    """One-hot encode integer labels as columns.

    Returns a float array of shape ``(num_output_nodes, len(y))`` in which
    column ``i`` is all zeros except for a 1 in row ``y[i]``.
    """
    encoded = np.zeros((num_output_nodes, len(y)))
    sample_idx = np.arange(len(y))
    encoded[y, sample_idx] = 1
    return encoded


def forward(x, W, B, activation_type):
    """Run the network forward on a batch stored column-wise in ``x``.

    Every layer computes ``a_i = W[i] @ h_i + B[i]``; the hidden activation
    selected by ``activation_type`` turns ``a_i`` into ``h_{i+1}``, and the
    last pre-activation goes through ``softmax``.

    Returns:
        ``(y_hat, h, a)`` where ``h[0]`` is ``x``, ``h[i]`` is the input fed to
        layer ``i``, and ``a[i]`` is that layer's pre-activation. Both lists
        have ``len(W)`` entries.
    """
    sigma = activation(activation_type)  # hidden-layer non-linearity
    h = [x]   # h0 = x
    a = [np.dot(W[0], x) + B[0]]
    for layer in range(1, len(W)):
        h.append(sigma(a[-1]))
        a.append(np.dot(W[layer], h[-1]) + B[layer])

    return softmax(a[-1]), h, a



def loss(y,y_hat,l_type,W,reg,n_class):
    """Mean per-sample loss of predictions ``y_hat`` against integer labels ``y``.

    Args:
        y: integer class labels, one per sample.
        y_hat: predicted class probabilities, shape ``(n_class, len(y))``.
        l_type: ``'cross_entropy'`` or ``'squared_error'``.
        W: list of weight matrices to regularise, or ``None``/empty to skip
            the penalty.
        reg: L2 coefficient; the penalty added is ``reg * sum(||W_i||^2)``.
        n_class: number of classes (rows of the one-hot target matrix).

    Returns:
        The scalar loss.

    Raises:
        ValueError: if ``l_type`` is not a supported name (previously this
            surfaced as an UnboundLocalError).
    """
    if l_type not in ('cross_entropy', 'squared_error'):
        raise ValueError(
            f"Unknown loss type {l_type!r}; expected 'cross_entropy' or 'squared_error'"
        )

    targets = one_hot(y, n_class)
    n_samples = targets.shape[1]

    if l_type=='cross_entropy':
        # Clip away exact zeros so 0 * log(0) cannot poison the sum with NaN.
        log_probs = np.log(np.clip(y_hat, 1e-12, None))
        err = -1*np.sum(np.multiply(targets, log_probs))/n_samples
    else:
        # 1/(2N) * sum of squared residuals. The previous expression
        # `(2*one_hot(...)).shape[1]` doubled the matrix, not the sample
        # count, so it divided by N instead of 2N.
        err = np.sum((targets - y_hat)**2)/(2*n_samples)

    # `W is not None and len(W)` instead of `if W:` so an array-like W does not
    # trigger an ambiguous-truth-value error.
    if W is not None and len(W) > 0:
        penalty = sum(np.sum(np.square(w)) for w in W)
        err = err + reg*penalty
    return err

def eval_acc(y_hat, y_true):
    """Percentage (0-100) of columns of ``y_hat`` whose arg-max row equals ``y_true``."""
    predicted = np.argmax(y_hat, axis = 0)
    hit_rate = np.mean(predicted == y_true)
    return hit_rate*100


In [None]:
def back_prop(x, y, y_hat, a, h , W, B, batch_size,l_type,act_type):
    """Back-propagate the loss through the network for one mini-batch.

    Args:
        x: input batch (unused here; the input is read from ``h[0]``).
        y: one-hot targets, same shape as ``y_hat``.
        y_hat: softmax outputs of the forward pass.
        a: list of per-layer pre-activations from the forward pass.
        h: list of per-layer inputs from the forward pass (``h[0]`` is the input).
        W: list of weight matrices.
        B: list of bias columns (only its length is used).
        batch_size: accepted for interface compatibility; not used. Gradients
            are summed over the batch, not averaged.
        l_type: ``'cross_entropy'`` or ``'squared_error'``.
        act_type: name of the hidden activation (looked up via ``activation``).

    Returns:
        ``(grad_W, grad_B, grad_h, grad_a)``. ``grad_a[i]`` is the gradient
        w.r.t. ``a[i]``; ``grad_h[i]`` is the gradient w.r.t. the activation
        produced from ``a[i]`` (so ``grad_h[-1]`` is the gradient w.r.t. ``y_hat``).

    Raises:
        ValueError: if ``l_type`` is not a supported name.
    """
    if l_type not in ("cross_entropy", "squared_error"):
        raise ValueError(
            f"Unknown loss type {l_type!r}; expected 'cross_entropy' or 'squared_error'"
        )

    grad_h,grad_a,grad_W,grad_B = [0]*len(h),[0]*len(a),[0]*len(W),[0]*len(B)
    # The hidden activation's derivative is only needed when hidden layers exist.
    sigma = activation(act_type) if len(W) > 1 else None

    if l_type == "cross_entropy":
        grad_h[-1] = -1*(y/y_hat)
        grad_a[-1] = -1*(y-y_hat)
    else:
        residual = y_hat - y
        grad_h[-1] = residual
        # Full softmax Jacobian applied to the residual:
        #   dL/da_j = y_hat_j * (r_j - sum_k r_k * y_hat_k)
        # The previous r * s * (1 - s) kept only the Jacobian's diagonal and
        # dropped the cross-class terms.
        grad_a[-1] = y_hat*(residual - np.sum(residual*y_hat, axis = 0, keepdims = True))

    for i in range(len(W)-1, -1, -1):
        grad_W[i] = np.dot(grad_a[i], h[i].T)
        # Multiplying by a column of ones sums grad_a[i] over the batch axis.
        grad_B[i] = np.dot(grad_a[i], np.ones((grad_a[i].shape[1], 1)))
        if i > 0:
            grad_h[i-1] = np.dot(W[i].T, grad_a[i])
            grad_a[i-1]  = np.multiply(grad_h[i-1],sigma(a[i-1], derivative = True))

    return grad_W, grad_B, grad_h, grad_a

In [None]:
def sgd_step(W,B,grad_W,grad_B,lr,reg):
    """One SGD step with weight decay: ``p <- p - (lr*reg*p + lr*grad)``.

    The update is applied layer by layer. The previous implementation packed
    the per-layer matrices into ``np.array(..., dtype=object)``; when all
    layers share a shape NumPy builds a single N-D object array instead, and
    ``.tolist()`` then returns nested Python lists rather than arrays.

    Args:
        W, B: lists of weight matrices / bias columns (not mutated).
        grad_W, grad_B: matching lists of gradients.
        lr: learning rate.
        reg: weight-decay coefficient (applied to weights and biases alike).

    Returns:
        ``(W, B)`` as new lists of ``numpy.ndarray``.
    """
    def _step(params, grads):
        updated = []
        for p, g in zip(params, grads):
            p = np.asarray(p)
            updated.append(p - (lr*reg*p + lr*np.asarray(g)))
        return updated

    return _step(W, grad_W), _step(B, grad_B)



def momentum_step(w, b, gW, gB, lr=0.001, gamma=0.9, reg=0, state=None):
    """One momentum-SGD step with weight decay.

    For every layer: ``v <- gamma*v + lr*grad`` and ``p <- (1 - lr*reg)*p - v``.

    Args:
        w, b: lists of weight matrices / bias columns (not mutated).
        gW, gB: matching lists of gradients.
        lr: learning rate.
        gamma: momentum coefficient.
        reg: weight-decay coefficient.
        state: optional dict that carries the velocities between calls. Pass
            the SAME dict on every step of a training run (and a fresh one per
            run) to get real momentum. When omitted, the velocity starts at
            zero on every call, which is the original behaviour and makes
            ``gamma`` irrelevant.

    Returns:
        ``(W, B)`` as new lists of ``numpy.ndarray``.
    """
    if state is None:
        state = {}

    def _advance(params, grads, key):
        velocities = state.get(key)
        if velocities is None:
            velocities = [np.zeros_like(p) for p in params]
        velocities = [gamma*v + lr*np.asarray(g) for v, g in zip(velocities, grads)]
        state[key] = velocities
        return [(1 - lr*reg)*np.asarray(p) - v for p, v in zip(params, velocities)]

    W = _advance(w, gW, 'vW')
    B = _advance(b, gB, 'vB')
    return W, B


def RMSprop_step(w, b, gW, gB, lr=0.01, beta=0.99, state=None):
    """One RMSprop step.

    For every layer: ``v <- beta*v + (1-beta)*grad**2`` and
    ``p <- p - lr / sqrt(v + 1e-7) * grad`` (epsilon sits inside the root).

    Args:
        w, b: lists of weight matrices / bias columns (not mutated).
        gW, gB: matching lists of gradients.
        lr: learning rate.
        beta: decay rate of the squared-gradient average.
        state: optional dict that carries the squared-gradient averages
            between calls. Pass the SAME dict on every step of a training run
            (and a fresh one per run). When omitted, the average starts at
            zero on every call, which is the original behaviour.

    Returns:
        ``(W, B)`` as new lists of ``numpy.ndarray``.
    """
    if state is None:
        state = {}

    def _advance(params, grads, key):
        averages = state.get(key)
        if averages is None:
            averages = [np.zeros_like(p) for p in params]
        grads = [np.asarray(g) for g in grads]
        averages = [beta*v + (1 - beta)*(g**2) for v, g in zip(averages, grads)]
        state[key] = averages
        return [
            np.asarray(p) - (lr / ((v + 1e-7) ** 0.5)) * g
            for p, v, g in zip(params, averages, grads)
        ]

    W = _advance(w, gW, 'vW')
    B = _advance(b, gB, 'vB')
    return W, B



In [None]:
import wandb

def train(X_train, y_train, x_val, y_val, num_inputs_nodes, hidden_layers, out_num, init_type, epochs,
          batch_size, l_type, act_type, op_name, lr_rate, reg):
    """Train the network with mini-batch gradient descent and log each epoch to W&B.

    Args:
        X_train, y_train: training inputs (one sample per leading index) and
            integer labels.
        x_val, y_val: validation inputs and integer labels.
        num_inputs_nodes: number of input features per sample.
        hidden_layers: iterable with the width of every hidden layer.
        out_num: number of output classes.
        init_type: initialisation name forwarded to ``param``.
        epochs: number of passes over the training data.
        batch_size: mini-batch size (the last batch may be smaller).
        l_type: loss name forwarded to ``loss`` and ``back_prop``.
        act_type: hidden activation name.
        op_name: ``'sgd'``, ``'momentum'`` or ``'rmsprop'``.
        lr_rate: learning rate.
        reg: L2 / weight-decay coefficient.

    Returns:
        ``(W, B, train_loss, train_accuracy, val_loss, val_accuracy)`` where
        the four histories hold one entry per epoch.

    Raises:
        ValueError: for an unknown ``op_name``, a non-positive ``batch_size``
            or an empty training set.
    """
    if op_name not in ('sgd', 'momentum', 'rmsprop'):
        # Previously an unknown optimiser silently trained nothing.
        raise ValueError(
            f"Unknown optimizer {op_name!r}; expected 'sgd', 'momentum' or 'rmsprop'"
        )
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    N = X_train.shape[0]
    if N == 0:
        raise ValueError("X_train is empty")

    layers = [num_inputs_nodes, *hidden_layers, out_num]
    W, B = param(layers, init_type)

    # Samples as columns. reshape (rather than squeeze) keeps a 2-D matrix
    # even when a batch holds a single sample.
    x_val_cols = x_val.reshape(x_val.shape[0], -1).T

    # Histories live outside the epoch loop so they keep one entry per epoch.
    train_loss, train_accuracy, val_loss, val_accuracy = [], [], [], []

    # Ensure WandB starts fresh
    wandb.finish()
    wandb.init(project="DA6401-Assignment-1", name="training-run", reinit=True)

    try:
        for epoch in range(epochs):
            loss_sum, acc_sum = 0.0, 0.0

            for ds in range(0, N, batch_size):
                batch_labels = y_train[ds:ds + batch_size]
                batch_inputs = X_train[ds:ds + batch_size]
                mini_batch_size = batch_inputs.shape[0]

                x = batch_inputs.reshape(mini_batch_size, -1).T
                y = one_hot(batch_labels, out_num)

                y_hat, h, a = forward(x, W, B, act_type)
                grad_W, grad_B, grad_h, grad_a = back_prop(x, y, y_hat, a, h, W, B, batch_size, l_type, act_type)

                # Keyword arguments on purpose: passed positionally, `reg`
                # landed in momentum_step's `gamma` and RMSprop_step's `beta`.
                # NOTE(review): as called here, momentum_step and RMSprop_step
                # start from zero accumulators on every call, so no velocity /
                # running average is carried across mini-batches.
                if op_name == 'sgd':
                    W, B = sgd_step(W, B, grad_W, grad_B, lr_rate, reg)
                elif op_name == 'momentum':
                    W, B = momentum_step(W, B, grad_W, grad_B, lr=lr_rate, reg=reg)
                else:
                    # RMSprop_step has no regularisation parameter.
                    W, B = RMSprop_step(W, B, grad_W, grad_B, lr=lr_rate)

                # Weight per-batch means by batch size so the epoch figure is
                # an exact per-sample average even with a short last batch.
                loss_sum += loss(batch_labels, y_hat, l_type, W, reg, out_num) * mini_batch_size
                acc_sum += eval_acc(y_hat, batch_labels) * mini_batch_size

            l = loss_sum / N
            acc = acc_sum / N

            train_loss.append(l)
            train_accuracy.append(acc)

            y_val_hat, _, _ = forward(x_val_cols, W, B, act_type)
            val_acc = eval_acc(y_val_hat, y_val)
            val_l = loss(y_val, y_val_hat, l_type, W=None, reg=reg, n_class=out_num)

            val_accuracy.append(val_acc)
            val_loss.append(val_l)

            wandb.log({"epoch": epoch, "Train_loss": l, "Train_acc": acc, "val_loss": val_l, "val_Accuracy": val_acc})

            print(f"Epoch {epoch}: Train Loss = {l:.4f}, Train Accuracy = {acc:.4f}, Val Loss = {val_l:.4f}, Val Accuracy = {val_acc:.4f}")
    finally:
        # Close the run even if training raises, so no W&B run is left open.
        wandb.finish()

    return W, B, train_loss, train_accuracy, val_loss, val_accuracy


In [None]:
!wandb login 7ea09f7c4132c66373b03708516876ea8ecc67cc

# 🔹 Load Fashion-MNIST Dataset
(_, _), (X_test, y_test) = fashion_mnist.load_data()
X_test = X_test.reshape(-1, 784) / 255.0  # Flatten images for the model

# 🔹 Train the Model
W_new, B_new, _, _, _, _ = train(
    X_train, y_train, X_validation, y_validation, 784,
    [128, 64, 32], 10, "xavier", 10, 64, "cross_entropy",
    "sigmoid", 'momentum', 0.001, 0
)

# 🔹 Forward Pass to Get Predictions
Y, _, _ = forward(np.squeeze(X_validation).T, W_new, B_new, "sigmoid")
Y_prediction = np.argmax(Y, axis=0)

# 🔹 Define Class Labels
labels_dict_names = [
    "T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
    "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"
]

wandb.init(project="DA6401-Assignment-1", name="Q7_Confusion_Matrix", reinit=True)
# 🔹 Log the confusion matrix within the same WandB run
wandb.log({
    "Confusion Matrix": wandb.plot.confusion_matrix(
        probs=None, y_true=y_validation, preds=Y_prediction, class_names=labels_dict_names
    )
})

# 🔹 Finish WandB Logging
wandb.finish()

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


Epoch 0: Train Loss = 2.0926, Train Accuracy = 20.6990, Val Loss = 1.8619, Val Accuracy = 33.0333
Epoch 1: Train Loss = 1.3749, Train Accuracy = 47.6177, Val Loss = 1.1828, Val Accuracy = 58.0667
Epoch 2: Train Loss = 1.0001, Train Accuracy = 60.5510, Val Loss = 0.9513, Val Accuracy = 63.8167
Epoch 3: Train Loss = 0.8362, Train Accuracy = 66.3622, Val Loss = 0.8302, Val Accuracy = 70.4500
Epoch 4: Train Loss = 0.7418, Train Accuracy = 70.6182, Val Loss = 0.7435, Val Accuracy = 73.2000
Epoch 5: Train Loss = 0.6695, Train Accuracy = 74.2745, Val Loss = 0.6735, Val Accuracy = 75.4500
Epoch 6: Train Loss = 0.6115, Train Accuracy = 76.4016, Val Loss = 0.6169, Val Accuracy = 77.2000
Epoch 7: Train Loss = 0.5671, Train Accuracy = 77.8667, Val Loss = 0.5757, Val Accuracy = 78.5333
Epoch 8: Train Loss = 0.5343, Train Accuracy = 79.1032, Val Loss = 0.5448, Val Accuracy = 79.9667
Epoch 9: Train Loss = 0.5082, Train Accuracy = 80.4629, Val Loss = 0.5193, Val Accuracy = 81.3500


0,1
Train_acc,▁▄▆▆▇▇████
Train_loss,█▅▃▂▂▂▁▁▁▁
epoch,▁▂▃▃▄▅▆▆▇█
val_Accuracy,▁▅▅▆▇▇▇███
val_loss,█▄▃▃▂▂▂▁▁▁

0,1
Train_acc,80.46289
Train_loss,0.50823
epoch,9.0
val_Accuracy,81.35
val_loss,0.51931
