<a href="https://colab.research.google.com/github/manglesh001/DL-assigment1/blob/main/DL_ASS1_Q7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from tensorflow.keras.datasets import fashion_mnist
import wandb
from sklearn.preprocessing import OneHotEncoder


In [2]:
# Load Fashion-MNIST dataset
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# Flatten every image into a single row and divide by 255.0
# (presumably mapping 8-bit pixel intensities into [0, 1] -- confirm dtype).
X_train = X_train.reshape(len(X_train), -1) / 255.0
X_test = X_test.reshape(len(X_test), -1) / 255.0

# Keep the integer test labels under their own name; they are used later for
# accuracy and the confusion matrix.
y_test_original = y_test

# One-hot encode the training labels: row k of the 10x10 identity is the
# encoding of class k.
y_train = np.eye(10)[y_train]

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [3]:
# Activation functions and derivatives
def sigmoid(x):
    """Element-wise logistic function ``1 / (1 + exp(-x))``.

    The value is computed so that ``exp`` is only ever applied to a
    non-positive number. Large-magnitude inputs therefore saturate to 0 or 1
    without triggering NumPy overflow warnings.

    Args:
        x: Scalar or array-like of real numbers.

    Returns:
        The sigmoid of ``x`` with the same shape as the input (a NumPy scalar
        when ``x`` is a scalar).
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.floating):
        # Integer/bool input: promote so the exponential is taken in floating point.
        x = x.astype(float)
    # exp(-|x|) lies in (0, 1], so it cannot overflow.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- the same value algebraically.
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d result into a scalar and leaves arrays unchanged.
    return result[()]

def sigmoid_derivative(x):
    """Sigmoid derivative written in terms of the sigmoid's own output.

    ``x`` is treated as an already-activated value ``s`` (not the
    pre-activation); the result is ``s * (1 - s)``.
    """
    complement = 1 - x
    return x * complement

In [4]:
# Weight Initialization
def initialize_weights(layers, method="xavier"):
    """Create weight matrices and zero bias rows for a fully connected network.

    Args:
        layers: Sequence of layer widths, input width first and output width last.
        method: ``"xavier"`` scales standard-normal draws by ``sqrt(1 / fan_in)``;
            any other value scales them by ``0.01``.

    Returns:
        ``(weights, biases)``: two lists with one entry per consecutive pair of
        widths. ``weights[k]`` has shape ``(layers[k], layers[k+1])`` and
        ``biases[k]`` is an all-zero array of shape ``(1, layers[k+1])``.
    """
    weights, biases = [], []
    # Walk over (fan_in, fan_out) pairs of neighbouring layers, in order, so the
    # global NumPy RNG is consumed layer by layer.
    for fan_in, fan_out in zip(layers[:-1], layers[1:]):
        scale = np.sqrt(1 / fan_in) if method == "xavier" else 0.01
        weights.append(np.random.randn(fan_in, fan_out) * scale)
        biases.append(np.zeros((1, fan_out)))
    return weights, biases

In [5]:
# Forward Propagation
def forward_propagation(X, weights, biases, activation):
    """Run a forward pass and collect the output of every layer.

    Args:
        X: Input batch; it is multiplied on the left of ``weights[0]``.
        weights: List of weight matrices, one per layer.
        biases: List of bias arrays, one per layer, added after the matrix product.
        activation: List of activation names, one per layer. Only ``"sigmoid"``
            is implemented.

    Returns:
        List ``[X, a_1, ..., a_L]`` where ``a_k`` is the activated output of
        layer ``k``; its length is always ``len(weights) + 1``.

    Raises:
        ValueError: If a layer requests an activation other than ``"sigmoid"``.
            Such a layer used to be skipped silently, which left the returned
            list short and fed the wrong input to the following layer.
    """
    activations = [X]
    for i in range(len(weights)):
        name = activation[i]
        if name != "sigmoid":
            raise ValueError(
                f"Unsupported activation {name!r} for layer {i}; only 'sigmoid' is implemented"
            )
        z = np.dot(activations[-1], weights[i]) + biases[i]
        activations.append(sigmoid(z))
    return activations

In [6]:
# Backpropagation
def backpropagation(y, activations, weights):
    """Compute weight and bias gradients for a sigmoid network, last layer first.

    Args:
        y: Target array with the same shape as ``activations[-1]``.
        activations: Layer outputs as produced by the forward pass, with the
            network input at index 0.
        weights: List of weight matrices, one per layer.

    Returns:
        ``(gradients_w, gradients_b)``: two lists aligned with ``weights``.
        Gradients are summed over the batch dimension (not averaged).
    """
    n_layers = len(weights)
    gradients_w = [None] * n_layers
    gradients_b = [None] * n_layers

    # Signal arriving at the output layer: prediction minus target.
    upstream = activations[-1] - y

    for layer in range(n_layers - 1, -1, -1):
        layer_input = activations[layer]
        layer_output = activations[layer + 1]

        # Scale the incoming signal by the sigmoid slope at this layer's output.
        delta = upstream * sigmoid_derivative(layer_output)

        gradients_w[layer] = np.dot(layer_input.T, delta)
        gradients_b[layer] = np.sum(delta, axis=0, keepdims=True)

        # Push the signal back through this layer's weights for the next step.
        upstream = np.dot(delta, weights[layer].T)

    return gradients_w, gradients_b

In [7]:
# RMSprop Optimizer
def rmsprop(weights, biases, gradients_w, gradients_b, lr, cache_w, cache_b, beta=0.99, epsilon=1e-8):
    """Apply one RMSprop step to every layer's weights and biases.

    For each layer the running average of squared gradients is refreshed as
    ``beta * cache + (1 - beta) * grad**2`` and the parameter is then moved by
    ``lr * grad / (sqrt(cache) + epsilon)``.

    The parameter arrays are updated in place; the cache lists receive newly
    computed arrays at each index.

    Returns:
        The same four lists that were passed in: ``(weights, biases, cache_w, cache_b)``.
    """
    for layer in range(len(weights)):
        grad_w = gradients_w[layer]
        grad_b = gradients_b[layer]

        # Weights: refresh the running second moment, then take the scaled step.
        avg_sq_w = beta * cache_w[layer] + (1 - beta) * np.square(grad_w)
        cache_w[layer] = avg_sq_w
        weights[layer] -= lr * grad_w / (np.sqrt(avg_sq_w) + epsilon)

        # Biases: identical rule.
        avg_sq_b = beta * cache_b[layer] + (1 - beta) * np.square(grad_b)
        cache_b[layer] = avg_sq_b
        biases[layer] -= lr * grad_b / (np.sqrt(avg_sq_b) + epsilon)

    return weights, biases, cache_w, cache_b


In [8]:
# Hyperparameters of the best model.
# The training function shown in this notebook reads hidden_layers,
# hidden_size, activation, weight_init, batch_size, epochs and learning_rate;
# the remaining keys are recorded with the run configuration.
best_config = dict(
    hidden_layers=3,
    hidden_size=64,
    activation='sigmoid',
    weight_init='xavier',
    optimizer='rmsprop',
    batch_size=16,
    epochs=10,
    learning_rate=0.001,
    weight_decay=0,
)

In [9]:
# Initialize Wandb
# Start a run in the "fashion-mnist" project and attach best_config as the
# run's configuration. Later wandb.log(...) calls in this notebook report to
# this run until wandb.finish() is called.
wandb.init(project="fashion-mnist", config=best_config)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mmangleshpatidar2233[0m ([33mmangleshpatidar2233-iit-madras-alumni-association[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [10]:
def train_network(X_train, y_train, X_val, y_val, config):
    """Train a fully connected sigmoid network with mini-batch RMSprop.

    The network has ``config['hidden_layers']`` hidden layers of
    ``config['hidden_size']`` units each and a 10-unit sigmoid output layer.
    After every epoch the mean training loss and training accuracy are logged
    to Wandb and printed.

    Args:
        X_train: 2-D array of training inputs, one sample per row.
        y_train: One-hot training targets with 10 columns.
        X_val: Validation inputs. Accepted for interface compatibility but not
            used by this function.
        y_val: Validation targets. Accepted but not used, like ``X_val``.
        config: Mapping providing ``hidden_size``, ``hidden_layers``,
            ``activation``, ``weight_init``, ``batch_size``, ``epochs`` and
            ``learning_rate``. Other keys (e.g. ``optimizer``, ``weight_decay``)
            are not consulted; the optimizer is always RMSprop.

    Returns:
        ``(weights, biases)``: the trained per-layer parameter lists.
    """
    # Fixed seed so weight initialization and shuffling repeat between runs.
    np.random.seed(42)
    layers = [X_train.shape[1]] + [config['hidden_size']] * config['hidden_layers'] + [10]
    activation = [config['activation']] * config['hidden_layers'] + ['sigmoid']

    weights, biases = initialize_weights(layers, config['weight_init'])
    # RMSprop running averages of squared gradients, one per parameter array.
    cache_w = [np.zeros_like(w) for w in weights]
    cache_b = [np.zeros_like(b) for b in biases]

    batch_size = config['batch_size']
    epochs = config['epochs']
    lr = config['learning_rate']

    for epoch in range(epochs):
        # Reshuffle the training set at the start of every epoch.
        indices = np.random.permutation(X_train.shape[0])
        X_train_shuffled, y_train_shuffled = X_train[indices], y_train[indices]

        train_loss = 0
        train_correct = 0
        train_total = 0
        num_batches = 0

        for i in range(0, X_train_shuffled.shape[0], batch_size):
            X_batch = X_train_shuffled[i:i+batch_size]
            y_batch = y_train_shuffled[i:i+batch_size]

            # Forward propagation
            activations = forward_propagation(X_batch, weights, biases, activation)

            # Calculate training loss (cross-entropy loss); 1e-8 guards log(0)
            output = activations[-1]
            train_loss += -np.sum(y_batch * np.log(output + 1e-8)) / len(y_batch)
            num_batches += 1

            # Calculate training accuracy
            train_preds = np.argmax(output, axis=1)
            train_true = np.argmax(y_batch, axis=1)
            train_correct += np.sum(train_preds == train_true)
            train_total += len(y_batch)

            # Backpropagation
            gradients_w, gradients_b = backpropagation(y_batch, activations, weights)
            weights, biases, cache_w, cache_b = rmsprop(weights, biases, gradients_w, gradients_b, lr, cache_w, cache_b)

        # Average the per-batch losses over the batches actually processed.
        # Dividing by n // batch_size would undercount when the last batch is
        # partial and would divide by zero when n < batch_size.
        train_loss /= num_batches
        train_accuracy = train_correct / train_total

        # Log training metrics to Wandb
        wandb.log({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "train_accuracy": train_accuracy
        })

        # Print training metrics
        print(f"Epoch {epoch + 1}/{epochs}, "
              f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")

    return weights, biases

In [11]:

# Train the best model on the first 54000 rows; rows from 54000 onward form
# the validation split.
# The validation slices are taken first, while X_train / y_train still refer to
# the full arrays, because the following line rebinds those names.
# NOTE: re-running this cell without reloading the data leaves X_val / y_val
# empty, since X_train / y_train have already been truncated.
X_val, y_val = X_train[54000:], y_train[54000:]
X_train, y_train = X_train[:54000], y_train[:54000]
weights, biases = train_network(X_train, y_train, X_val, y_val, best_config)

Epoch 1/10, Train Loss: 0.7517, Train Accuracy: 0.7261
Epoch 2/10, Train Loss: 0.4868, Train Accuracy: 0.8454
Epoch 3/10, Train Loss: 0.4351, Train Accuracy: 0.8616
Epoch 4/10, Train Loss: 0.4039, Train Accuracy: 0.8708
Epoch 5/10, Train Loss: 0.3847, Train Accuracy: 0.8769
Epoch 6/10, Train Loss: 0.3670, Train Accuracy: 0.8835
Epoch 7/10, Train Loss: 0.3555, Train Accuracy: 0.8876
Epoch 8/10, Train Loss: 0.3458, Train Accuracy: 0.8919
Epoch 9/10, Train Loss: 0.3377, Train Accuracy: 0.8944
Epoch 10/10, Train Loss: 0.3306, Train Accuracy: 0.8967


In [12]:
# Evaluate on the test set

# One-hot encode y_test_original for the loss calculation
y_test_one_hot = np.eye(10)[y_test_original]

# Forward pass with the same per-layer activation list used for training:
# the configured hidden activation once per hidden layer, then a sigmoid output.
activations = forward_propagation(
    X_test,
    weights,
    biases,
    [best_config['activation']] * best_config['hidden_layers'] + ['sigmoid'],
)

# Predicted class = index of the largest output unit in each row
test_predictions = np.argmax(activations[-1], axis=1)

# Test loss (cross-entropy), averaged over the test samples; 1e-8 guards log(0)
test_loss = -np.sum(y_test_one_hot * np.log(activations[-1] + 1e-8)) / len(y_test_original)

In [13]:
# Test accuracy: fraction of samples whose predicted class matches the label
test_accuracy = (test_predictions == y_test_original).mean()

# Report the test-set metrics to the active Wandb run
wandb.log(dict(test_loss=test_loss, test_accuracy=test_accuracy))

In [14]:
# Print test metrics
# Both values are shown with four decimal places.
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Test Loss: 0.3908, Test Accuracy: 0.8718


In [15]:
# Class names used as tick labels on both axes of the heatmap
class_names = [
    'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
    'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot',
]

# Confusion matrix of true labels against predicted labels on the test set
cm = confusion_matrix(y_test_original, test_predictions)

# Draw the matrix as an annotated heatmap with integer cell labels
plt.figure(figsize=(12, 10))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='viridis',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.title('Confusion Matrix for Fashion-MNIST Test Set', fontsize=16)
plt.xlabel('Predicted Labels', fontsize=14)
plt.ylabel('True Labels', fontsize=14)
plt.xticks(rotation=45)
plt.yticks(rotation=0)

# Write the figure to disk, then close it so it is not also rendered inline
confusion_matrix_path = "confusion_matrix.png"
plt.savefig(confusion_matrix_path)
plt.close()

# Upload the saved image to Wandb
wandb.log({"confusion_matrix": wandb.Image(confusion_matrix_path)})

# End the Wandb run
wandb.finish()


0,1
epoch,▁▂▃▃▄▅▆▆▇█
test_accuracy,▁
test_loss,▁
train_accuracy,▁▆▇▇▇▇████
train_loss,█▄▃▂▂▂▁▁▁▁

0,1
epoch,10.0
test_accuracy,0.8718
test_loss,0.39076
train_accuracy,0.89669
train_loss,0.33064
