<a href="https://colab.research.google.com/github/omkarmande/da6401_assignment1/blob/main/Question2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_openml
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from keras.datasets import fashion_mnist
import wandb

In [2]:
!wandb login

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33momkarmande[0m ([33momkarmande-iit-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [3]:
# Fetch the Fashion-MNIST train/test arrays through the Keras dataset helper.
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# Flatten every image into a single row vector and divide by 255.0
# (presumably mapping 8-bit pixel intensities into [0, 1] - TODO confirm).
X_train = X_train.reshape(len(X_train), -1) / 255.0
X_test = X_test.reshape(len(X_test), -1) / 255.0

# One-hot encode the integer labels by selecting rows of an identity matrix.
num_classes = 10
y_train_onehot = np.eye(num_classes)[y_train]
y_test_onehot = np.eye(num_classes)[y_test]

# Keep the first 90% of the training rows for fitting and hold out the
# remaining 10% for validation; the split is positional (no shuffling here).
split_index = int(0.9 * X_train.shape[0])
X = X_train[:split_index]
X_val = X_train[split_index:]
y = y_train_onehot[:split_index]
y_val = y_train_onehot[split_index:]

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [4]:
def sigmoid(x):
    """Element-wise logistic sigmoid, ``1 / (1 + exp(-x))``.

    The input is clipped to ``[-500, 500]`` before exponentiation so that
    very large magnitudes do not reach ``np.exp`` unbounded.

    Args:
        x: Scalar or array-like of pre-activations.

    Returns:
        Array (or NumPy scalar) of the same shape as ``x``.
    """
    x = np.clip(x, -500, 500)
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    """Derivative of the sigmoid with respect to its input ``x``.

    Computes ``s * (1 - s)`` where ``s = sigmoid(x)``. The sigmoid is
    evaluated once and reused instead of being recomputed for each factor.

    Args:
        x: Scalar or array-like of pre-activations (not sigmoid outputs).

    Returns:
        Array (or NumPy scalar) of the same shape as ``x``.
    """
    s = sigmoid(x)
    return s * (1 - s)

def tanh(x):
    """Element-wise hyperbolic tangent of ``x``.

    The input is first clipped to ``[-500, 500]`` and then passed to
    ``np.tanh``.
    """
    return np.tanh(np.clip(x, -500, 500))

def tanh_derivative(x):
    """Derivative of tanh with respect to its input: ``1 - tanh(x)**2``.

    ``x`` is the pre-activation value, not the tanh output.
    """
    t = np.tanh(x)
    return 1 - t ** 2

def identity(x):
    """Linear (no-op) activation: hand back ``x`` itself, unchanged and uncopied."""
    return x

def identity_derivative(x):
    """Derivative of the identity activation: ones with the shape and dtype of ``x``."""
    ones = np.ones_like(x)
    return ones

def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    rectified = np.maximum(0, x)
    return rectified

def relu_derivative(x):
    """Derivative of ReLU: 1.0 where ``x > 0`` and 0.0 elsewhere (including at 0).

    ``x`` must support ``.astype`` on the result of ``x > 0`` (e.g. a NumPy array).
    """
    positive_mask = x > 0
    return positive_mask.astype(float)

def softmax(x):
    """Row-wise softmax over axis 1.

    Each row has its maximum subtracted before exponentiation, so the
    largest exponent evaluated per row is ``exp(0)``. The input therefore
    needs at least two dimensions.

    Returns:
        Array with the same shape as ``x`` whose entries along axis 1 sum to 1.
    """
    shifted = x - np.max(x, axis=1, keepdims=True)
    exponentials = np.exp(shifted)
    row_totals = np.sum(exponentials, axis=1, keepdims=True)
    return exponentials / row_totals


In [15]:
def cross_entropy_loss(y_true, y_pred):
    """Cross-entropy between targets and predictions, averaged over rows.

    ``1e-9`` is added to ``y_pred`` inside the logarithm, which keeps the
    log finite when a predicted value is exactly 0. The summed loss is
    divided by ``y_true.shape[0]``.
    """
    n_rows = y_true.shape[0]
    log_pred = np.log(y_pred + 1e-9)
    return -np.sum(y_true * log_pred) / n_rows

def squared_error_loss(y_true, y_pred):
    """Squared error summed over axis 1, then averaged over rows."""
    residual = y_true - y_pred
    per_row = np.sum(residual ** 2, axis=1)
    return np.mean(per_row)

def accuracy(y_true, y_pred):
    """Fraction of rows whose arg-max along axis 1 agrees in both inputs.

    Works for any pair of 2-D arrays where the position of the largest
    entry in a row denotes the class (e.g. one-hot targets versus
    predicted probabilities).
    """
    matches = np.argmax(y_true, axis=1) == np.argmax(y_pred, axis=1)
    return np.mean(matches)

In [11]:
def initialize_weights(shape, method="xavier"):
    """Draw an initial weight array of the given shape.

    Args:
        shape: Tuple of dimensions; ``shape[0]`` is treated as fan-in and
            ``shape[-1]`` as fan-out.
        method: ``"random"`` for standard-normal samples scaled by 0.01, or
            ``"xavier"`` for standard-normal samples scaled by
            ``sqrt(2 / (fan_in + fan_out))`` (Glorot/Xavier normal).

    Returns:
        ``np.ndarray`` of shape ``shape`` sampled through ``np.random.randn``.

    Raises:
        ValueError: If ``method`` is neither ``"random"`` nor ``"xavier"``.
    """
    if method == "random":
        return np.random.randn(*shape) * 0.01
    elif method == "xavier":
        # Xavier/Glorot scaling uses both fan-in and fan-out; dividing by
        # fan-in alone (sqrt(2 / fan_in)) would be He initialization instead.
        fan_in, fan_out = shape[0], shape[-1]
        return np.random.randn(*shape) * np.sqrt(2.0 / (fan_in + fan_out))
    else:
        raise ValueError("Unknown initialization method: Choose 'random' or 'xavier'")

def clip_gradients(grads, clip_value=5.0):
    """Clip every gradient array element-wise to ``[-clip_value, clip_value]``.

    Args:
        grads: Iterable of arrays.
        clip_value: Symmetric bound applied to each element.

    Returns:
        A new list with one clipped array per input, in the same order.
    """
    lower, upper = -clip_value, clip_value
    clipped = []
    for grad in grads:
        clipped.append(np.clip(grad, lower, upper))
    return clipped

In [16]:
class Model:
    """Fully connected feed-forward network with a softmax output layer.

    The constructor builds the weight matrices and bias rows; ``feedForward``
    runs a forward pass and returns the activations of every layer. The
    optimizer-related attributes are only stored/initialized here; no
    training code is part of this class as shown.
    """

    def get_activation_functions(self, activation_type):
        """Return the ``(activation, derivative)`` pair for ``activation_type``.

        The lookup is case-insensitive, so ``"ReLu"``, ``"ReLU"`` and
        ``"relu"`` all select ReLU instead of silently falling through to
        the default. Names that are not recognised (and non-string values)
        fall back to the sigmoid pair.
        """
        activations = {
            "sigmoid": (sigmoid, sigmoid_derivative),
            "tanh": (tanh, tanh_derivative),
            "relu": (relu, relu_derivative),
            "identity": (identity, identity_derivative),
        }
        if isinstance(activation_type, str):
            key = activation_type.lower()
        else:
            key = activation_type
        return activations.get(key, (sigmoid, sigmoid_derivative))

    def __init__(self, il_neuron, hl_neuron, hl_count, ol_neuron, opt="adam", lr=0.1, batch=4, init="xavier", act="tanh", loss="cross_entropy", decay=0):
        """Store the hyper-parameters and create the layer parameters.

        Args:
            il_neuron: Width of the input layer.
            hl_neuron: Width of every hidden layer.
            hl_count: Number of hidden layers.
            ol_neuron: Width of the output layer.
            opt: Optimizer name; stored only (not used by the code shown here).
            lr: Learning rate; stored only.
            batch: Batch size; stored only.
            init: Weight initialization method passed to ``initialize_weights``.
            act: Hidden-layer activation name, resolved through
                ``get_activation_functions``.
            loss: ``"cross_entropy"`` selects ``cross_entropy_loss``; any other
                value selects ``squared_error_loss``.
            decay: Stored only; presumably a weight-decay coefficient - TODO confirm.
        """
        # Layer widths from input to output, e.g. [in, hidden, ..., hidden, out].
        self.layers = [il_neuron] + [hl_neuron] * hl_count + [ol_neuron]
        self.weights = []
        self.biases = []

        self.opt = opt
        self.lr = lr
        self.batch = batch
        self.init = init
        self.act = act
        self.loss = loss
        self.decay = decay

        # Optimizer constants and step counter (not consumed by the code shown here).
        self.momentum = 0.9
        self.beta1 = 0.9
        self.beta2 = 0.999
        self.epsilon = 1e-6
        self.t = 0

        # Optimizer state containers, all starting empty.
        self.velocities = []
        self.velocities_b = []
        self.squared_grads = []
        self.squared_grads_b = []
        self.m_t_w = []
        self.m_t_b = []
        self.v_t_w = []
        self.v_t_b = []

        self.activation_func, self.activation_derivative = self.get_activation_functions(act)
        self.loss_func = cross_entropy_loss if loss == "cross_entropy" else squared_error_loss

        # One (fan_in, fan_out) weight matrix and one (1, fan_out) zero bias row
        # per pair of consecutive layers.
        for fan_in, fan_out in zip(self.layers[:-1], self.layers[1:]):
            self.weights.append(initialize_weights((fan_in, fan_out), method=self.init))
            self.biases.append(np.zeros((1, fan_out)))

    def feedForward(self, X):
        """Run a forward pass and return the activations of every layer.

        Args:
            X: Input batch with one sample per row; its column count must
                match the first weight matrix.

        Returns:
            List starting with ``X`` itself, followed by each hidden layer's
            activation and finally the softmax output.
        """
        activations = [X]

        # Hidden layers: affine transform followed by the configured activation.
        for W, b in zip(self.weights[:-1], self.biases[:-1]):
            z = np.dot(activations[-1], W) + b
            activations.append(self.activation_func(z))

        # Output layer: always softmax, regardless of the hidden activation.
        z_output = np.dot(activations[-1], self.weights[-1]) + self.biases[-1]
        activations.append(softmax(z_output))

        return activations

In [18]:
# Build an untrained network (784 inputs, three hidden layers of 32 units,
# 10 outputs) and push the first five test images through a forward pass.
nn = Model(
    il_neuron=784,
    hl_neuron=32,
    hl_count=3,
    ol_neuron=10,
    opt="adam",
    lr=0.1,
    batch=4,
    init="xavier",
    act="tanh",
    loss="cross_entropy",
    decay=0,
)

sample_X = X_test[:5]
outputs = nn.feedForward(sample_X)

# The last entry of the returned list is the softmax output layer.
print("Output probabilities for first five test image:")
print(outputs[-1][:5])

Output probabilities for first five test image:
[[0.12154883 0.06586419 0.15177749 0.08443134 0.16502852 0.09659045
  0.03040004 0.07151684 0.16620973 0.04663256]
 [0.03656215 0.08834436 0.31016818 0.05636596 0.08922732 0.0338922
  0.08587661 0.08191317 0.18929211 0.02835794]
 [0.01823558 0.10683912 0.10427081 0.07817903 0.20175652 0.05956385
  0.14077919 0.03456315 0.0993896  0.15642313]
 [0.02006977 0.07671915 0.20199922 0.04642982 0.10668177 0.04459541
  0.18718983 0.03580323 0.16978642 0.11072537]
 [0.03900815 0.1262285  0.20837766 0.07683751 0.14389917 0.03465189
  0.10072354 0.05341146 0.16022804 0.05663408]]
