<a href="https://colab.research.google.com/github/subikkshas/DA6401/blob/main/DLass1q3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install wandb -q

In [2]:
import numpy as np
import wandb

In [5]:
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The value is computed in a numerically stable form: the exponential is
    only ever taken of a non-positive number, so inputs with a large
    magnitude cannot overflow ``np.exp`` (the textbook formula emits an
    overflow RuntimeWarning for large negative ``x``).

    Args:
        x: Real scalar or array-like.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the same
        shape as ``x``; every value lies in ``[0, 1]``.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))  # always in (0, 1], so it cannot overflow
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x).  Both are the same
    # function, each written so that only exp(-|x|) is needed.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d result to a scalar and is a no-op
    # (returns a view) for arrays of any other rank.
    return result[()]

def sigmoid_derivative(x):
    """Return ``x * (1 - x)``, the sigmoid's slope written via its output.

    ``x`` is the *activated* value ``s = sigmoid(z)``, not the
    pre-activation ``z``; for that input ``ds/dz = s * (1 - s)``.
    """
    complement = 1 - x
    return complement * x

def softmax(x):
    """Softmax along axis 1, so each row of a 2-D input sums to 1.

    The per-row maximum is subtracted before exponentiating, which keeps
    the largest exponent at 0 without changing the result.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    exponentials = np.exp(x - row_max)
    normaliser = exponentials.sum(axis=1, keepdims=True)
    return exponentials / normaliser

def cross_entropy(y_true, y_pred):
    """Return the cross-entropy summed over all entries, divided by ``y_true.shape[0]``.

    ``1e-8`` is added to ``y_pred`` before the logarithm so that a
    predicted probability of exactly 0 does not produce ``log(0)``.
    """
    num_rows = y_true.shape[0]
    log_probs = np.log(y_pred + 1e-8)
    total = np.sum(y_true * log_probs)
    return -total / num_rows

def cross_entropy_derivative(y_true, y_pred):
    """Return the element-wise prediction error ``y_pred - y_true``.

    The result is not divided by the number of rows, so it has the same
    shape as the inputs and carries one error vector per sample.
    """
    error = y_pred - y_true
    return error

class Optimizer:
    """Common base for weight-update rules; stores the learning rate.

    Subclasses override :meth:`update`; the base implementation always
    raises ``NotImplementedError``.
    """

    def __init__(self, learning_rate=0.01):
        """Remember the step size used by the concrete update rule."""
        self.learning_rate = learning_rate

    def update(self, weights, gradients):
        """Return the updated weights (must be provided by a subclass)."""
        raise NotImplementedError()

class SGD(Optimizer):
    """Plain gradient descent: ``w <- w - learning_rate * gradient``."""

    def update(self, weights, gradients):
        """Return ``weights`` moved one step against ``gradients``."""
        step = self.learning_rate * gradients
        return weights - step

class Momentum(Optimizer):
    """Gradient descent with classical (heavy-ball) momentum.

    One optimizer instance may be asked to update several weight tensors
    per training step, so a separate velocity buffer is kept for each
    distinct ``weights`` shape.  A single shared buffer would mix the
    histories of unrelated tensors and fails outright as soon as two
    tensors have shapes that do not broadcast.

    Limitation: tensors are told apart by shape only, so two different
    tensors with an identical shape share one buffer.
    """

    def __init__(self, learning_rate=0.01, momentum=0.9):
        """
        Args:
            learning_rate: Step size applied to the raw gradient.
            momentum: Fraction of the previous velocity that is retained.
        """
        super().__init__(learning_rate)
        self.momentum = momentum
        # Maps weights.shape -> velocity accumulated for that tensor.
        self.velocity = {}

    def _advance_velocity(self, weights, gradients):
        """Update, store and return the velocity belonging to ``weights``."""
        key = np.shape(weights)
        # A missing entry behaves like an all-zero velocity.
        previous = self.velocity.get(key, 0)
        current = self.momentum * previous - self.learning_rate * gradients
        self.velocity[key] = current
        return current

    def update(self, weights, gradients):
        """Return ``weights + v`` with ``v = momentum * v_prev - lr * gradients``."""
        return weights + self._advance_velocity(weights, gradients)


class NAG(Momentum):
    """Nesterov accelerated gradient on top of :class:`Momentum`.

    ``update`` only receives the gradient at the current weights, so the
    look-ahead is applied in its equivalent reformulated form::

        v_new = momentum * v_prev - lr * g
        w_new = w + momentum * v_new - lr * g

    Adding ``momentum * v_prev`` to the weights and then adding ``v_new``
    on top would count the previous velocity twice.
    """

    def update(self, weights, gradients):
        """Return the weights after one Nesterov-momentum step."""
        velocity = self._advance_velocity(weights, gradients)
        return weights + self.momentum * velocity - self.learning_rate * gradients

class RMSprop(Optimizer):
    """RMSprop: scale each step by a running RMS of recent gradients.

    One optimizer instance may be asked to update several weight tensors
    per training step, so a separate running average is kept for each
    distinct ``weights`` shape.  A single shared accumulator would mix
    the statistics of unrelated tensors and fails outright when their
    shapes do not broadcast.

    Limitation: tensors are told apart by shape only, so two different
    tensors with an identical shape share one accumulator.
    """

    def __init__(self, learning_rate=0.01, beta=0.9, epsilon=1e-8):
        """
        Args:
            learning_rate: Base step size.
            beta: Decay rate of the squared-gradient running average.
            epsilon: Added to the denominator to avoid division by zero.
        """
        super().__init__(learning_rate)
        self.beta = beta
        self.epsilon = epsilon
        # Maps weights.shape -> running average of squared gradients.
        self.squared_gradients = {}

    def update(self, weights, gradients):
        """Return the weights after one RMSprop step."""
        key = np.shape(weights)
        # A missing entry behaves like an all-zero running average.
        previous = self.squared_gradients.get(key, 0)
        average = self.beta * previous + (1 - self.beta) * gradients ** 2
        self.squared_gradients[key] = average
        return weights - self.learning_rate * gradients / (np.sqrt(average) + self.epsilon)

class Adam(Optimizer):
    """Adam: bias-corrected first- and second-moment gradient estimates.

    One optimizer instance may be asked to update several weight tensors
    per training step, so the moment estimates *and* the step counter are
    kept separately for each distinct ``weights`` shape.  Counting steps
    per tensor matters: a single counter shared by k tensors advances k
    times per training step and makes the bias correction fade too fast.

    Limitation: tensors are told apart by shape only, so two different
    tensors with an identical shape share one set of state.
    """

    def __init__(self, learning_rate=0.01, beta1=0.9, beta2=0.999, epsilon=1e-8):
        """
        Args:
            learning_rate: Base step size.
            beta1: Decay rate of the first-moment (mean) estimate.
            beta2: Decay rate of the second-moment (squared) estimate.
            epsilon: Added to the denominator to avoid division by zero.
        """
        super().__init__(learning_rate)
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        # All three dicts are keyed by weights.shape.
        self.m = {}  # first-moment estimate
        self.v = {}  # second-moment estimate
        self.t = {}  # number of updates applied so far

    def _update_moments(self, weights, gradients):
        """Advance the state for ``weights``; return ``(m, v, step)``."""
        key = np.shape(weights)
        step = self.t.get(key, 0) + 1
        # Missing entries behave like all-zero moment estimates.
        first = self.beta1 * self.m.get(key, 0) + (1 - self.beta1) * gradients
        second = self.beta2 * self.v.get(key, 0) + (1 - self.beta2) * gradients ** 2
        self.t[key] = step
        self.m[key] = first
        self.v[key] = second
        return first, second, step

    def update(self, weights, gradients):
        """Return the weights after one Adam step."""
        first, second, step = self._update_moments(weights, gradients)
        m_hat = first / (1 - self.beta1 ** step)
        v_hat = second / (1 - self.beta2 ** step)
        return weights - self.learning_rate * m_hat / (np.sqrt(v_hat) + self.epsilon)


class Nadam(Adam):
    """Adam with a Nesterov-style look-ahead on the first moment.

    Identical to :class:`Adam` except that the numerator blends the
    updated first moment with the current gradient before bias
    correction: ``(beta1 * m + (1 - beta1) * g) / (1 - beta1 ** t)``.
    """

    def update(self, weights, gradients):
        """Return the weights after one Nadam step."""
        # The state lives in per-shape dicts, so it has to be read through
        # the shared helper; arithmetic on self.m / self.v directly would
        # operate on the dicts themselves and raise TypeError.
        first, second, step = self._update_moments(weights, gradients)
        lookahead = self.beta1 * first + (1 - self.beta1) * gradients
        m_hat = lookahead / (1 - self.beta1 ** step)
        v_hat = second / (1 - self.beta2 ** step)
        return weights - self.learning_rate * m_hat / (np.sqrt(v_hat) + self.epsilon)


In [6]:
!wandb login
class NeuralNetwork:
    """Bias-free network with one hidden layer and a softmax output.

    Training uses mini-batches; per-epoch loss is logged to Weights &
    Biases and printed.  The same optimizer object is called once for
    each of the two weight matrices on every batch, so an optimizer that
    keeps state must keep it separately for each matrix.
    """

    def __init__(self, input_size, hidden_size, output_size, optimizer, activation=sigmoid, activation_derivative=sigmoid_derivative):
        """
        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output classes.
            optimizer: Object whose ``update(weights, gradients)`` returns
                the new weights.
            activation: Hidden-layer activation function.
            activation_derivative: Its derivative; it is called with the
                *activated* hidden output, not the pre-activation.
        """
        # Standard-normal draws scaled by 0.01 give small initial weights.
        self.weights_input_hidden = np.random.randn(input_size, hidden_size) * 0.01
        self.weights_hidden_output = np.random.randn(hidden_size, output_size) * 0.01
        self.optimizer = optimizer
        self.activation = activation
        self.activation_derivative = activation_derivative

    def forward(self, X):
        """Run a forward pass and return the softmax output for ``X``.

        The intermediate values are cached on ``self`` because
        :meth:`backward` reads them.
        """
        self.hidden_input = np.dot(X, self.weights_input_hidden)
        self.hidden_output = self.activation(self.hidden_input)
        self.final_input = np.dot(self.hidden_output, self.weights_hidden_output)
        self.final_output = softmax(self.final_input)
        return self.final_output

    def backward(self, X, y_true):
        """Back-propagate the error of the last forward pass and update both weight matrices.

        Must be called after :meth:`forward` on the same ``X``.  The
        gradients are sums over the batch (the error term is not divided
        by the batch size).
        """
        loss_gradient = cross_entropy_derivative(y_true, self.final_output)
        # The hidden-layer error has to be computed with the output
        # weights as they were during the forward pass, i.e. before they
        # are updated below.
        d_hidden_output = np.dot(loss_gradient, self.weights_hidden_output.T) * self.activation_derivative(self.hidden_output)
        grad_hidden_output = np.dot(self.hidden_output.T, loss_gradient)
        grad_input_hidden = np.dot(X.T, d_hidden_output)
        self.weights_hidden_output = self.optimizer.update(self.weights_hidden_output, grad_hidden_output)
        self.weights_input_hidden = self.optimizer.update(self.weights_input_hidden, grad_input_hidden)

    def train(self, X_train, y_train, epochs=10, batch_size=32):
        """Train with mini-batches, logging the mean loss of every epoch.

        Args:
            X_train: Training inputs, one sample per row.
            y_train: Targets aligned row-by-row with ``X_train``.
            epochs: Number of passes over the training data.
            batch_size: Maximum number of rows per batch; the last batch
                of an epoch may be smaller.

        Returns:
            List with one mean per-sample loss per epoch.

        Raises:
            ValueError: If ``X_train`` has no rows.
        """
        num_samples = X_train.shape[0]
        if num_samples == 0:
            raise ValueError("X_train must contain at least one sample")

        wandb.init(project="DA6401-Assignment-1", id="Question-3", settings=wandb.Settings(init_timeout=300))
        loss_history = []
        try:
            for epoch in range(epochs):
                loss_sum = 0.0
                for start in range(0, num_samples, batch_size):
                    X_batch = X_train[start:start + batch_size]
                    y_batch = y_train[start:start + batch_size]
                    predictions = self.forward(X_batch)
                    # cross_entropy is a per-sample mean; weighting it by
                    # the batch's row count keeps a short final batch from
                    # being over-represented in the epoch average.
                    loss_sum += cross_entropy(y_batch, predictions) * X_batch.shape[0]
                    self.backward(X_batch, y_batch)
                # Dividing by the sample count (not n // batch_size) is
                # correct for a partial last batch and for n < batch_size.
                epoch_loss = loss_sum / num_samples
                loss_history.append(epoch_loss)
                wandb.log({"Epoch": epoch+1, "Loss": epoch_loss})
                print(f'Epoch {epoch+1}, Loss: {epoch_loss:.4f}')
        except BaseException:
            # Close the run even when training is interrupted, and mark
            # it as failed instead of leaving it open.
            wandb.finish(exit_code=1)
            raise
        wandb.finish()
        return loss_history

    def predict(self, X):
        """Return the index of the highest-probability class for each row of ``X``."""
        return np.argmax(self.forward(X), axis=1)

# Synthetic smoke-test data: 1000 standard-normal samples with 784
# features each, paired with uniformly random one-hot labels over 10
# classes.
X_train = np.random.randn(1000, 784)
y_train = np.eye(10)[np.random.randint(0, 10, size=X_train.shape[0])]

# Train a 784-128-10 network with Adam for 10 epochs; the layer sizes at
# both ends are read back from the arrays created above.
optimizer = Adam(learning_rate=0.01)
nn = NeuralNetwork(
    input_size=X_train.shape[1],
    hidden_size=128,
    output_size=y_train.shape[1],
    optimizer=optimizer,
)
nn.train(X_train, y_train, epochs=10, batch_size=32)

[34m[1mwandb[0m: Currently logged in as: [33msubikksha[0m ([33msubikksha-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
Epoch 1, Loss: 2.5269
Epoch 2, Loss: 1.9832
Epoch 3, Loss: 0.6656
Epoch 4, Loss: 0.1333
Epoch 5, Loss: 0.0463
Epoch 6, Loss: 0.0223
Epoch 7, Loss: 0.0151
Epoch 8, Loss: 0.0114
Epoch 9, Loss: 0.0091
Epoch 10, Loss: 0.0075


0,1
Epoch,▁▂▃▃▄▅▆▆▇█
Loss,█▆▃▁▁▁▁▁▁▁

0,1
Epoch,10.0
Loss,0.00755
