<a href="https://colab.research.google.com/github/subikkshas/DA6401/blob/main/Optimizers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Optimizers

import numpy as np

class sgd:
    """Plain gradient descent optimizer.

    The step is simply the gradient scaled by ``learning_rate``. The most
    recently computed step is cached on ``update_step``.
    """

    def __init__(self, learning_rate=0.01):
        self.learning_rate = learning_rate
        # No step has been computed yet.
        self.update_step = 0

    def configure_params(self, parameters):
        """Set every ``name -> value`` pair of *parameters* as an attribute.

        No validation is done: any key is accepted and overwrites an
        existing attribute of the same name.
        """
        for attr_name, attr_value in parameters.items():
            setattr(self, attr_name, attr_value)

    def compute_update(self, gradient):
        """Return ``learning_rate * gradient`` and remember it as ``update_step``."""
        step = self.learning_rate * gradient
        self.update_step = step
        return step


class momentum:
    """Gradient descent with a momentum (velocity) term.

    ``update_step`` holds the running velocity: each call decays the previous
    value by ``momentum`` and adds the learning-rate-scaled gradient.
    """

    def __init__(self, learning_rate=1e-3, momentum=0.9):
        self.learning_rate = learning_rate
        self.momentum = momentum
        # Velocity starts at zero, so the first step is lr * gradient.
        self.update_step = 0

    def configure_params(self, parameters):
        """Set every ``name -> value`` pair of *parameters* as an attribute.

        No validation is done: any key is accepted and overwrites an
        existing attribute of the same name.
        """
        for attr_name, attr_value in parameters.items():
            setattr(self, attr_name, attr_value)

    def compute_update(self, gradient):
        """Advance the velocity with *gradient* and return the new step."""
        decayed_velocity = self.momentum * self.update_step
        scaled_gradient = self.learning_rate * gradient
        self.update_step = decayed_velocity + scaled_gradient
        return self.update_step


class nesterov:
    """Nesterov Accelerated Gradient (NAG).

    The gradient is evaluated at a look-ahead point obtained by first moving
    the weights along the decayed previous step.
    """

    def __init__(self, learning_rate=1e-3, momentum=0.9):
        self.learning_rate = learning_rate
        self.momentum = momentum
        # Previous step; zero means the first look-ahead equals the weights.
        self.update_step = 0

    def configure_params(self, parameters):
        """Set every ``name -> value`` pair of *parameters* as an attribute.

        No validation is done: any key is accepted and overwrites an
        existing attribute of the same name.
        """
        for attr_name, attr_value in parameters.items():
            setattr(self, attr_name, attr_value)

    def compute_update(self, weights, gradient_fn):
        """Take one NAG step and return the updated weights.

        *gradient_fn* is called once with the look-ahead weights and must
        return the gradient there. The new step is stored on ``update_step``
        and the return value is ``weights - update_step``.
        """
        # Part of the step that is carried over from the previous iteration.
        carried_step = self.momentum * self.update_step
        lookahead = weights - carried_step
        lookahead_gradient = gradient_fn(lookahead)
        self.update_step = carried_step + self.learning_rate * lookahead_gradient
        return weights - self.update_step


class rmsprop:
    """RMSProp optimizer.

    Keeps an exponentially decayed average of squared gradients and divides
    the learning rate by its square root (plus ``epsilon``).
    """

    def __init__(self, decay_rate=0.9, learning_rate=1e-3, epsilon=1e-7):
        self.decay_rate = decay_rate
        self.learning_rate = learning_rate
        self.epsilon = epsilon
        # Running average of squared gradients.
        self.accumulated_squared_grad = 0

    def configure_params(self, parameters):
        """Set every ``name -> value`` pair of *parameters* as an attribute.

        No validation is done: any key is accepted and overwrites an
        existing attribute of the same name.
        """
        for attr_name, attr_value in parameters.items():
            setattr(self, attr_name, attr_value)

    def compute_update(self, gradient):
        """Update the squared-gradient average and return the scaled step."""
        history_part = self.decay_rate * self.accumulated_squared_grad
        fresh_part = (1 - self.decay_rate) * (gradient ** 2)
        self.accumulated_squared_grad = history_part + fresh_part

        # epsilon is added outside the square root to avoid dividing by zero.
        denominator = np.sqrt(self.accumulated_squared_grad) + self.epsilon
        effective_rate = self.learning_rate / denominator
        return effective_rate * gradient


class adam:
    """Adam optimizer.

    Tracks decayed averages of the gradient (``momentum``) and of the squared
    gradient (``accumulated_squared_grad``), both bias-corrected using
    ``timestep``, which starts at 1 and grows by one per call.
    """

    def __init__(self, beta1=0.9, beta2=0.999, learning_rate=1e-2, epsilon=1e-8):
        # Hyperparameters.
        self.beta1 = beta1
        self.beta2 = beta2
        self.learning_rate = learning_rate
        self.epsilon = epsilon
        # Running state.
        self.momentum = 0
        self.accumulated_squared_grad = 0
        self.timestep = 1

    def configure_params(self, parameters):
        """Set every ``name -> value`` pair of *parameters* as an attribute.

        No validation is done: any key is accepted and overwrites an
        existing attribute of the same name.
        """
        for attr_name, attr_value in parameters.items():
            setattr(self, attr_name, attr_value)

    def compute_update(self, gradient):
        """Update both moment estimates and return the bias-corrected step."""
        step_index = self.timestep

        first_moment = self.beta1 * self.momentum + (1 - self.beta1) * gradient
        self.momentum = first_moment
        second_moment = (
            self.beta2 * self.accumulated_squared_grad
            + (1 - self.beta2) * (gradient ** 2)
        )
        self.accumulated_squared_grad = second_moment

        # Bias correction compensates for the zero-initialised averages.
        first_hat = first_moment / (1 - self.beta1 ** step_index)
        second_hat = second_moment / (1 - self.beta2 ** step_index)

        self.timestep = step_index + 1
        effective_rate = self.learning_rate / (np.sqrt(second_hat) + self.epsilon)
        return effective_rate * first_hat


class nadam:
    """Nadam optimizer (Adam with a Nesterov-style look-ahead on the momentum).

    State is the same as Adam's: ``momentum``, ``accumulated_squared_grad``
    and a ``timestep`` that starts at 1 and grows by one per call.
    """

    def __init__(self, beta1=0.9, beta2=0.999, learning_rate=1e-3, epsilon=1e-7):
        # Hyperparameters.
        self.beta1 = beta1
        self.beta2 = beta2
        self.learning_rate = learning_rate
        self.epsilon = epsilon
        # Running state.
        self.momentum = 0
        self.accumulated_squared_grad = 0
        self.timestep = 1

    def configure_params(self, parameters):
        """Set every ``name -> value`` pair of *parameters* as an attribute.

        No validation is done: any key is accepted and overwrites an
        existing attribute of the same name.
        """
        for attr_name, attr_value in parameters.items():
            setattr(self, attr_name, attr_value)

    def compute_update(self, gradient):
        """Update both moment estimates and return the Nadam step."""
        step_index = self.timestep

        first_moment = self.beta1 * self.momentum + (1 - self.beta1) * gradient
        self.momentum = first_moment
        second_moment = (
            self.beta2 * self.accumulated_squared_grad
            + (1 - self.beta2) * (gradient ** 2)
        )
        self.accumulated_squared_grad = second_moment

        # Bias-correction denominator shared by the momentum and gradient terms.
        first_correction = 1 - self.beta1 ** step_index
        first_hat = first_moment / first_correction
        second_hat = second_moment / (1 - self.beta2 ** step_index)

        # Nesterov blend: corrected momentum plus the corrected current gradient.
        gradient_weight = (1 - self.beta1) / first_correction
        blended = self.beta1 * first_hat + gradient_weight * gradient

        self.timestep = step_index + 1
        effective_rate = self.learning_rate / (np.sqrt(second_hat) + self.epsilon)
        return effective_rate * blended
