# Documentation

#### run.py

In [None]:
import argparse
import torch

if __name__ == "__main__":
    # Build the command-line interface of the experiment runner.
    parser = argparse.ArgumentParser(description='Biological deep learning')

    # Register every supported option together with its default value.
    parser.add_argument('--is_training', type=int, default=1, help='status')
    parser.add_argument('--data_root', type=str, default="data/mnist/")
    parser.add_argument('--train_data_filename', type=str, default="mnist_train.csv")
    parser.add_argument('--test_data_filename', type=str, default="mnist_test.csv")
    # The learning rate is fractional, so it has to be parsed as a float: with
    # type=int a value such as "--learning_rate 0.01" is rejected by argparse.
    parser.add_argument('--learning_rate', type=float, default=0.001)
    parser.add_argument('--num_epochs', type=int, default=100)

    # Parse the arguments given on the command line.
    args = parser.parse_args()

    # Any non-zero --is_training value selects the training path.
    if args.is_training:
        print("training")

#### data_loader.py


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd

class Image_Data_Set(Dataset):
    """
    Dataset of images and labels read from a .csv file, usable for training or testing.

    Each row of the .csv file holds the label in its first column followed by the
    pixel values of one image. The data set is empty until setup_data() is called.

    @attr.
        data_frame (torch.Tensor or None) = float tensor with one row of pixel values
                                            (divided by 255) per image
        labels (torch.Tensor or None) = labels taken from the first .csv column
    """

    def __init__(self, train=True, name='MNIST'):
        """
        Constructor method
        @param
            train (bool) = is the dataset in training or testing
            name (str) = name of data set
        """
        self.__name = name
        self.__flag = 'train' if train else 'test'
        # Public attributes shared by setup_data(), __len__() and __getitem__().
        self.data_frame = None
        self.labels = None

    def setup_data(self, data_set):
        """
        Set up the data frame and labels with the defined data set
        @param
            data_set (str) = string defining path to .csv file of data set
        """
        raw = pd.read_csv(data_set, header=None)
        # Column 0 holds the labels; every remaining column is a pixel value.
        self.labels = torch.tensor(raw[0].values)
        pixels = raw.drop(raw.columns[0], axis=1).values
        self.data_frame = self.normalize(torch.tensor(pixels, dtype=torch.float))

    def normalize(self, row):
        """
        Normalize number(s) assuming max value = 255
        @param
            row (number or torch.Tensor) = value(s) to scale
        @return
            the value(s) divided by 255
        """
        return row / 255

    def __len__(self):
        """
        Get the number of samples; 0 while no data has been loaded
        """
        if self.data_frame is None:
            return 0
        return len(self.data_frame)

    # Attribute getter functions
    def get_flag(self):
        """
        @return
            'train' or 'test', depending on the constructor argument
        """
        return self.__flag

    def get_name(self):
        """
        @return
            name of the data set given to the constructor
        """
        return self.__name

    def __getitem__(self, idx):
        """
        Get one sample
        @param
            idx (int or torch.Tensor) = index of the sample
        @return
            tuple (features, label) of the sample
        """
        if self.data_frame is None:
            raise RuntimeError("setup_data() must be called before indexing the data set")
        if torch.is_tensor(idx):
            idx = idx.tolist()
        return self.data_frame[idx], self.labels[idx]



# TODO: Write this part of the code somewhere else
if __name__ == "__main__":
    # Build one data set object per supported source, using the default
    # (training) mode of the constructor.
    dataset_names = ("MNIST", "Fashion_MNIST")
    mnist, fashion_mnist = [Image_Data_Set(name=source) for source in dataset_names]

#### mnist_factory.py


In [None]:
import os

def convert(img_file, label_file, out_file, data_size):
    """
    Converts image file and labels into .csv file

    Every output row is "label,pixel_0,...,pixel_783": the label byte followed by
    the 28*28 pixel bytes of the matching image, written as decimal numbers.

    @param
        img_file (str) = relative path to image file
        label_file (str) = relative path to label file
        out_file (str) = relative path to out file
        data_size (int) = number of data inputs to read
    @raise
        ValueError = if either input file holds fewer than data_size entries
    """
    # Resolve all paths against the current working directory
    # (absolute paths are left untouched by os.path.join)
    project_root = os.getcwd()
    img_path = os.path.join(project_root, img_file)
    label_path = os.path.join(project_root, label_file)
    out_path = os.path.join(project_root, out_file)

    # IDX files start with a header: 16 bytes for images (magic number, count,
    # rows, columns) and 8 bytes for labels (magic number, count). The header is
    # skipped because only the raw data bytes are needed.
    img_header_size = 16
    label_header_size = 8
    img_size = 28*28
    count = max(data_size, 0)

    # Context managers close every file even when reading or writing fails
    with open(img_path, "rb") as imgs, open(label_path, "rb") as labels:
        imgs.read(img_header_size)
        labels.read(label_header_size)

        label_bytes = labels.read(count)
        if len(label_bytes) != count:
            raise ValueError(
                f"{label_file} holds {len(label_bytes)} labels, expected {count}")

        with open(out_path, "w") as out:
            for index, label in enumerate(label_bytes):
                # Read one whole image at a time instead of byte by byte
                pixels = imgs.read(img_size)
                if len(pixels) != img_size:
                    raise ValueError(
                        f"{img_file} ends inside image {index}, expected {count} images")
                # Iterating over bytes yields ints, so no ord() is needed
                out.write(",".join(map(str, (label, *pixels))) + "\n")

# TODO: These lines should be written somewhere else
# One entry per conversion, in execution order:
# (image file, label file, output .csv file, number of samples to read)
_CONVERSION_JOBS = (
    ("data/mnist/train-images.idx3-ubyte", "data/mnist/train-labels.idx1-ubyte",
     "data/mnist/mnist_train.csv", 60000),
    ("data/mnist/t10k-images.idx3-ubyte", "data/mnist/t10k-labels.idx1-ubyte",
     "data/mnist/mnist_test.csv", 10000),
    ("data/fashion_mnist/train-images.idx3-ubyte", "data/fashion_mnist/train-labels.idx1-ubyte",
     "data/fashion_mnist/fashion-mnist_train.csv", 60000),
    ("data/fashion_mnist/t10k-images.idx3-ubyte", "data/fashion_mnist/t10k-labels.idx1-ubyte",
     "data/fashion_mnist/fashion-mnist_test.csv", 10000),
)

# NOTE: this still runs whenever the module is executed or imported.
for _job in _CONVERSION_JOBS:
    convert(*_job)

#### hebbian_layer.py

In [None]:
import torch
import torch.nn as nn
import math
import matplotlib.pyplot as plt
from numpy import outer
import warnings

warnings.filterwarnings("ignore")

#Hebbian learning layer that implements lateral inhibition in output. Not trained through supervision.

class HebbianLayer(nn.Module):
    """
    HebbianLayer class is a module for neural network layers that use Hebbian learning.

    The layer works in one of two modes chosen at construction time:
        - feature mode (classifier=False): the linear output goes through lateral inhibition,
          the weights are updated with Sanger's rule and then rescaled by weightDecay()
        - classifier mode (classifier=True): the weights are updated from the softmax of the
          linear output (optionally against a clamped target) and the layer returns the softmax
    All parameters of the linear map have requires_grad disabled; the update methods replace
    them directly.
    """

    def __init__(self, input_dimension, output_dimension, classifier, lamb=2, heb_lr=0.001, gamma=0.99, eps=10e-5):
        """
        Constructor method for a hebbian layer
        @param
            input_dimension (int) = the number of input features for the layer
            output_dimension (int) = the number of output features/neurons for the layer
            classifier (bool) = a boolean that indicates whether this layer will act as a classifier
            lamb (float) = hyperparameter controlling strength of lateral inhibition
            heb_lr (float) = learning rate for hebbian learning updates
            gamma (float) = hyperparameter used for the exponential moving average of the outputs
            eps (float) = a very small hyperparameter value; scales the growth factors in weightDecay()
        """
        super().__init__()

        # Store everything specified by the constructor parameters
        self.input_dimension = input_dimension
        self.output_dimension = output_dimension
        self.lamb = lamb
        self.alpha = heb_lr
        self.isClassifier = classifier
        self.scheduler = None  # only stored by setScheduler(); not read by any method of this class
        self.eps = eps
        self.exponential_average = torch.zeros(self.output_dimension)
        self.gamma = gamma

        # Pytorch neural network components
        self.fc = nn.Linear(self.input_dimension, self.output_dimension, bias=True)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        self.softplus = nn.Softplus()
        self.tanh = nn.Tanh()
        # Explicit dim: normalise over the output neurons. For 1-D and 2-D inputs this is the
        # dimension the implicit (deprecated) choice selected as well.
        self.softmax = nn.Softmax(dim=-1)

        self.itensors = self.createITensors()

        # Uniform initialisation in [0, 1] of weights and bias; gradients are never needed
        # because all updates are Hebbian
        for param in self.fc.parameters():
            torch.nn.init.uniform_(param, a=0.0, b=1.0)
            param.requires_grad_(False)

    def setScheduler(self, scheduler):
        """
        Method stores a learning rate scheduler on the layer
        @param
            scheduler (object) = the scheduler to store
        """
        self.scheduler = scheduler

    def createITensors(self):
        """
        Method creates identity tensors for Sanger's rule computation
        (which ensures orthogonality and prevent redundancy among neurons)
        @return
            tensor of shape (output_dimension, output_dimension, output_dimension) where entry i is a
            matrix whose top-left (i+1)x(i+1) corner is the identity and whose other entries are 0
        """
        itensors = torch.zeros(self.output_dimension, self.output_dimension, self.output_dimension,
                               dtype=torch.float)
        for i in range(self.output_dimension):
            padding = self.output_dimension - i - 1
            # Pad the (i+1)x(i+1) identity on the right and at the bottom up to full size
            itensors[i] = torch.nn.functional.pad(torch.eye(i + 1), (0, padding, 0, padding))
        return itensors

    def inhibition(self, x):
        """
        Method calculates the lateral inhibition.
        The mathematical equation calculated with this function is the following:
            ' h_mu = (h_mu)^(lambda)/ max on i (h_mu_i)^(lambda) '
        where h is the output of the layer after a ReLU.
        @param
            x (torch.Tensor) = output of the linear map
        @return
            the answer to the equation specified above; if the denominator is 0 (no neuron is
            active) the powered activations are returned undivided instead of NaN (0/0)
        """
        x = self.relu(x)
        max_ele = torch.max(x).item()
        x = torch.pow(x, self.lamb)
        denominator = abs(max_ele)**self.lamb
        if denominator == 0:
            return x
        x /= denominator
        return x

    def updateWeightsHebbianClassifier(self, input, output, clamped_output=None, train=1):
        """
        Method performs weight updates for a Hebbian learning-based classifier.
        With y = softmax(output) the update direction is
            - (clamped_output - y) x^T - W * (output * y)   if clamped_output is given
            - y x^T                                         otherwise
        The direction is scaled by the learning rate and added to the weights, every row is divided
        by its maximum and the first column is set to 0.
        @param
            input (vector) = the input feature vector to the layer
            output (vector) = the raw output logits from the layer before applying activation
            clamped_output (vector) = an OPTIONAL target output vector
            train (bool) = a flag indicating whether the network is in training mode
                                - If not in training mode, weights are not updated
        """
        if not train:
            return

        # Clone and detach so the update never touches the caller's tensors, then squeeze away
        # singleton dimensions (e.g. a batch dimension of 1) to get plain vectors
        u = output.clone().detach().squeeze()
        x = input.clone().detach().squeeze()

        # Transform the logits into a probability distribution over all output classes
        y = torch.softmax(u, dim=0)

        # "is not None": comparing a tensor with "!=" is not an identity check
        if clamped_output is not None:
            # Outer product of the error and the input vector
            outer_prod = torch.outer(clamped_output - y, x)
            # Element-wise product of the logits and their probabilities, used to scale the weights
            u_times_y = torch.mul(u, y)
            # Subtract the current weights, scaled row-wise
            A = outer_prod - self.fc.weight*(u_times_y.unsqueeze(1))
        else:
            # Plain Hebbian term: outer product of the output probabilities and the input vector
            A = torch.outer(y, x)

        # Apply the update scaled by the learning rate
        A = self.fc.weight + self.alpha*A
        # Divide each row of the weight matrix by its largest entry
        weight_maxes = torch.max(A, dim=1).values
        self.fc.weight = nn.Parameter(A/weight_maxes.unsqueeze(1), requires_grad=False)
        # Zero the weight of the first input feature for every neuron
        # (original note: this is so that the rest of the neurons start learning)
        self.fc.weight[:, 0] = 0

    def updateWeightsHebbian(self, input, output, clamped_output=None, train=1):
        """
        Method implements Hebbian learning with Sanger's rule and updates the exponential moving
        average of the outputs that weightDecay() relies on.

        The following equations are calculated:
            1. Sanger's Rule, deltaW_(ij)=alpha*x_j*y_i-alpha*y_i*sum(k=1 to i) (w_(kj)*y_k)
            2. exponential_average = gamma*exponential_average + (1-gamma)*y

        @param
            input (vector) = the input feature vector to the layer
            output (vector) = the output of the layer after lateral inhibition
            clamped_output (vector) = not used by this method
            train (bool) = a flag indicating whether the network is in training mode
                                - If not in training mode, weights are not updated
        """
        if not train:
            return

        x = input.clone().detach().to(torch.float).squeeze()
        y = output.clone().detach().to(torch.float).squeeze()

        # Hebbian term: product of the outputs and the input features
        outer_prod = torch.outer(y, x)

        # Sanger's rule. Because itensors[l] is an identity restricted to the first l+1 neurons,
        # the einsum yields A[l, j] = sum over k <= l of w[k, j] * y[k]
        initial_weight = torch.transpose(self.fc.weight.clone().detach(), 0, 1)
        A = torch.einsum('jk, lkm, m -> lj', initial_weight, self.itensors, y)
        # Scale row l by the activation y[l]
        A = A*(y.unsqueeze(1))

        # Weight delta: difference between the Hebbian term and the Sanger term, times learning rate
        delta_weight = self.alpha*(outer_prod - A)
        self.fc.weight = nn.Parameter(torch.add(self.fc.weight, delta_weight), requires_grad=False)

        # Exponential moving average of the outputs, used in weight decay
        self.exponential_average = torch.add(self.gamma*self.exponential_average, (1 - self.gamma)*y)

    def updateBias(self, output, train=1):
        """
        Method updates the bias with bias + alpha*(exp(-bias)*y - 1) and divides the result by its
        largest entry. forward() does not call this method (the calls are commented out there).
        @param
            output (vector) = the output of the layer
            train (bool) = a flag indicating whether the network is in training mode
        """
        if not train:
            return
        y = output.clone().detach().squeeze()
        exponential_bias = torch.exp(-1*self.fc.bias)
        A = torch.mul(exponential_bias, y) - 1
        A = self.fc.bias + self.alpha*A
        bias_maxes = torch.max(A, dim=0).values
        self.fc.bias = nn.Parameter(A/bias_maxes.item(), requires_grad=False)

    def weightDecay(self):
        """
        Method rescales the weights of every neuron according to its activity, measured by the
        exponential moving average of the outputs relative to the mean over all neurons:
            1. positive weights are multiplied by a factor that is below 1 for neurons more active
               than average and above 1 for neurons less active than average
            2. negative weights are multiplied by the reciprocal of that factor
        Nothing is changed while the mean activity is 0 (e.g. before the first training step),
        because the activity ratio would be 0/0 and turn every weight into NaN.
        """
        # Baseline to compare individual neuron activities against
        average = torch.mean(self.exponential_average).item()
        if average == 0:
            return
        # Ratio of each neuron's activity to the average activity
        # (> 1: more active than average, < 1: less active than average)
        A = self.exponential_average/average

        # Growth factor per neuron; it shrinks as the activity ratio grows beyond 1
        growth_factor_positive = self.eps*self.tanh(-self.eps*(A - 1)) + 1
        growth_factor_negative = torch.reciprocal(growth_factor_positive)

        # Split the weights by sign, scale each part row-wise and recombine
        positive_weights = torch.where(self.fc.weight > 0, self.fc.weight, 0.0)
        negative_weights = torch.where(self.fc.weight < 0, self.fc.weight, 0.0)
        positive_weights = positive_weights*growth_factor_positive.unsqueeze(1)
        negative_weights = negative_weights*growth_factor_negative.unsqueeze(1)
        self.fc.weight = nn.Parameter(torch.add(positive_weights, negative_weights), requires_grad=False)
        if (self.fc.weight.isnan().any()):
            print("NAN WEIGHT")

    def forward(self, x, clamped_output=None, train=1):
        """
        Feed forward. Applies the linear map, then
            - classifier layer: updates the weights and returns the softmax of the linear output
            - feature layer: applies lateral inhibition, updates the weights with Sanger's rule,
              applies weight decay and returns the inhibited output
        @param
            x (vector) = input vector to the layer
            clamped_output (vector) = an optional target output, only used by a classifier layer
            train (bool) = a flag indicating whether the layer is in training mode;
                           when false no weight is modified
        @return
            the output of the layer
        """
        input = x.clone()
        x = self.fc(x)

        if self.isClassifier:
            self.updateWeightsHebbianClassifier(input, x, clamped_output=clamped_output, train=train)
            #self.updateBias(x, train=train)
            return self.softmax(x)

        x = self.inhibition(x)
        # train must be passed by keyword: the third positional parameter is clamped_output, so
        # a positional train would leave the real flag at its default of 1
        self.updateWeightsHebbian(input, x, train=train)
        #self.updateBias(x, train=train)
        if train:
            # Weight decay is a weight update as well, so it is skipped outside training
            self.weightDecay()
        return x

    def activeClassifierWeights(self, beta):
        """
        Counts the number of active feature selectors (above a certain cutoff beta).
        @param
            beta (float) = the cutoff
        @return
            number of non-zero weights that are greater than beta
        """
        weights = self.fc.weight
        active = torch.where(weights > beta, weights, 0.0)
        return active.nonzero().size(0)

    def visualizeWeights(self, classifier=0):
        """
        Creates one heatmap per neuron of the layer, showing its weight vector reshaped to a square
        image. The grid has 5 columns for a classifier layer and 8 columns otherwise; the number of
        rows follows from output_dimension, so layers of any size can be plotted.
        @param
            classifier (bool) = selects the classifier layout (5 columns) or the feature layout (8)
        """
        weight = self.fc.weight.detach().cpu()
        side = int(math.sqrt(weight.size(1)))  # the input dimension is assumed to be a perfect square
        columns = 5 if classifier else 8
        row_height = 4 if classifier else 2  # keeps the 16x8 (2x5) and 16x16 (8x8) figure sizes
        rows = max(1, math.ceil(self.output_dimension/columns))

        # squeeze=False keeps axes two-dimensional even for a single row
        fig, axes = plt.subplots(rows, columns, figsize=(16, row_height*rows), squeeze=False)
        for ele in range(self.output_dimension):
            heatmap = weight[ele].view(side, side)
            ax = axes[ele // columns, ele % columns]
            im = ax.imshow(heatmap, cmap='hot', interpolation='nearest')
            fig.colorbar(im, ax=ax)
            ax.set_title(f'Weight {ele}')
        # Hide the cells of the last row that have no neuron
        for unused in axes.ravel()[self.output_dimension:]:
            unused.set_visible(False)
        plt.tight_layout()
        plt.show()


#### mlp.py

In [None]:
import torch
import torch.nn as nn 
import torch.optim as optim
import matplotlib.pyplot as plt

from torch.utils.data import DataLoader
from data.data_loader import MNIST_set, fashion_MNIST_set
from models.hebbian_network import HebbianNetwork
from layers.scheduler import Scheduler

class MLPExperiment():
    """
    Class to create and run experiments using model
    """

    def __init__(self, args, input_dimension, hidden_layer_dimension, output_dimension, lamb=1, heb_lr=1, grad_lr=0.001, num_epochs=3, gamma=0, eps=10e-5):
        """
        Constructor method
        @param
            args (object) = stored and handed unchanged to the data set constructors
            input_dimension (int) = number of input data
            hidden_layer_dimension (int) = number of neurons in hidden layer
            output_dimension (int) = number of output neurons
            lamb (float) = hyperparameter for lateral inhibition
            heb_lr (float) = hebbian layer learning rate
            grad_lr (float) = gradient descent learning rate
            num_epochs (int) = number of epochs
            gamma (float) = decay factor for learning rate; 0 means that no scheduler is set
            eps (float) = small number forwarded to the network
        """
        # TODO: For some reason my hebbian network is not processing batches together.
        self.model = HebbianNetwork(input_dimension, hidden_layer_dimension, output_dimension,
                                    heb_lr=heb_lr, lamb=lamb, eps=eps)
        self.args = args
        self.num_epochs = num_epochs
        self.grad_lr = grad_lr
        self.heb_lr = heb_lr
        self.gamma = gamma

    def optimizer(self):
        """
        @return
            Adam optimizer over the model parameters, using grad_lr
        """
        return optim.Adam(self.model.parameters(), self.grad_lr)

    def set_hebbian_scheduler(self):
        """
        Set the scheduler for hebbian layer (step size 1000, decay factor gamma).
        """
        scheduler = Scheduler(self.heb_lr, 1000, self.gamma)
        self.model.setScheduler(scheduler, 0)

    def loss_function(self):
        """
        @return
            cross entropy loss function
        """
        return nn.CrossEntropyLoss()

    # TODO: create proper methods for training each data set
    def train(self):
        """
        Trains the experiment: feeds every sample of the training data set through the model,
        one sample per step, for num_epochs epochs. The labels are one hot encoded over 10
        classes and given to the model as clamped output.
        """
        # Load the dataset into a data loader
        data_set = fashion_MNIST_set(self.args)
        # data_set = MNIST_set(self.args)
        data_loader = DataLoader(data_set, batch_size=1, shuffle=True)

        # Train model on dataset
        self.model.train()

        optimizer = self.optimizer()
        if self.gamma != 0:
            self.set_hebbian_scheduler()

        for _ in range(self.num_epochs):
            for inputs, labels in data_loader:
                self.model(inputs, clamped_output=self.oneHotEncode(labels, 10))
                # NOTE(review): no loss is computed and backward() is never called in this
                # loop, so this step presumably has no gradients to apply - confirm it is needed
                optimizer.step()

    def oneHotEncode(self, labels, num_classes):
        """
        Given a tensor of labels, returns a one hot encoded tensor for each label.
        @param
            labels (torch.Tensor) = 1-D tensor of integer class indices
            num_classes (int) = number of classes
        @return
            tensor of shape (len(labels), num_classes) with singleton dimensions removed
        """
        one_hot_encoded = torch.zeros(len(labels), num_classes)
        one_hot_encoded.scatter_(1, labels.unsqueeze(1), 1)

        return one_hot_encoded.squeeze()

    def visualizeWeights(self):
        """
        Visualizes the weights of the layers of the model.
        """
        self.model.visualizeWeights()

    def test(self):
        """
        Runs the model on the test data set without training.
        @return
            fraction of samples whose predicted class equals the label (0.0 for an empty data set)
        """
        data_set = fashion_MNIST_set(self.args, 0)
        data_loader = DataLoader(data_set, batch_size=1, shuffle=True)
        cor = 0
        tot = 0
        for inputs, labels in data_loader:
            outputs = torch.argmax(self.model(inputs, None, train=0))
            if outputs.item() == labels.item():
                cor += 1
            tot += 1
        # Avoid a ZeroDivisionError when the data set has no samples
        return cor/tot if tot else 0.0

    def printExponentialAverages(self):
        """
        Builds a scatter plot of the logarithm of the exponential average of every neuron of the
        hebbian layer, labelling each point with its neuron index. The plot is not shown here.
        """
        # Read the model of this instance, not the module-level "experiment" variable,
        # which only exists when the file is run as a script
        A = torch.log(self.model.hebbian_layer.exponential_average).tolist()
        plt.scatter(range(len(A)), A)
        for i, y in enumerate(A):
            plt.text(i, y, f'{i}', ha='center', va='bottom')
        plt.xlabel("Feature Selector")
        plt.ylabel("Log (Exponential Average)")
        plt.title("Logged Exponential Averages of Each Feature Selector")

    def activeClassifierWeights(self, beta):
        """
        @param
            beta (float) = the cutoff
        @return
            number of classifier layer weights above beta
        """
        return self.model.classifier_layer.activeClassifierWeights(beta)


# TODO: get rid of the code here and move it somewhere else
if __name__ == "__main__":
    experiment = MLPExperiment(None, 784, 256, 10, lamb=1, num_epochs=1, heb_lr=0.1)
    experiment.train()
    # MLPExperiment.visualizeWeights() takes no arguments and delegates to the model, which
    # plots both the hebbian layer and the classifier layer. A single call therefore replaces
    # the two calls that passed (10, classifier=...) and raised a TypeError.
    experiment.visualizeWeights()
    #experiment.test()
    
            
            
        

#### hebbian_network.py

In [None]:
from layers.hebbian_layer import HebbianLayer
import torch.nn as nn 

class HebbianNetwork(nn.Module):
    """
    Network made of two Hebbian layers: a feature (hidden) layer followed by a classifier layer.
    """

    def __init__(self, input_dimension, hidden_layer_dimension, output_dimension, heb_lr=1, lamb=1, eps=10e-5):
        """
        Constructor method
        @param
            input_dimension (int) = number of inputs
            hidden_layer_dimension (int) = number of neurons in hidden layer
            output_dimension (int) = number of output neurons
            heb_lr (float) = learning rate handed to both layers
            lamb (float) = hyperparameter for lateral neuron inhibition, handed to both layers
            eps (float) = small value handed to the hidden layer only
        """
        super().__init__()
        self.input_dimension = input_dimension
        self.output_dimension = output_dimension
        self.hidden_layer_dimension = hidden_layer_dimension

        # Settings shared by both layers; eps goes to the hidden layer only, so the
        # classifier layer keeps the default of HebbianLayer
        shared = dict(lamb=lamb, heb_lr=heb_lr)
        self.hebbian_layer = HebbianLayer(input_dimension, hidden_layer_dimension, False,
                                          eps=eps, **shared)
        self.classifier_layer = HebbianLayer(hidden_layer_dimension, output_dimension, True,
                                             **shared)

    # TODO: modify this and make it more reusable -> might need to change other parts of the code (hebbian_layer.py)
    def setScheduler(self, scheduler, classifier):
        """
        Method to set scheduler to either the classification layer or the hebbian layer
        @param
            scheduler (layers.Scheduler) = a scheduler
            classifier (bool) = true if setting scheduler for classifier layer
        """
        target_layer = self.classifier_layer if classifier else self.hebbian_layer
        target_layer.setScheduler(scheduler)

    def forward(self, x, clamped_output=None, train=1):
        """
        Method that defines how an input data flows through the network
        @param
            x (torch.Tensor) = input data as a tensor
            clamped_output (torch.Tensor) = optional target output; it is handed to the
                                            classifier layer only, the hidden layer gets None
            train (int) = true if in training
        @return
            output of the classifier layer
        """
        hidden = self.hebbian_layer(x, clamped_output=None, train=train)
        return self.classifier_layer(hidden, clamped_output=clamped_output, train=train)

    def visualizeWeights(self):
        """
        Method to visualize the weights of the hidden layer first, then of the classifier layer
        """
        for layer, is_classifier in ((self.hebbian_layer, 0), (self.classifier_layer, 1)):
            layer.visualizeWeights(classifier=is_classifier)

# TODO: remove this from code
if __name__ == "__main__":
    # Build a 784 -> 256 -> 10 network and print its module structure.
    layer_sizes = (784, 256, 10)
    network = HebbianNetwork(*layer_sizes)
    print(network)

#### scheduler.py

In [None]:
class Scheduler():
    """
    Class to manage and update learning rate of optimization process

    @attr.
        lr (float) = current learning rate
        step_size (int) = number of step() calls between two decays
        gamma (float) = factor the learning rate is multiplied with at every decay
        epoch (int) = number of step() calls so far
    """

    def __init__(self, heb_lr, step_size, gamma=0.99):
        """
        Constructor method
        @param
            heb_lr (float) = initial learning rate
            step_size (int) = step size -> how often the learning rate should be updated
            gamma (float) = decay factor -> factor to decay learning rate
        """
        # epoch counter
        self.epoch = 0
        self.gamma = gamma
        self.step_size = step_size
        self.lr = heb_lr

    def step(self):
        """
        Method used to increment epoch counter and update learning rate at every multiple of step size
        @return
            the learning rate after this step
        """
        self.epoch += 1
        decay_due = self.epoch % self.step_size == 0
        if not decay_due:
            return self.lr
        self.lr *= self.gamma
        return self.lr