# Documentation

#### run.py

In [None]:
import argparse
import torch

def build_arg_parser():
    """Build the command-line parser for the experiment runner.

    Returns:
        argparse.ArgumentParser: parser exposing --is_training, --data_root,
        --train_data_filename, --test_data_filename, --learning_rate and
        --num_epochs, each with the default given below.
    """
    # Creates argument parser object
    parser = argparse.ArgumentParser(description='Biological deep learning')

    # Adds arguments to the parser and sets their default values
    parser.add_argument('--is_training', type=int, default=1, help='status')
    parser.add_argument('--data_root', type=str, default="data/mnist/")
    parser.add_argument('--train_data_filename', type=str, default="mnist_train.csv")
    parser.add_argument('--test_data_filename', type=str, default="mnist_test.csv")
    # The learning rate is fractional: with type=int argparse would reject
    # any value such as "0.01" passed on the command line.
    parser.add_argument('--learning_rate', type=float, default=0.001)
    parser.add_argument('--num_epochs', type=int, default=100)

    return parser


if __name__ == "__main__":
    parser = build_arg_parser()

    # Parse arguments given to the parser
    args = parser.parse_args()

    if args.is_training:
        print("training")

#### data_loader.py


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd

"""
Class to set up datasets (separating images and labels) for both training and testing purposes
"""
class Image_Data_Set(Dataset):
    """Dataset of flattened images and their labels loaded from a .csv file.

    Every row of the .csv file is read as one sample: the first column is the
    label and the remaining columns are the pixel values of the image.

    Attributes:
        data_frame (torch.Tensor | None): float pixel values divided by 255,
            one row per sample; None until setup_data() has been called.
        labels (torch.Tensor | None): label of every sample; None until
            setup_data() has been called.
    """

    def __init__(self, train=True, name=None):
        """Create an empty data set.

        Args:
            train (bool): True when the data is used for training, False when
                it is used for testing.
            name (str): name of the data set. It needs a default because it
                follows a defaulted parameter.
        """
        self.__name = name
        self.__flag = 'train' if train else 'test'
        self.data_frame = None
        self.labels = None

    def setup_data(self, data_set):
        """Load labels and normalized pixel values from a .csv file.

        Args:
            data_set (str): path to the .csv file of the data set (anything
                accepted by pandas.read_csv works). The file has no header
                row.
        """
        raw = pd.read_csv(data_set, header=None)
        self.labels = torch.tensor(raw[0].values)
        pixels = raw.drop(raw.columns[0], axis=1).values
        self.data_frame = self.normalize(torch.tensor(pixels, dtype=torch.float))

    def normalize(self, row):
        """Return number(s) scaled to [0, 1], assuming max value = 255."""
        return row / 255

    def __len__(self):
        """Return the number of loaded samples (0 before setup_data())."""
        return 0 if self.data_frame is None else len(self.data_frame)

    # Attribute getter functions
    def get_flag(self):
        """Return 'train' or 'test' depending on how the set was created."""
        return self.__flag

    def get_name(self):
        """Return the name given to the data set."""
        return self.__name

    def __getitem__(self, idx):
        """Return the (features, label) pair stored at idx.

        Raises:
            RuntimeError: if setup_data() has not been called yet.
        """
        if self.data_frame is None:
            raise RuntimeError("setup_data() must be called before indexing the data set")
        if torch.is_tensor(idx):
            idx = idx.tolist()
        return self.data_frame[idx], self.labels[idx]



# TODO: Write this part of the code somewhere else
if __name__ == "__main__":
    # Build one (still empty) data set object per supported data set.
    mnist, fashion_mnist = (
        Image_Data_Set(name=label) for label in ("MNIST", "Fashion_MNIST")
    )

#### mnist_factory.py


In [None]:
import os

"""
Converts image file and labels into .csv file
@param 
    img_file (str) = relative path to image file
    label_file (str) = relative path to label file
    out_file (str) = relative path to out file
    data_size (int) = number of data inputs to read
"""

def convert(img_file, label_file, out_file, data_size):
    """Convert an IDX image file and its IDX label file into one .csv file.

    Every output line describes one sample: the label followed by the 28*28
    pixel values of the image, written as comma separated decimal integers.

    Args:
        img_file (str): path to the image file, relative to the current
            working directory.
        label_file (str): path to the label file, relative to the current
            working directory.
        out_file (str): path to the .csv file to write, relative to the
            current working directory. An existing file is overwritten.
        data_size (int): number of samples to read.

    Raises:
        ValueError: if either input file ends before data_size samples have
            been read.
        OSError: if one of the files cannot be opened.
    """
    # Get absolute path of all the necessary files
    project_root = os.getcwd()
    img_path = os.path.join(project_root, img_file)
    label_path = os.path.join(project_root, label_file)
    out_path = os.path.join(project_root, out_file)

    img_size = 28 * 28

    # The context manager closes all three files even when a read fails.
    with open(img_path, "rb") as imgs, \
            open(label_path, "rb") as labels, \
            open(out_path, "w") as out:
        # Skip the IDX headers: image files start with 16 bytes (magic number,
        # image count, row count, column count) and label files with 8 bytes
        # (magic number, label count). The payload follows immediately.
        imgs.read(16)
        labels.read(8)

        # Stream one sample at a time instead of holding the whole data set
        # in memory before writing.
        for index in range(data_size):
            label = labels.read(1)
            pixels = imgs.read(img_size)
            if len(label) != 1 or len(pixels) != img_size:
                raise ValueError(
                    f"input files ended at sample {index}, expected {data_size} samples"
                )
            # Iterating bytes yields ints, so this is "label,pix0,pix1,...".
            out.write(",".join(map(str, (*label, *pixels))) + "\n")

# TODO: These lines should be written somewhere else
# One entry per .csv file to generate: (image file, label file, out file, sample count).
_CONVERSION_JOBS = (
    ("data/mnist/train-images.idx3-ubyte", "data/mnist/train-labels.idx1-ubyte",
     "data/mnist/mnist_train.csv", 60000),
    ("data/mnist/t10k-images.idx3-ubyte", "data/mnist/t10k-labels.idx1-ubyte",
     "data/mnist/mnist_test.csv", 10000),
    ("data/fashion_mnist/train-images.idx3-ubyte", "data/fashion_mnist/train-labels.idx1-ubyte",
     "data/fashion_mnist/fashion-mnist_train.csv", 60000),
    ("data/fashion_mnist/t10k-images.idx3-ubyte", "data/fashion_mnist/t10k-labels.idx1-ubyte",
     "data/fashion_mnist/fashion-mnist_test.csv", 10000),
)

# Run the conversions in the listed order.
for _job in _CONVERSION_JOBS:
    convert(*_job)

#### mlp.py

In [None]:
import torch
import torch.nn as nn 
import torch.optim as optim
import matplotlib.pyplot as plt

from torch.utils.data import DataLoader
from data.data_loader import MNIST_set, fashion_MNIST_set
from models.hebbian_network import HebbianNetwork
from layers.scheduler import Scheduler


class MLPExperiment():
    """Training and evaluation harness around a HebbianNetwork."""

    def __init__(self, args, input_dimension, hidden_layer_dimension, output_dimension,
                 lamb=1, heb_lr=1, grad_lr=0.001, num_epochs=3, gamma=0, eps=10e-5):
        """Build the network and store the experiment settings.

        Args:
            args: object handed unchanged to the data set constructors.
            input_dimension: forwarded to HebbianNetwork.
            hidden_layer_dimension: forwarded to HebbianNetwork.
            output_dimension: forwarded to HebbianNetwork.
            lamb: forwarded to HebbianNetwork.
            heb_lr: forwarded to HebbianNetwork and to the Scheduler.
            grad_lr: learning rate of the ADAM optimizer.
            num_epochs: number of passes over the training data.
            gamma: forwarded to the Scheduler; 0 disables the scheduler.
            eps: forwarded to HebbianNetwork.
        """
        # TODO: For some reason my hebbian network is not processing batches together.
        self.model = HebbianNetwork(input_dimension, hidden_layer_dimension,
                                    output_dimension, heb_lr=heb_lr, lamb=lamb, eps=eps)
        self.args = args
        self.num_epochs = num_epochs
        self.grad_lr = grad_lr
        self.heb_lr = heb_lr
        self.gamma = gamma

    # Returns ADAM optimizer for gradient descent.
    def optimizer(self):
        return optim.Adam(self.model.parameters(), self.grad_lr)

    # Sets the scheduler for the feature detector layer of our network.
    def set_hebbian_scheduler(self):
        scheduler = Scheduler(self.heb_lr, 1000, self.gamma)
        # classifier=0 selects the hebbian (non-classifier) layer of the model
        self.model.setScheduler(scheduler, 0)

    # Returns cross entropy loss function.
    def loss_function(self):
        return nn.CrossEntropyLoss()

    # Trains the experiment.
    def train(self):
        data_set = fashion_MNIST_set(self.args)
        # data_set=MNIST_set(self.args)
        data_loader = DataLoader(data_set, batch_size=1, shuffle=True)

        self.model.train()

        optimizer = self.optimizer()
        if self.gamma != 0:
            self.set_hebbian_scheduler()

        # The clamped target must be as wide as the network output, so the
        # width is read from the model instead of being hard-coded.
        num_classes = self.model.output_dimension

        for _ in range(self.num_epochs):
            for inputs, labels in data_loader:
                self.model(inputs, clamped_output=self.oneHotEncode(labels, num_classes))
                # NOTE(review): no backward() call precedes this step, so it
                # only has an effect if the model fills .grad itself - confirm.
                optimizer.step()

    # Given a tensor of labels, returns a one hot encoded tensor for each label.
    def oneHotEncode(self, labels, num_classes):
        one_hot_encoded = torch.zeros(len(labels), num_classes)
        one_hot_encoded.scatter_(1, labels.unsqueeze(1), 1)

        # squeeze() drops the batch axis when a single label is encoded
        return one_hot_encoded.squeeze()

    # Visualizes the weights of the model (delegates to the model itself).
    def visualizeWeights(self):
        self.model.visualizeWeights()

    # Returns the fraction of test samples whose argmax output equals the label.
    def test(self):
        data_set = fashion_MNIST_set(self.args, 0)
        data_loader = DataLoader(data_set, batch_size=1, shuffle=True)
        cor = 0
        tot = 0
        for inputs, labels in data_loader:
            outputs = torch.argmax(self.model(inputs, None, train=0))
            if outputs.item() == labels.item():
                cor += 1
            tot += 1
        return cor / tot

    # Scatter-plots the log of the hebbian layer's exponential averages.
    def printExponentialAverages(self):
        # Read from this experiment's own model rather than from a module
        # level variable that only exists when the file is run as a script.
        A = torch.log(self.model.hebbian_layer.exponential_average).tolist()
        plt.scatter(range(len(A)), A)
        # Annotate every point with its index.
        for i, y in enumerate(A):
            plt.text(i, y, f'{i}', ha='center', va='bottom')
        plt.xlabel("Feature Selector")
        plt.ylabel("Log (Exponential Average)")
        plt.title("Logged Exponential Averages of Each Feature Selector")

    # Delegates to the classifier layer of the model.
    def activeClassifierWeights(self, beta):
        return self.model.classifier_layer.activeClassifierWeights(beta)
if __name__=="__main__":
    experiment=MLPExperiment(None, 784, 256 , 10, lamb=1, num_epochs=1, heb_lr=0.1)
    experiment.train()
    # visualizeWeights() takes no arguments; one call visualizes both the
    # hebbian layer and the classifier layer.
    experiment.visualizeWeights()
    #experiment.test()
    
            
            
        

#### hebbian_network.py

In [None]:
from layers.hebbian_layer import HebbianLayer
import torch.nn as nn 

class HebbianNetwork(nn.Module):
    """Network made of a hebbian layer followed by a classifier layer."""

    def __init__(self, input_dimension, hidden_layer_dimension, output_dimension, heb_lr=1, lamb=1, eps=10e-5):
        super().__init__()
        self.input_dimension = input_dimension
        self.output_dimension = output_dimension
        self.hidden_layer_dimension = hidden_layer_dimension

        # Third positional argument: False for the first layer, True for the
        # second (presumably the "is classifier" flag - confirm in HebbianLayer).
        self.hebbian_layer = HebbianLayer(
            self.input_dimension, self.hidden_layer_dimension, False,
            lamb=lamb, heb_lr=heb_lr, eps=eps,
        )
        self.classifier_layer = HebbianLayer(
            self.hidden_layer_dimension, self.output_dimension, True,
            lamb=lamb, heb_lr=heb_lr,
        )

    def setScheduler(self, scheduler, classifier):
        """Attach scheduler to the classifier layer if classifier is truthy, else to the hebbian layer."""
        target_layer = self.classifier_layer if classifier else self.hebbian_layer
        target_layer.setScheduler(scheduler)

    def forward(self, x, clamped_output=None, train=1):
        """Run x through both layers; only the classifier layer receives clamped_output."""
        hidden = self.hebbian_layer(x, clamped_output=None, train=train)
        return self.classifier_layer(hidden, clamped_output=clamped_output, train=train)

    def visualizeWeights(self):
        """Visualize the hebbian layer first, then the classifier layer."""
        for layer, is_classifier in ((self.hebbian_layer, 0), (self.classifier_layer, 1)):
            layer.visualizeWeights(classifier=is_classifier)
    
if __name__ == "__main__":
    # Smoke check: build a network and print its module structure.
    layer_sizes = (784, 256, 10)
    network = HebbianNetwork(*layer_sizes)
    print(network)