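This notebook provides a minimal example of using LFP to train a simple LeNet (or a small MLP) on MNIST. The same training loop can also be run with several baseline methods (vanilla gradient descent, particle swarm optimization, feedback alignment, dlADMM) for comparison.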

For more complex examples, refer to the experiment notebooks in ./nbs

### Imports

In [None]:
import os
import joblib
import random
import time

import numpy as np
import torch
import torch.nn as tnn
import torcheval.metrics
import torchvision.datasets as tvisiondata
import torchvision.transforms as T
from tqdm import tqdm

from lfprop.model.models import ACTIVATION_MAP

from lfprop.propagation import (
    propagator_lxt as propagator,
)  # LFP propagator.
from lfprop.rewards import reward_functions as rewards  # Reward Functions
from lfprop.rewards import rewards as loss_fns
from torch_pso import ParticleSwarmOptimizer
from fa import *
from dladmm import dladmm 
from dladmm import input_data as dladmm_data

  from .autonotebook import tqdm as notebook_tqdm


### Parameters

In [None]:
# --- Experiment selection ---
# model_name: "mlp" selects the MLP variants below, any other value selects the LeNet variants.
model_name = "lenet"
method_name = "pso"  # lfp-epsilon, vanilla-gradient, pso, fa, dladmm | TODO ldtp, ga
seed = 0
epochs = 50

# --- Filesystem locations (replace the placeholders before running) ---
data_path = "your_data_path_here"  # Path to your dataset
savepath = "your_save_path_here"  # Path to save results
os.makedirs(savepath, exist_ok=True)

# --- Data / model dimensions ---
n_channels, n_outputs, batch_size = 1, 10, 1000

# Settings shared by all training methods, collected in one dict.
general_params = dict(
    n_channels=n_channels,
    n_outputs=n_outputs,
    batch_size=batch_size,
    epochs=epochs,
)

def set_random_seeds(seed):
    """
    Seeds every random number generator used in this notebook and configures cuDNN for reproducibility.

    Args:
        seed: integer seed applied to Python's `random`, numpy and torch (CPU and all CUDA devices).
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Exported for child processes started later on; the hash seed of the
    # already-running interpreter is fixed at startup and is not changed by this.
    os.environ["PYTHONHASHSEED"] = str(seed)

    # No cuDNN autotuning, deterministic kernels only, and cuDNN itself switched off
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.enabled = False

# Fix all random number generators before any model or data loader is created.
set_random_seeds(seed)

# Run on the GPU when torch can see one, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

### Load Dataset

In [3]:
# Convert images to tensors, then normalize with mean 0.5 and std 0.5.
transform = T.Compose([T.ToTensor(), T.Normalize((0.5,), (0.5,))])

# Train and test split differ only in the `train` flag; the data is downloaded to data_path if missing.
training_data, testing_data = (
    tvisiondata.MNIST(root=data_path, transform=transform, download=True, train=is_train)
    for is_train in (True, False)
)

# Only the training split is shuffled.
training_loader = torch.utils.data.DataLoader(dataset=training_data, batch_size=batch_size, shuffle=True)
testing_loader = torch.utils.data.DataLoader(dataset=testing_data, batch_size=batch_size, shuffle=False)

### Load Model

In [4]:
class MLP(tnn.Module):
    """
    Small fully connected classifier: 784 -> 120 -> 84 -> n_outputs.

    Args:
        n_channels: accepted for signature parity with the other models in this file; not used.
        n_outputs: width of the output layer.
        activation: module class instantiated (without arguments) after each hidden layer.
    """

    def __init__(self, n_channels, n_outputs, activation=tnn.ReLU):
        super().__init__()

        # Input size is fixed to 28*28 features per sample, followed by two hidden widths
        layer_sizes = [28 * 28, 120, 84]

        # Hidden layers: linear map followed by the activation
        layers = []
        for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
            layers.append(tnn.Linear(fan_in, fan_out))
            layers.append(activation())

        # Output layer (no activation)
        layers.append(tnn.Linear(layer_sizes[-1], n_outputs))

        self.classifier = tnn.Sequential(*layers)

    def forward(self, x):
        """
        Flattens everything but the batch dimension and applies the classifier.
        """
        return self.classifier(torch.flatten(x, 1))
    
class FaMLP(tnn.Module):
    """
    Small fully connected classifier (784 -> 120 -> 84 -> n_outputs) built from LinearFA layers,
    i.e. the feedback-alignment counterpart of the plain MLP.

    Args:
        n_channels: accepted for signature parity with the other models in this file; not used.
        n_outputs: width of the output layer.
        activation: module class instantiated (without arguments) after each hidden layer.
    """

    def __init__(self, n_channels, n_outputs, activation=tnn.ReLU):
        super().__init__()

        # Input size is fixed to 28*28 features per sample, followed by two hidden widths
        layer_sizes = [28 * 28, 120, 84]

        # Hidden layers: feedback-alignment linear map followed by the activation
        layers = []
        for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
            layers.append(LinearFA(fan_in, fan_out))
            layers.append(activation())

        # Output layer (no activation)
        layers.append(LinearFA(layer_sizes[-1], n_outputs))

        self.classifier = tnn.Sequential(*layers)

    def forward(self, x):
        """
        Flattens everything but the batch dimension and applies the classifier.
        """
        return self.classifier(torch.flatten(x, 1))

class LeNet(tnn.Module):
    """
    Small LeNet: two conv/pool stages, two dropout-regularized dense layers and a linear output layer.

    Args:
        n_channels: number of input channels. It also selects the flattened feature size:
            256 (= 16*4*4, which matches 28x28 inputs) for one channel, 400 (= 16*5*5,
            which matches 32x32 inputs) otherwise.
        n_outputs: width of the output layer.
        activation: module class instantiated (without arguments) after each conv / dense layer.
    """

    def __init__(self, n_channels, n_outputs, activation=tnn.ReLU):
        super().__init__()

        # Feature extractor: (5x5 convolution -> activation -> 2x2 max pooling) twice
        conv_stages = []
        for in_ch, out_ch in ((n_channels, 16), (16, 16)):
            conv_stages += [tnn.Conv2d(in_ch, out_ch, 5), activation(), tnn.MaxPool2d(2, 2)]
        self.features = tnn.Sequential(*conv_stages)

        # Classifier: (linear -> activation -> dropout) twice
        flat_features = 256 if n_channels == 1 else 400
        dense_stages = []
        for fan_in, fan_out in ((flat_features, 120), (120, 84)):
            dense_stages += [tnn.Linear(fan_in, fan_out), activation(), tnn.Dropout()]
        self.classifier = tnn.Sequential(*dense_stages)

        # Output layer, kept as a separate attribute
        self.last = tnn.Linear(84, n_outputs)

    def forward(self, x):
        """
        Runs features -> flatten -> classifier -> output layer.
        """
        feature_maps = self.features(x)
        hidden = self.classifier(torch.flatten(feature_maps, 1))
        return self.last(hidden)
    
class FaLeNet(tnn.Module):
    """
    Small LeNet built from Conv2dFA / LinearFA layers, i.e. the feedback-alignment
    counterpart of the plain LeNet.

    Args:
        n_channels: number of input channels. It also selects the flattened feature size:
            256 (= 16*4*4, which matches 28x28 inputs) for one channel, 400 (= 16*5*5,
            which matches 32x32 inputs) otherwise.
        n_outputs: width of the output layer.
        activation: module class instantiated (without arguments) after each conv / dense layer.
    """

    def __init__(self, n_channels, n_outputs, activation=tnn.ReLU):
        super().__init__()

        # Feature extractor: (5x5 FA convolution -> activation -> 2x2 max pooling) twice
        conv_stages = []
        for in_ch, out_ch in ((n_channels, 16), (16, 16)):
            conv_stages += [Conv2dFA(in_ch, out_ch, 5), activation(), tnn.MaxPool2d(2, 2)]
        self.features = tnn.Sequential(*conv_stages)

        # Classifier: (FA linear -> activation -> dropout) twice
        flat_features = 256 if n_channels == 1 else 400
        dense_stages = []
        for fan_in, fan_out in ((flat_features, 120), (120, 84)):
            dense_stages += [LinearFA(fan_in, fan_out), activation(), tnn.Dropout()]
        self.classifier = tnn.Sequential(*dense_stages)

        # Output layer, kept as a separate attribute
        self.last = LinearFA(84, n_outputs)

    def forward(self, x):
        """
        Runs features -> flatten -> classifier -> output layer.
        """
        feature_maps = self.features(x)
        hidden = self.classifier(torch.flatten(feature_maps, 1))
        return self.last(hidden)

def name_modules(module, name):
    """
    Recursively stores a dotted path on every submodule as a `tmpname` attribute (debugging aid).

    Args:
        module: module whose descendants get labelled; `module` itself is left untouched.
        name: path prefix for the direct children; pass "" to start at the top level.
    """
    prefix = f"{name}." if name != "" else ""

    for child_name, submodule in module.named_children():
        submodule.tmpname = prefix + child_name
        # Descend with the child's full path as the new prefix
        name_modules(submodule, submodule.tmpname)

### Evaluation and Training Helpers

In [None]:
def eval_model(model, loader, objective_func):
    """
    Evaluates the model on a single dataset.

    Args:
        model: network to evaluate; it is switched to eval mode and left in eval mode.
        loader: iterable yielding (inputs, labels) batches.
        objective_func: callable mapping (outputs, labels) to the objective (loss or reward).

    Returns:
        dict with the keys "objective" (mean of all objective values) and "accuracy"
        (micro-averaged top-1 accuracy), both converted to numpy.
    """
    objective_metric = torcheval.metrics.Mean(device=device)
    accuracy_metric = torcheval.metrics.MulticlassAccuracy(average="micro", num_classes=10, k=1, device=device)

    model.eval()

    # Iterate over Data Loader
    for inputs, labels in loader:
        inputs = inputs.to(device)
        # as_tensor passes an existing tensor through unchanged (torch.tensor would
        # copy-construct it and emit a UserWarning) and still converts list-like labels
        labels = torch.as_tensor(labels).to(device)

        with torch.no_grad():
            # Get model predictions
            outputs = model(inputs)

        # NOTE(review): gradients are re-enabled for the objective call only -- presumably
        # because some reward functions differentiate internally; confirm before removing.
        with torch.set_grad_enabled(True):
            objective = objective_func(outputs, labels)

        objective_metric.update(objective)
        accuracy_metric.update(outputs, labels)

    # Return evaluation
    return {
        "objective": objective_metric.compute().detach().cpu().numpy(),
        "accuracy": accuracy_metric.compute().detach().cpu().numpy(),
    }

def lfp_step(model, optimizer, objective_func, propagation_composite, inputs, labels):
    """
    Performs a single training step using LFP. This is quite similar to a standard gradient descent training loop.

    Args:
        model: network to update; switched to train mode for the step and back to eval mode afterwards.
        optimizer: torch optimizer over the model's parameters.
        objective_func: callable mapping (outputs, labels) to the reward tensor.
        propagation_composite: object whose `context(model)` yields the model to run the forward pass with.
        inputs: input batch (expected to be on the target device already).
        labels: labels belonging to `inputs`.
    """
    # Set Model to training mode
    model.train()

    # Autograd is needed for the backward pass below, regardless of the caller's grad mode
    with torch.enable_grad():
        # Zero Optimizer
        optimizer.zero_grad()

        # This applies LFP Hooks/Functions
        with propagation_composite.context(model) as modified:
            # The inputs must require grad because the backward pass below is taken w.r.t. them
            inputs = inputs.detach().requires_grad_(True)
            outputs = modified(inputs)

            # Calculate reward
            # The round trip through numpy yields a tensor that is detached from the graph
            # (done like this to avoid tensors being kept in memory)
            reward = torch.from_numpy(objective_func(outputs, labels).detach().cpu().numpy()).to(device)

            # Calculate LFP and write into .feedback attribute of parameters.
            # The returned input gradient is discarded; the call is made for its side effect
            # (presumably the composite's hooks fill in `param.feedback` -- confirm in lfprop).
            torch.autograd.grad((outputs,), (inputs,), grad_outputs=(reward,), retain_graph=False)[0]

            # Write LFP values into .grad attributes. Note the negative sign: LFP requires maximization instead of minimization like gradient descent
            for name, param in model.named_parameters():
                param.grad = -param.feedback

            # Update Clipping (max norm 3.0, L2 norm). Training may become unstable otherwise, especially in small models with large learning rates.
            # In larger models (e.g., VGG, ResNet), where smaller learning rates are generally utilized, not clipping updates may result in better performance.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 3.0, 2.0)

            # Optimization step
            optimizer.step()

    # Set Model back to eval mode
    model.eval()
    
def grad_step(model, optimizer, objective_func, inputs, labels):
    """
    Runs one gradient-descent update on a single batch.

    Args:
        model: network to update; switched to train mode for the step and back to eval mode afterwards.
        optimizer: torch optimizer over the model's parameters.
        objective_func: callable mapping (outputs, labels) to a scalar loss.
        inputs: input batch.
        labels: labels belonging to `inputs`.
    """
    model.train()

    # Gradients are required here even if the caller disabled them
    with torch.enable_grad():
        optimizer.zero_grad()

        # Forward pass on the batch (detached from any upstream graph), then backpropagate the loss
        batch_loss = objective_func(model(inputs.detach()), labels)
        batch_loss.backward()

        # Clip the overall L2 norm of the gradients to 3.0. Training may become unstable otherwise,
        # especially in small models with large learning rates. Larger models trained with small
        # learning rates may do better without clipping.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=3.0, norm_type=2.0)

        optimizer.step()

    model.eval()

def pso_step(model, optimizer, objective_func, inputs, labels, max_steps):
    """
    Runs one particle-swarm update on a single batch.

    Args:
        model: network to update; switched to train mode for the step and back to eval mode afterwards.
        optimizer: closure-based optimizer; `optimizer.step(closure)` is called exactly once.
        objective_func: callable mapping (outputs, labels) to the value the optimizer minimizes.
        inputs: input batch.
        labels: labels belonging to `inputs`.
        max_steps: currently unused. It is kept for a (disabled) linear inertia-weight schedule,
            inertial_weight_max - (inertial_weight_max - inertial_weight_min) / max_steps * current_step.
    """
    model.train()

    batch = inputs.detach()

    def closure():
        # Drop any stale grads first, since the optimizer changes the parameters between evaluations
        optimizer.zero_grad()
        # The forward pass has to happen INSIDE the closure so that it sees the current parameters
        return objective_func(model(batch), labels)

    # The step is gradient-free, so autograd is switched off for it
    with torch.no_grad():
        optimizer.zero_grad()
        optimizer.step(closure)

    model.eval()


# Training Loop
def _format_eval_line(prefix, train_stats, test_stats):
    """
    Builds the one-line progress report printed by `train` from two `eval_model` result dicts.
    """
    return "{}: (Train Objective) {:.2f}; (Train Accuracy) {:.2f}; (Test Objective) {:.2f}; (Test Accuracy) {:.2f}".format(
        prefix,
        float(np.mean(train_stats["objective"])),
        float(train_stats["accuracy"]),
        float(np.mean(test_stats["objective"])),
        float(test_stats["accuracy"]),
    )


def train(model, optimizer, objective_func, propagation_composite, **kwargs):
    """
    Trains `model` on `training_loader` and evaluates it on both loaders after every epoch.

    Args:
        model: network to train.
        optimizer: optimizer handed to the per-batch step function.
        objective_func: loss / reward function used for both training and evaluation.
        propagation_composite: selects the update rule. None -> gradient descent (`grad_step`),
            the string "pso" -> particle swarm (`pso_step`), anything else is treated as an
            LFP composite (`lfp_step`).
        **kwargs: remaining configuration entries. Only "epochs" is read (falling back to the
            notebook-level `epochs`); all other entries are accepted and ignored.

    Returns:
        dict of lists. "train_accuracy", "train_objective", "test_accuracy", "test_objective"
        and "clock_time" (seconds spent in the training pass) get one entry per epoch;
        "zeros_<module name>" lists get one entry per batch for every module exposing `zeros_ratio`.
    """
    # Honor the epoch count passed in through the training configuration
    n_epochs = kwargs.get("epochs", epochs)

    evals = {
        "train_accuracy": [],
        "train_objective": [],
        "test_accuracy": [],
        "test_objective": [],
        "clock_time": [],
    }

    # Performance of the untrained model
    eval_stats_train = eval_model(model, training_loader, objective_func)
    eval_stats_test = eval_model(model, testing_loader, objective_func)
    print(_format_eval_line("INIT", eval_stats_train, eval_stats_test))

    # Total number of update steps, only consumed by the PSO step
    max_steps = len(training_loader) * n_epochs

    for epoch in range(n_epochs):
        # Iterate over Data Loader
        pre = time.time()
        for inputs, labels in training_loader:
            inputs = inputs.to(device)
            # as_tensor passes an existing tensor through unchanged (torch.tensor would
            # copy-construct it and emit a UserWarning)
            labels = torch.as_tensor(labels).to(device)

            # Perform Update Step
            if propagation_composite is None:
                grad_step(model, optimizer, objective_func, inputs, labels)
            elif propagation_composite == "pso":
                pso_step(model, optimizer, objective_func, inputs, labels, max_steps=max_steps)
            else:
                lfp_step(model, optimizer, objective_func, propagation_composite, inputs, labels)

            # Log zero ratios (keyed by the `tmpname` attribute of each module)
            for _, child in model.named_modules():
                if hasattr(child, "zeros_ratio"):
                    evals.setdefault(f"zeros_{child.tmpname}", []).append(child.zeros_ratio)

        post = time.time()

        # Evaluate and print performance after every epoch
        eval_stats_train = eval_model(model, training_loader, objective_func)
        eval_stats_test = eval_model(model, testing_loader, objective_func)
        print(_format_eval_line(f"Epoch {epoch + 1}/{n_epochs}", eval_stats_train, eval_stats_test))

        evals["train_accuracy"].append(float(eval_stats_train["accuracy"]))
        evals["train_objective"].append(float(eval_stats_train["objective"]))
        evals["test_accuracy"].append(float(eval_stats_test["accuracy"]))
        evals["test_objective"].append(float(eval_stats_test["objective"]))
        evals["clock_time"].append(post - pre)

    return evals

### Set Up Training Method

In [6]:
def _build_model(mlp_class, lenet_class):
    """
    Instantiates the network selected by `model_name` and prepares it for training.

    `model_name == "mlp"` selects `mlp_class`; every other value selects `lenet_class`.
    The model gets debug names on all submodules, is moved to `device` and put into eval mode.

    Returns:
        (model_class, model)
    """
    model_class = mlp_class if model_name == "mlp" else lenet_class
    model = model_class(
        n_channels=n_channels,
        n_outputs=n_outputs,
        activation=tnn.ReLU,
    )
    name_modules(model, "")
    model.tmpname = "root"
    model.to(device)
    model.eval()
    return model_class, model


if method_name == "lfp-epsilon":
    model_class, model = _build_model(MLP, LeNet)

    training_cfg = {
        "model_class": model_class,
        "propagation_composite": propagator.LFPEpsilonComposite(),
        "objective_func": rewards.SoftmaxLossReward(device),
        "model": model,
        "optimizer": torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9),
    }
    training_func = train

elif method_name == "vanilla-gradient":
    model_class, model = _build_model(MLP, LeNet)

    training_cfg = {
        "model_class": model_class,
        "propagation_composite": None,
        "objective_func": loss_fns.CustomCrossEntropyLoss(),
        "model": model,
        "optimizer": torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9),
    }
    training_func = train

elif method_name == "pso":
    model_class, model = _build_model(MLP, LeNet)

    training_cfg = {
        "model_class": model_class,
        # The string "pso" (instead of a composite) makes `train` dispatch to `pso_step`
        "propagation_composite": "pso",
        "objective_func": torch.nn.CrossEntropyLoss(),
        # "objective_func": loss_fns.CustomCrossEntropyLoss(),
        "model": model,
        "optimizer": ParticleSwarmOptimizer(
            model.parameters(),  # TODO: tune hyperparams
            cognitive_coefficient=2,  # Note: We decay this if particle best was not updated for a while
            social_coefficient=2,  # Note: We decay this if global best was not updated for a while
            inertial_weight=0.8,
            num_particles=1000,
            max_param_value=0.1,
            min_param_value=-0.1,
        ),
    }
    training_func = train

elif method_name == "fa":
    # Feedback alignment uses the plain gradient step; only the layer types differ
    model_class, model = _build_model(FaMLP, FaLeNet)

    training_cfg = {
        "model_class": model_class,
        "propagation_composite": None,
        "objective_func": loss_fns.CustomCrossEntropyLoss(),
        "model": model,
        "optimizer": torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9),
    }
    training_func = train

# TODO: elif method_name == "ldtp":  # https://arxiv.org/pdf/2201.13415

elif method_name == "dladmm":
    if model_name != "mlp":
        raise ValueError("dladmm only implemented for mlp")
    dladmm_mnist = dladmm_data.mnist(data_path)
    model_class = dladmm.DladmmNet
    model = tuple(model_class(
        images=torch.transpose(dladmm_mnist.x_train, 0, 1),
        label=torch.transpose(dladmm_mnist.y_train, 0, 1),
        num_of_neurons1=120,
        num_of_neurons2=84,
    ))  # Model is just a tuple of parameters, pre-acts, and activations here

    training_cfg = {
        "model_class": model_class,
        "model": model,
        "x_train": dladmm_mnist.x_train,
        "y_train": dladmm_mnist.y_train,
        "x_test": dladmm_mnist.x_test,
        "y_test": dladmm_mnist.y_test,
    }
    training_func = dladmm.train

else:
    raise ValueError(
        f"Unknown method_name {method_name!r}; expected one of "
        "'lfp-epsilon', 'vanilla-gradient', 'pso', 'fa', 'dladmm'"
    )

# Every method additionally receives the shared settings (channels, outputs, batch size, epochs)
training_cfg = {**training_cfg, **general_params}

### Train, if results not available

In [7]:
# Results are cached on disk; training only runs when no cached result exists.
# NOTE: the cache file name does not encode method_name or seed, so use a separate
# savepath per configuration, otherwise a stale result of another run is loaded.
result_path = os.path.join(savepath, "result_dict.joblib")
if not os.path.exists(result_path):
    print(f"TRAINING method {method_name} with seed {seed}")
    evals = training_func(**training_cfg)
    joblib.dump(evals, result_path)
else:
    # NOTE: joblib.load unpickles the file; only load result files you created yourself.
    evals = joblib.load(result_path)

# Print the last recorded value of every metric (header once, not once per metric)
print("FINAL EPOCH")
for k, v in evals.items():
    if len(v) == 0:
        # Nothing was recorded for this metric (e.g. zero epochs)
        continue
    print(k, v[-1])

TRAINING method pso with seed 13


INIT: (Train Objective) 2.31; (Train Accuracy) 0.10; (Test Objective) 2.31; (Test Accuracy) 0.10


Step 1: 7.754240036010742 better than inf


Step 4: 2.5245656967163086 better than 7.754240036010742


Step 35: 2.514148712158203 better than 2.5245656967163086


Step 36: 2.512770175933838 better than 2.514148712158203


Step 57: 2.5069611072540283 better than 2.512770175933838


Step 58: 2.497843027114868 better than 2.5069611072540283


Epoch 1/50: (Train Objective) 2.28; (Train Accuracy) 0.14; (Test Objective) 2.28; (Test Accuracy) 0.14


Step 69: 2.4436609745025635 better than 2.497843027114868


Step 88: 2.4348886013031006 better than 2.4436609745025635


Step 90: 2.4331562519073486 better than 2.4348886013031006


Step 106: 2.417565107345581 better than 2.4331562519073486


Step 106: 2.4129135608673096 better than 2.417565107345581


Step 116: 2.4078598022460938 better than 2.4129135608673096


Epoch 2/50: (Train Objective) 2.28; (Train Accuracy) 0.14; (Test Objective) 2.28; (Test Accuracy) 0.14


Step 125: 2.3760595321655273 better than 2.4078598022460938


Step 140: 2.375542402267456 better than 2.3760595321655273


Step 140: 2.3689382076263428 better than 2.375542402267456


Step 160: 2.3667213916778564 better than 2.3689382076263428


Step 160: 2.357557535171509 better than 2.3667213916778564


Step 160: 2.3527743816375732 better than 2.357557535171509


Step 169: 2.3476033210754395 better than 2.3527743816375732


Epoch 3/50: (Train Objective) 2.27; (Train Accuracy) 0.12; (Test Objective) 2.27; (Test Accuracy) 0.11


Step 183: 2.345689058303833 better than 2.3476033210754395


Step 201: 2.34421443939209 better than 2.345689058303833


Step 201: 2.3419158458709717 better than 2.34421443939209


Step 213: 2.306917905807495 better than 2.3419158458709717


Epoch 4/50: (Train Objective) 2.26; (Train Accuracy) 0.15; (Test Objective) 2.25; (Test Accuracy) 0.15


Step 251: 2.30627179145813 better than 2.306917905807495


Step 253: 2.305394172668457 better than 2.30627179145813


Step 270: 2.289822578430176 better than 2.305394172668457


Step 277: 2.28743577003479 better than 2.289822578430176


Step 277: 2.2801878452301025 better than 2.28743577003479


Epoch 5/50: (Train Objective) 2.23; (Train Accuracy) 0.19; (Test Objective) 2.22; (Test Accuracy) 0.19


Step 305: 2.2643439769744873 better than 2.2801878452301025


Step 317: 2.264163017272949 better than 2.2643439769744873


Step 338: 2.260317325592041 better than 2.264163017272949


Epoch 6/50: (Train Objective) 2.22; (Train Accuracy) 0.20; (Test Objective) 2.22; (Test Accuracy) 0.20


Step 410: 2.25964093208313 better than 2.260317325592041


Epoch 7/50: (Train Objective) 2.21; (Train Accuracy) 0.21; (Test Objective) 2.21; (Test Accuracy) 0.21


Step 437: 2.2585349082946777 better than 2.25964093208313


Epoch 8/50: (Train Objective) 2.21; (Train Accuracy) 0.21; (Test Objective) 2.21; (Test Accuracy) 0.21


Step 502: 2.2555508613586426 better than 2.2585349082946777


Step 510: 2.254072427749634 better than 2.2555508613586426


Epoch 9/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.23


Epoch 10/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.23


Step 604: 2.253028392791748 better than 2.254072427749634


Epoch 11/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 12/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 13/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 14/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 15/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 16/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 17/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Step 1033: 2.252627372741699 better than 2.253028392791748


Epoch 18/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 19/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 20/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 21/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 22/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Step 1359: 2.248192071914673 better than 2.252627372741699


Epoch 23/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 24/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 25/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 26/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 27/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 28/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 29/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 30/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 31/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 32/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 33/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Step 2001: 2.247478723526001 better than 2.248192071914673


Epoch 34/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 35/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 36/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 37/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 38/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Step 2327: 2.2388834953308105 better than 2.247478723526001


Epoch 39/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 40/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 41/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 42/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 43/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 44/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 45/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 46/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 47/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 48/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 49/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22


Epoch 50/50: (Train Objective) 2.21; (Train Accuracy) 0.22; (Test Objective) 2.20; (Test Accuracy) 0.22
FINAL EPOCH
train_accuracy 0.219200000166893
FINAL EPOCH
train_objective 2.207744765281677
FINAL EPOCH
test_accuracy 0.2249000072479248
FINAL EPOCH
test_objective 2.2046017169952394
FINAL EPOCH
clock_time 2099.4157853126526
