***
## Learning without Forgetting - LwF

Experiment reproducing the **Learning without Forgetting** (LwF) method:  
a hybrid of Distillation Networks and fine-tuning. Fine-tuning here refers to re-training an already trained model M, with a low learning rate, on a new and more specific dataset D<sub>new</sub> rather than on the dataset D<sub>old</sub> with which M was originally trained.

LwF, as opposed to other continual learning techniques, only uses the new data, so it assumes that past data used to pre-train the network is unavailable.  
It is a *transfer learning technique*.

`References:`
- Learning without Forgetting: https://arxiv.org/abs/1606.09282
- Three scenarios for continual learning: https://arxiv.org/abs/1904.07734

In [5]:
# Testing framework and test runner
import unittest

# Avalanche library
import avalanche as avl
from avalanche.evaluation import metrics as metrics

import torch
from torch.nn import CrossEntropyLoss
from torch.optim import Adam, SGD

# Avalanche NN model
from models import MLP

from utils import create_default_args, get_average_metric, get_target_result


class LwF(unittest.TestCase):
    """Test case that runs the Avalanche LwF strategy on a benchmark stream."""

    ## ------- Split MNIST benchmark ------- ##
    def test_smnist(self, override_args=None):
        """Train LwF on SplitMNIST and report the average stream accuracy.

        The model is trained on each experience of the training stream in
        turn and evaluated on the full test stream after every experience.
        When ``args.check`` is truthy, the final average accuracy is compared
        with the stored target value (tolerance ``0.02``).

        Args:
            override_args: Forwarded as the second argument to
                ``create_default_args`` together with the defaults below.
        """
        defaults = {
            'cuda': 0,               # device index for "cuda:<n>"; negative selects CPU
            'lwf_alpha': 1,          # passed to the strategy as `alpha`
            'lwf_temperature': 1,    # passed to the strategy as `temperature`
            'epochs': 10,            # passed to the strategy as `train_epochs`
            'layers': 1,             # passed to the MLP as `hidden_layers`
            'hidden_size': 256,      # passed to the MLP as `hidden_size`
            'learning_rate': 0.001,  # learning rate of the SGD optimizer
            'train_mb_size': 128,    # passed to the strategy as `train_mb_size`
        }
        args = create_default_args(defaults, override_args)

        # Run on the requested GPU only when CUDA is available and the
        # configured index is non-negative; otherwise fall back to the CPU.
        use_gpu = torch.cuda.is_available() and args.cuda >= 0
        device = torch.device(f"cuda:{args.cuda}" if use_gpu else "cpu")

        # Benchmark: SplitMNIST split into 5 experiences, without task ids.
        benchmark = avl.benchmarks.SplitMNIST(5, return_task_id=False)

        # Network and loss function.
        model = MLP(hidden_size=args.hidden_size, hidden_layers=args.layers)
        criterion = CrossEntropyLoss()

        # Accuracy is tracked per epoch, per experience and over the whole
        # stream, and reported through Avalanche's interactive logger.
        interactive_logger = avl.logging.InteractiveLogger()
        accuracy = metrics.accuracy_metrics(
            epoch=True, experience=True, stream=True)
        evaluation_plugin = avl.training.plugins.EvaluationPlugin(
            accuracy, loggers=[interactive_logger], benchmark=benchmark)

        # LwF continual-learning strategy configured from the arguments above.
        optimizer = SGD(model.parameters(), lr=args.learning_rate)
        cl_strategy = avl.training.LwF(
            model,
            optimizer,
            criterion,
            alpha=args.lwf_alpha,
            temperature=args.lwf_temperature,
            train_mb_size=args.train_mb_size,
            train_epochs=args.epochs,
            device=device,
            evaluator=evaluation_plugin,
        )

        # Train on one experience at a time; only the results of the last
        # evaluation are used after the loop.
        for experience in benchmark.train_stream:
            cl_strategy.train(experience)
            res = cl_strategy.eval(benchmark.test_stream)

        avg_stream_acc = get_average_metric(res)
        print(f"LwF-SMNIST Average Stream Accuracy: {avg_stream_acc:.2f}")

        target_acc = float(get_target_result('lwf', 'smnist'))
        if not args.check:
            return
        self.assertAlmostEqual(target_acc, avg_stream_acc, delta=0.02)