In [1]:
%load_ext autoreload
%autoreload 2

# Libraries

In [2]:
from torchvision.transforms import Compose, ToTensor
from torchsummary import summary
import pickle
import pandas as pd
import plotly.express as px
from tqdm import tqdm

In [12]:
from avalanche.benchmarks.classic import PermutedMNIST
from avalanche.models import SimpleMLP
from avalanche.training.plugins import EvaluationPlugin
from avalanche.logging import InteractiveLogger
from avalanche.evaluation.metrics import accuracy_metrics, forgetting_metrics, bwt_metrics

from torch.nn import CrossEntropyLoss
from torch.optim import SGD

## Custom Libraries

In [4]:
import sys

sys.path.append("../base_code/")

from base_code.constants import DATASETS_PATH, SAVED_METRICS_PATH
from base_code.training import MWUN
from base_code.plugins import WeightStoragePlugin

# Dataset and definitions

## Dataset loading

We load the Permuted MNIST benchmark, split into 10 experiences with a fixed seed so that the permutations are reproducible.

In [5]:
# Permuted MNIST benchmark with 10 experiences; the fixed seed keeps the
# benchmark reproducible across runs.
scenario = PermutedMNIST(
    n_experiences=10,
    seed=1234,
    dataset_root=DATASETS_PATH,
)

## Scenario creation with train test streaming

At this point we take the train and test streams from the scenario. In Permuted MNIST every experience contains all ten digit classes, but under a different fixed pixel permutation. That is, we first train on permutation $a$, and in the following experience we train on permutation $b$ ($a \neq b$).

In [6]:
# Streams of experiences used for training and for evaluation, respectively.
train_stream, test_stream = scenario.train_stream, scenario.test_stream

## Evaluation metrics definition

In [77]:
# Metric groups tracked on every evaluation pass: accuracy, forgetting and
# backward transfer, each at experience level and at stream level.
tracked_metrics = [
    accuracy_metrics(experience=True, stream=True, trained_experience=True),
    forgetting_metrics(experience=True, stream=True),
    bwt_metrics(experience=True, stream=True),
]

# Metric values are printed through the interactive logger.
eval_plugin = EvaluationPlugin(*tracked_metrics, loggers=[InteractiveLogger()])

## Plugin definitions

In [78]:
# Extra plugins for the strategy; WeightStoragePlugin is provided by the
# project's `base_code.plugins` module.
weight_storage_plugin = WeightStoragePlugin()
model_plugins = [weight_storage_plugin]

## Model, Optimizer, Loss function and Strategy definition

* `model`: Multi Layer Perceptron
* `Optimizer`: SGD
* `Loss function`: Cross Entropy
* `Strategy`: MWUN (custom strategy from `base_code.training`)

In [79]:
# MLP classifier over flattened 28x28 inputs with two hidden layers of 100 units.
model = SimpleMLP(
    num_classes=scenario.n_classes,
    input_size=28 * 28,
    hidden_layers=2,
    hidden_size=100,
)

# Cross-entropy loss optimised with plain SGD.
criterion = CrossEntropyLoss()
optimizer = SGD(model.parameters(), lr=1e-3)

# NOTE(review): `model_plugins` is defined in an earlier cell but is not passed
# to the strategy here -- confirm whether that is intended.
strategy = MWUN(
    model,
    optimizer,
    criterion,
    eps=1e-5,
    train_mb_size=128,
    train_epochs=5,
    eval_mb_size=128,
    evaluator=eval_plugin,
)

# Training and evaluation

**TODO:** Review why the training is behaving strangely.

In [80]:
# One metrics dict is collected per evaluation pass, i.e. two per experience.
results = []

for experience in tqdm(train_stream):
    strategy.train(experience)

    # After each experience, evaluate on the whole train stream first and
    # then on the whole test stream, keeping both results in order.
    for stream in (train_stream, test_stream):
        metrics = strategy.eval(stream)
        results.append(metrics)

    

  0%|          | 0/10 [00:00<?, ?it/s]

-- >> Start of training phase << --
100%|██████████| 469/469 [00:04<00:00, 108.33it/s]
Epoch 0 ended.
100%|██████████| 469/469 [00:04<00:00, 113.53it/s]
Epoch 1 ended.
100%|██████████| 469/469 [00:04<00:00, 113.04it/s]
Epoch 2 ended.
100%|██████████| 469/469 [00:04<00:00, 113.55it/s]
Epoch 3 ended.
100%|██████████| 469/469 [00:04<00:00, 112.52it/s]
Epoch 4 ended.
-- >> End of training phase << --
-- >> Start of eval phase << --
-- Starting eval on experience 0 (Task 0) from train stream --
100%|██████████| 469/469 [00:03<00:00, 135.47it/s]
> Eval on experience 0 (Task 0) from train stream ended.
	Top1_Acc_Exp/eval_phase/train_stream/Task000/Exp000 = 0.7375
-- Starting eval on experience 1 (Task 0) from train stream --
100%|██████████| 469/469 [00:03<00:00, 135.43it/s]
> Eval on experience 1 (Task 0) from train stream ended.
	Top1_Acc_Exp/eval_phase/train_stream/Task000/Exp001 = 0.1767
-- Starting eval on experience 2 (Task 0) from train stream --
100%|██████████| 469/469 [00:03<00:00, 

 10%|█         | 1/10 [01:01<09:15, 61.72s/it]


> Eval on experience 9 (Task 0) from test stream ended.
	Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp009 = 0.1489
-- >> End of eval phase << --
	Accuracy_On_Trained_Experiences/eval_phase/test_stream/Task000 = 0.7415
	StreamBWT/eval_phase/test_stream = 0.0000
	StreamForgetting/eval_phase/test_stream = 0.0000
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.2056
-- >> Start of training phase << --
100%|██████████| 469/469 [00:04<00:00, 112.40it/s]
Epoch 0 ended.
100%|██████████| 469/469 [00:04<00:00, 111.99it/s]
Epoch 1 ended.
100%|██████████| 469/469 [00:04<00:00, 112.61it/s]
Epoch 2 ended.
100%|██████████| 469/469 [00:04<00:00, 112.03it/s]
Epoch 3 ended.
100%|██████████| 469/469 [00:04<00:00, 112.74it/s]
Epoch 4 ended.
-- >> End of training phase << --
-- >> Start of eval phase << --
-- Starting eval on experience 0 (Task 0) from train stream --
100%|██████████| 469/469 [00:03<00:00, 133.96it/s]
> Eval on experience 0 (Task 0) from train stream ended.
	ExperienceBWT/eval_phase/

 20%|██        | 2/10 [02:03<08:16, 62.04s/it]


> Eval on experience 9 (Task 0) from test stream ended.
	Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp009 = 0.1946
-- >> End of eval phase << --
	Accuracy_On_Trained_Experiences/eval_phase/test_stream/Task000 = 0.7529
	StreamBWT/eval_phase/test_stream = -0.0244
	StreamForgetting/eval_phase/test_stream = 0.0244
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.2803
-- >> Start of training phase << --
100%|██████████| 469/469 [00:04<00:00, 112.45it/s]
Epoch 0 ended.
100%|██████████| 469/469 [00:04<00:00, 112.20it/s]
Epoch 1 ended.
100%|██████████| 469/469 [00:04<00:00, 112.48it/s]
Epoch 2 ended.
100%|██████████| 469/469 [00:04<00:00, 111.62it/s]
Epoch 3 ended.
100%|██████████| 469/469 [00:04<00:00, 112.42it/s]
Epoch 4 ended.
-- >> End of training phase << --
-- >> Start of eval phase << --
-- Starting eval on experience 0 (Task 0) from train stream --
100%|██████████| 469/469 [00:03<00:00, 133.82it/s]
> Eval on experience 0 (Task 0) from train stream ended.
	ExperienceBWT/eval_phase

 30%|███       | 3/10 [03:05<07:14, 62.01s/it]


> Eval on experience 9 (Task 0) from test stream ended.
	Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp009 = 0.1392
-- >> End of eval phase << --
	Accuracy_On_Trained_Experiences/eval_phase/test_stream/Task000 = 0.7337
	StreamBWT/eval_phase/test_stream = -0.0642
	StreamForgetting/eval_phase/test_stream = 0.0642
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.3208
-- >> Start of training phase << --
100%|██████████| 469/469 [00:04<00:00, 112.63it/s]
Epoch 0 ended.
100%|██████████| 469/469 [00:04<00:00, 112.10it/s]
Epoch 1 ended.
100%|██████████| 469/469 [00:04<00:00, 112.87it/s]
Epoch 2 ended.
100%|██████████| 469/469 [00:04<00:00, 112.39it/s]
Epoch 3 ended.
100%|██████████| 469/469 [00:04<00:00, 113.54it/s]
Epoch 4 ended.
-- >> End of training phase << --
-- >> Start of eval phase << --
-- Starting eval on experience 0 (Task 0) from train stream --
100%|██████████| 469/469 [00:03<00:00, 135.03it/s]
> Eval on experience 0 (Task 0) from train stream ended.
	ExperienceBWT/eval_phase

 40%|████      | 4/10 [07:10<13:24, 134.04s/it]

-- >> Start of training phase << --
100%|██████████| 469/469 [00:04<00:00, 105.07it/s]
Epoch 0 ended.
100%|██████████| 469/469 [00:04<00:00, 112.52it/s]
Epoch 1 ended.
100%|██████████| 469/469 [00:04<00:00, 112.53it/s]
Epoch 2 ended.
100%|██████████| 469/469 [00:04<00:00, 112.31it/s]
Epoch 3 ended.
100%|██████████| 469/469 [00:04<00:00, 112.98it/s]
Epoch 4 ended.
-- >> End of training phase << --
-- >> Start of eval phase << --
-- Starting eval on experience 0 (Task 0) from train stream --
100%|██████████| 469/469 [00:03<00:00, 134.23it/s]
> Eval on experience 0 (Task 0) from train stream ended.
	ExperienceBWT/eval_phase/train_stream/Task000/Exp000 = -0.1344
	ExperienceForgetting/eval_phase/train_stream/Task000/Exp000 = 0.1344
	Top1_Acc_Exp/eval_phase/train_stream/Task000/Exp000 = 0.6071
-- Starting eval on experience 1 (Task 0) from train stream --
100%|██████████| 469/469 [00:03<00:00, 134.59it/s]
> Eval on experience 1 (Task 0) from train stream ended.
	ExperienceBWT/eval_phase/trai

 50%|█████     | 5/10 [08:12<09:00, 108.12s/it]


> Eval on experience 9 (Task 0) from test stream ended.
	Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp009 = 0.1270
-- >> End of eval phase << --
	Accuracy_On_Trained_Experiences/eval_phase/test_stream/Task000 = 0.7513
	StreamBWT/eval_phase/test_stream = -0.0509
	StreamForgetting/eval_phase/test_stream = 0.0509
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.4480
-- >> Start of training phase << --
100%|██████████| 469/469 [00:04<00:00, 112.57it/s]
Epoch 0 ended.
100%|██████████| 469/469 [00:04<00:00, 112.68it/s]
Epoch 1 ended.
100%|██████████| 469/469 [00:04<00:00, 109.36it/s]
Epoch 2 ended.
100%|██████████| 469/469 [00:04<00:00, 110.66it/s]
Epoch 3 ended.
100%|██████████| 469/469 [00:04<00:00, 103.04it/s]
Epoch 4 ended.
-- >> End of training phase << --
-- >> Start of eval phase << --
-- Starting eval on experience 0 (Task 0) from train stream --
100%|██████████| 469/469 [00:03<00:00, 137.61it/s]
> Eval on experience 0 (Task 0) from train stream ended.
	ExperienceBWT/eval_phase

 60%|██████    | 6/10 [09:14<06:09, 92.37s/it] 


> Eval on experience 9 (Task 0) from test stream ended.
	Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp009 = 0.1095
-- >> End of eval phase << --
	Accuracy_On_Trained_Experiences/eval_phase/test_stream/Task000 = 0.7373
	StreamBWT/eval_phase/test_stream = -0.0733
	StreamForgetting/eval_phase/test_stream = 0.0733
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.5023
-- >> Start of training phase << --
100%|██████████| 469/469 [00:04<00:00, 116.50it/s]
Epoch 0 ended.
100%|██████████| 469/469 [00:04<00:00, 116.98it/s]
Epoch 1 ended.
100%|██████████| 469/469 [00:03<00:00, 117.35it/s]
Epoch 2 ended.
100%|██████████| 469/469 [00:03<00:00, 118.00it/s]
Epoch 3 ended.
100%|██████████| 469/469 [00:04<00:00, 117.21it/s]
Epoch 4 ended.
-- >> End of training phase << --
-- >> Start of eval phase << --
-- Starting eval on experience 0 (Task 0) from train stream --
100%|██████████| 469/469 [00:03<00:00, 141.37it/s]
> Eval on experience 0 (Task 0) from train stream ended.
	ExperienceBWT/eval_phase

 70%|███████   | 7/10 [10:14<04:05, 81.68s/it]


> Eval on experience 9 (Task 0) from test stream ended.
	Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp009 = 0.0932
-- >> End of eval phase << --
	Accuracy_On_Trained_Experiences/eval_phase/test_stream/Task000 = 0.7238
	StreamBWT/eval_phase/test_stream = -0.0929
	StreamForgetting/eval_phase/test_stream = 0.0929
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.5447
-- >> Start of training phase << --
100%|██████████| 469/469 [00:04<00:00, 114.02it/s]
Epoch 0 ended.
100%|██████████| 469/469 [00:04<00:00, 115.80it/s]
Epoch 1 ended.
100%|██████████| 469/469 [00:04<00:00, 110.09it/s]
Epoch 2 ended.
100%|██████████| 469/469 [00:04<00:00, 112.71it/s]
Epoch 3 ended.
100%|██████████| 469/469 [00:04<00:00, 111.52it/s]
Epoch 4 ended.
-- >> End of training phase << --
-- >> Start of eval phase << --
-- Starting eval on experience 0 (Task 0) from train stream --
100%|██████████| 469/469 [00:03<00:00, 135.58it/s]
> Eval on experience 0 (Task 0) from train stream ended.
	ExperienceBWT/eval_phase

 80%|████████  | 8/10 [11:16<02:30, 75.46s/it]


> Eval on experience 9 (Task 0) from test stream ended.
	Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp009 = 0.1047
-- >> End of eval phase << --
	Accuracy_On_Trained_Experiences/eval_phase/test_stream/Task000 = 0.7062
	StreamBWT/eval_phase/test_stream = -0.1162
	StreamForgetting/eval_phase/test_stream = 0.1162
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.5871
-- >> Start of training phase << --
100%|██████████| 469/469 [00:04<00:00, 111.93it/s]
Epoch 0 ended.
100%|██████████| 469/469 [00:04<00:00, 111.97it/s]
Epoch 1 ended.
100%|██████████| 469/469 [00:04<00:00, 112.26it/s]
Epoch 2 ended.
100%|██████████| 469/469 [00:04<00:00, 113.06it/s]
Epoch 3 ended.
100%|██████████| 469/469 [00:04<00:00, 110.95it/s]
Epoch 4 ended.
-- >> End of training phase << --
-- >> Start of eval phase << --
-- Starting eval on experience 0 (Task 0) from train stream --
100%|██████████| 469/469 [00:03<00:00, 134.05it/s]
> Eval on experience 0 (Task 0) from train stream ended.
	ExperienceBWT/eval_phase

 90%|█████████ | 9/10 [12:18<01:11, 71.41s/it]


> Eval on experience 9 (Task 0) from test stream ended.
	Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp009 = 0.1175
-- >> End of eval phase << --
	Accuracy_On_Trained_Experiences/eval_phase/test_stream/Task000 = 0.7143
	StreamBWT/eval_phase/test_stream = -0.1101
	StreamForgetting/eval_phase/test_stream = 0.1101
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.6546
-- >> Start of training phase << --
100%|██████████| 469/469 [00:04<00:00, 111.71it/s]
Epoch 0 ended.
100%|██████████| 469/469 [00:04<00:00, 111.01it/s]
Epoch 1 ended.
100%|██████████| 469/469 [00:04<00:00, 107.95it/s]
Epoch 2 ended.
100%|██████████| 469/469 [00:04<00:00, 107.62it/s]
Epoch 3 ended.
100%|██████████| 469/469 [00:04<00:00, 110.18it/s]
Epoch 4 ended.
-- >> End of training phase << --
-- >> Start of eval phase << --
-- Starting eval on experience 0 (Task 0) from train stream --
100%|██████████| 469/469 [00:03<00:00, 133.71it/s]
> Eval on experience 0 (Task 0) from train stream ended.
	ExperienceBWT/eval_phase

100%|██████████| 10/10 [13:21<00:00, 80.14s/it]


> Eval on experience 9 (Task 0) from test stream ended.
	Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp009 = 0.8480
-- >> End of eval phase << --
	Accuracy_On_Trained_Experiences/eval_phase/test_stream/Task000 = 0.7161
	StreamBWT/eval_phase/test_stream = -0.1107
	StreamForgetting/eval_phase/test_stream = 0.1107
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.7161





In [81]:
# Show every metric recorded by the evaluation plugin; each entry maps a metric
# name to a pair of lists (step counters, metric values).
eval_plugin.get_all_metrics()

defaultdict(<function avalanche.training.plugins.evaluation._init_metrics_list_lambda()>,
            {'Top1_Acc_Exp/eval_phase/train_stream/Task000/Exp000': ([2345,
               4690,
               7035,
               9380,
               11725,
               14070,
               16415,
               18760,
               21105,
               23450],
              [0.7375,
               0.7092666666666667,
               0.64235,
               0.6308666666666667,
               0.6070833333333333,
               0.5614166666666667,
               0.4996833333333333,
               0.4170833333333333,
               0.4699,
               0.4620666666666667]),
             'Top1_Acc_Exp/eval_phase/train_stream/Task000/Exp001': ([2345,
               4690,
               7035,
               9380,
               11725,
               14070,
               16415,
               18760,
               21105,
               23450],
              [0.17666666666666667,
             

In [76]:
# Show the metric dictionaries collected by the training/evaluation loop.
results

[{'Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp000': 0.7742,
  'Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp001': 0.1637,
  'Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp002': 0.1197,
  'Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp003': 0.1149,
  'Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp004': 0.2006,
  'Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp005': 0.0881,
  'Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp006': 0.1976,
  'Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp007': 0.1815,
  'Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp008': 0.2237,
  'Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp009': 0.1609,
  'Top1_Acc_Stream/eval_phase/test_stream/Task000': 0.22249,
  'Accuracy_On_Trained_Experiences/eval_phase/test_stream/Task000': 0.7742,
  'StreamForgetting/eval_phase/test_stream': 0.0,
  'StreamBWT/eval_phase/test_stream': 0.0},
 {'Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp000': 0.7235,
  'Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp001': 0.7911,
 

# Get metrics

## Training Accuracies

In [88]:
# Accuracy on the train stream, keyed by series label ("Task0" ... "Task9",
# "Overall"). Every recorded metric is a pair (step counters, values); index 1
# selects the list of values.
all_metrics = eval_plugin.get_all_metrics()  # fetched once, not once per lookup

# Keys are string labels, so the mapping is dict[str, ...].
training_accuracies: dict[str, list[float]] = {
    f"Task{i}": all_metrics[
        f"Top1_Acc_Exp/eval_phase/train_stream/Task000/Exp{i:03d}"
    ][1]
    for i in range(10)
}

training_accuracies["Overall"] = all_metrics[
    "Top1_Acc_Stream/eval_phase/train_stream/Task000"
][1]

## Evaluation Accuracies

In [89]:
# Accuracy on the test stream, keyed by series label ("Task0" ... "Task9",
# "Overall"). Every recorded metric is a pair (step counters, values); index 1
# selects the list of values, of which only the last 10 entries are kept.
all_metrics = eval_plugin.get_all_metrics()  # fetched once, not once per lookup

# Keys are string labels, so the mapping is dict[str, ...].
accuracies: dict[str, list[float]] = {
    f"Task{i}": all_metrics[
        f"Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp{i:03d}"
    ][1][-10:]
    for i in range(10)
}

accuracies["Overall"] = all_metrics[
    "Top1_Acc_Stream/eval_phase/test_stream/Task000"
][1][-10:]

## Forgetting measure

In [90]:
# Forgetting on the test stream, keyed by series label ("Task0" ... "Task8",
# "Overall"). Every recorded metric is a pair (step counters, values); index 1
# selects the list of values.
# NOTE(review): only experiences 0-8 are read; presumably no forgetting value
# is reported for the last experience -- confirm.
all_metrics = eval_plugin.get_all_metrics()  # fetched once, not once per lookup

# Keys are string labels, so the mapping is dict[str, ...].
forgetting_measures: dict[str, list[float]] = {
    f"Task{i}": all_metrics[
        f"ExperienceForgetting/eval_phase/test_stream/Task000/Exp{i:03d}"
    ][1]
    for i in range(9)
}
forgetting_measures["Overall"] = all_metrics[
    "StreamForgetting/eval_phase/test_stream"
][1]

## Backward Transfer

In [91]:
# Backward transfer (BWT) on the test stream, keyed by series label
# ("Task0" ... "Task8", "Overall"). Every recorded metric is a pair
# (step counters, values); index 1 selects the list of values.
# NOTE(review): only experiences 0-8 are read; presumably no BWT value is
# reported for the last experience -- confirm.
all_metrics = eval_plugin.get_all_metrics()  # fetched once, not once per lookup

# Keys are string labels, so the mapping is dict[str, ...].
bwts: dict[str, list[float]] = {
    f"Task{i}": all_metrics[
        f"ExperienceBWT/eval_phase/test_stream/Task000/Exp{i:03d}"
    ][1]
    for i in range(9)
}
bwts["Overall"] = all_metrics[
    "StreamBWT/eval_phase/test_stream"
][1]

# Plotting metrics

## Training accuracies

In [92]:
# One column per series ("Task0" ... "Overall"), one row per evaluation round
# (rows labelled 0-9).
train_df = pd.DataFrame(training_accuracies).set_axis(range(10), axis="index")

# Line chart of train-stream accuracy with the y axis fixed to [0, 1].
fig = px.line(
    train_df,
    x=train_df.index,
    y=train_df.columns,
    range_y=[0, 1],
)
fig.show()

## Evaluation accuracies per experience

In [93]:
# One column per series ("Task0" ... "Overall"), one row per evaluation round
# (rows labelled 0-9).
acc_df = pd.DataFrame(accuracies).set_axis(range(10), axis="index")

# Line chart of test-stream accuracy with the y axis fixed to [0, 1].
fig = px.line(
    acc_df,
    x=acc_df.index,
    y=acc_df.columns,
    range_y=[0, 1],
)
fig.show()

## Forgetting measure / BWT

In [95]:
from copy import deepcopy  # notebook-level import left in place; this cell no longer needs it

# The forgetting series have different lengths, so every list is left-padded
# with None up to the longest one before building the DataFrame. The
# comprehension builds new lists, so `forgetting_measures` itself is untouched
# and no deep copy is required.
max_len = max(map(len, forgetting_measures.values()))
forgetting_padded = {
    label: [None] * (max_len - len(values)) + values
    for label, values in forgetting_measures.items()
}

forgetting_df = pd.DataFrame(forgetting_padded)
# Label rows by evaluation round; the length follows the padded series instead
# of a hard-coded 10.
forgetting_df.index = range(max_len)

fig = px.line(forgetting_df, x=forgetting_df.index, y=forgetting_df.columns)
fig.show()

# Store metrics

In [96]:
# Persist every recorded metric to disk. The context manager guarantees the
# file is flushed and closed even if pickling fails.
with open(SAVED_METRICS_PATH / "mwun.pkl", "wb") as metrics_file:
    pickle.dump(eval_plugin.get_all_metrics(), metrics_file)