In [1]:
%load_ext autoreload
%autoreload 2

# Libraries

In [2]:
import pickle
from torchvision.transforms import Compose, ToTensor
import pandas as pd
import plotly.express as px
from tqdm import tqdm

In [3]:
from avalanche.benchmarks.classic import PermutedMNIST
from avalanche.models import SimpleMLP
from avalanche.training.plugins import EvaluationPlugin
from avalanche.logging import InteractiveLogger
from avalanche.evaluation.metrics import accuracy_metrics, forgetting_metrics, bwt_metrics, timing_metrics, cpu_usage_metrics, ram_usage_metrics


from torch.nn import CrossEntropyLoss
from torch.optim import SGD

  from .autonotebook import tqdm as notebook_tqdm


## Custom Libraries

In [4]:
import sys
# Extend the module search path so the project sources can be imported from
# this notebook.
# NOTE(review): the imports below use the `base_code.` package prefix, which is
# resolved against the *parent* directory of the package. Confirm whether the
# entry that is actually needed is "../" rather than "../base_code/" (the
# notebook may currently work only because the package is installed or the
# working directory is the project root).
sys.path.append("../base_code/")

from base_code.constants import DATASETS_PATH, SAVED_METRICS_PATH
from base_code.training import CTSV1
from base_code.plugins import WeightStoragePlugin

# Dataset and definitions

## Dataset loading

We load the Permuted MNIST benchmark (MNIST = Modified NIST), split into 10 experiences with a fixed seed.

In [5]:
# Permuted MNIST benchmark with 10 experiences; the seed is fixed so the
# benchmark is built the same way on every run.
scenario = PermutedMNIST(
    n_experiences=10,
    seed=1234,
    dataset_root=DATASETS_PATH,
)

## Scenario creation with train test streaming

At this point we take the train and test streams from the scenario. Training is sequential: in every experience a new pixel permutation of MNIST is presented (all ten digit classes are present in each experience). That is, we first train on permutation $a$, and in the following experience we train on permutation $b$ ($a \neq b$).

In [10]:
# Keep short handles to the two streams used by the training/evaluation loop.
train_stream, test_stream = scenario.train_stream, scenario.test_stream

## Evaluation metrics definition

In [12]:
# Metric groups tracked by the evaluator: accuracy, forgetting, backward
# transfer, epoch timing, CPU usage and RAM usage.
tracked_metrics = [
    accuracy_metrics(experience=True, stream=True, trained_experience=True),
    forgetting_metrics(experience=True, stream=True),
    bwt_metrics(experience=True, stream=True),
    timing_metrics(epoch=True, epoch_running=True),
    cpu_usage_metrics(experience=True, stream=True),
    ram_usage_metrics(experience=True, stream=True),
]

# Results are reported through an InteractiveLogger.
eval_plugin = EvaluationPlugin(*tracked_metrics, loggers=[InteractiveLogger()])

## Plugins definitions

In [13]:
# Strategy plugins; the weight-storage plugin's `.weights` are pickled at the
# end of the notebook.
model_plugins = [WeightStoragePlugin()]

## Model, Optimizer, Loss function and Strategy definition

* `model`: Multi Layer Perceptron
* `Optimizer`: SGD (learning rate $10^{-3}$)
* `Loss function`: Cross Entropy
* `Strategy`: `CTSV1` (custom strategy imported from `base_code.training`)

In [14]:
# MLP over flattened 28x28 inputs: two hidden layers of 100 units, no dropout.
model = SimpleMLP(
    num_classes=scenario.n_classes,
    input_size=28 * 28,
    hidden_layers=2,
    hidden_size=100,
    drop_rate=0,
)

# Plain SGD on all model parameters, cross-entropy as the classification loss.
optimizer = SGD(model.parameters(), lr=1e-3)
criterion = CrossEntropyLoss()

# Custom strategy wired to the plugins and the evaluator defined above.
strategy = CTSV1(
    model,
    optimizer,
    criterion,
    lambda_l1=1.0,
    lambda_l2=1e-2,
    eps=1e-5,
    train_epochs=5,
    train_mb_size=256,
    eval_mb_size=128,
    plugins=model_plugins,
    evaluator=eval_plugin,
)

 75%|███████▍  | 351/469 [00:19<00:00, 122.70it/s]

# Training and evaluation

**TODO:** Investigate why the training is behaving strangely.

In [15]:
# Raw metric dictionaries returned by every evaluation call, in call order
# (train-stream evaluation first, then test-stream, for each experience).
results = []

for experience in tqdm(train_stream):
    strategy.train(experience)

    # After each training experience, evaluate on the whole train stream and
    # then on the whole test stream.
    for stream in (train_stream, test_stream):
        metrics = strategy.eval(stream)
        results.append(metrics)



-- >> Start of training phase << --
100%|██████████| 235/235 [00:04<00:00, 58.67it/s]
Epoch 0 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 4.0059
100%|██████████| 235/235 [00:03<00:00, 59.99it/s]
Epoch 1 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 3.9173
100%|██████████| 235/235 [00:03<00:00, 60.59it/s]
Epoch 2 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 3.8794
100%|██████████| 235/235 [00:03<00:00, 60.82it/s]
Epoch 3 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 3.8639
100%|██████████| 235/235 [00:04<00:00, 57.93it/s]
Epoch 4 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 4.0570
-- >> End of training phase << --
-- >> Start of eval phase << -



-- >> End of eval phase << --
	Accuracy_On_Trained_Experiences/eval_phase/test_stream/Task000 = 0.7041
	CPUUsage_Stream/eval_phase/test_stream/Task000 = 105.2933
	MaxRAMUsage_Stream/eval_phase/test_stream/Task000 = 189.5156
	StreamBWT/eval_phase/test_stream = 0.0000
	StreamForgetting/eval_phase/test_stream = 0.0000
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.2235
-- >> Start of training phase << --
100%|██████████| 235/235 [00:04<00:00, 58.63it/s]
Epoch 0 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 4.0092
100%|██████████| 235/235 [00:04<00:00, 58.17it/s]
Epoch 1 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 4.0402
100%|██████████| 235/235 [00:03<00:00, 60.37it/s]
Epoch 2 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 3.8926
100%|██████████| 235/235 [00:03<00:00, 60.69it/s]




-- >> End of eval phase << --
	Accuracy_On_Trained_Experiences/eval_phase/test_stream/Task000 = 0.6857
	CPUUsage_Stream/eval_phase/test_stream/Task000 = 105.3211
	MaxRAMUsage_Stream/eval_phase/test_stream/Task000 = 336.2031
	StreamBWT/eval_phase/test_stream = -0.0661
	StreamForgetting/eval_phase/test_stream = 0.0661
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.2975
-- >> Start of training phase << --
100%|██████████| 235/235 [00:03<00:00, 59.98it/s]
Epoch 0 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 3.9190
100%|██████████| 235/235 [00:03<00:00, 60.65it/s]
Epoch 1 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 3.8750
100%|██████████| 235/235 [00:03<00:00, 60.88it/s]
Epoch 2 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 3.8605
100%|██████████| 235/235 [00:04<00:00, 58.20it/s]



-- >> End of eval phase << --
	Accuracy_On_Trained_Experiences/eval_phase/test_stream/Task000 = 0.7069
	CPUUsage_Stream/eval_phase/test_stream/Task000 = 105.6768
	MaxRAMUsage_Stream/eval_phase/test_stream/Task000 = 412.7969
	StreamBWT/eval_phase/test_stream = -0.0410
	StreamForgetting/eval_phase/test_stream = 0.0410
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.3460
-- >> Start of training phase << --
100%|██████████| 235/235 [00:03<00:00, 60.31it/s]
Epoch 0 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 3.8968
100%|██████████| 235/235 [00:03<00:00, 61.00it/s]
Epoch 1 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 3.8524
100%|██████████| 235/235 [00:03<00:00, 60.98it/s]
Epoch 2 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 3.8540
100%|██████████| 235/235 [00:03<00:00, 61.14it/s]



-- >> End of eval phase << --
	Accuracy_On_Trained_Experiences/eval_phase/test_stream/Task000 = 0.6581
	CPUUsage_Stream/eval_phase/test_stream/Task000 = 105.5996
	MaxRAMUsage_Stream/eval_phase/test_stream/Task000 = 319.0469
	StreamBWT/eval_phase/test_stream = -0.1197
	StreamForgetting/eval_phase/test_stream = 0.1197
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.3733
-- >> Start of training phase << --
100%|██████████| 235/235 [00:03<00:00, 60.96it/s]
Epoch 0 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 3.8559
100%|██████████| 235/235 [00:03<00:00, 61.26it/s]
Epoch 1 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 3.8364
100%|██████████| 235/235 [00:03<00:00, 61.11it/s]
Epoch 2 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 3.8458
100%|██████████| 235/235 [00:03<00:00, 60.97it/s]



-- >> End of eval phase << --
	Accuracy_On_Trained_Experiences/eval_phase/test_stream/Task000 = 0.6890
	CPUUsage_Stream/eval_phase/test_stream/Task000 = 105.6272
	MaxRAMUsage_Stream/eval_phase/test_stream/Task000 = 186.3750
	StreamBWT/eval_phase/test_stream = -0.0883
	StreamForgetting/eval_phase/test_stream = 0.0883
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.4309
-- >> Start of training phase << --
100%|██████████| 235/235 [00:03<00:00, 58.80it/s]
Epoch 0 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 3.9969
100%|██████████| 235/235 [00:04<00:00, 56.33it/s]
Epoch 1 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 4.1719
100%|██████████| 235/235 [00:03<00:00, 59.53it/s]
Epoch 2 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 3.9478
100%|██████████| 235/235 [00:03<00:00, 60.89it/s]



-- >> Start of training phase << --
100%|██████████| 235/235 [00:04<00:00, 58.27it/s]
Epoch 0 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0002
	Time_Epoch/train_phase/train_stream/Task000 = 4.0340
100%|██████████| 235/235 [00:03<00:00, 58.79it/s]
Epoch 1 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 3.9975
100%|██████████| 235/235 [00:03<00:00, 58.91it/s]
Epoch 2 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 3.9894
100%|██████████| 235/235 [00:04<00:00, 57.35it/s]
Epoch 3 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 4.0980
100%|██████████| 235/235 [00:03<00:00, 58.75it/s]
Epoch 4 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 4.0001
-- >> End of training phase << --
-- >> Start of eval phase << -



-- >> End of eval phase << --
	Accuracy_On_Trained_Experiences/eval_phase/test_stream/Task000 = 0.7050
	CPUUsage_Stream/eval_phase/test_stream/Task000 = 105.9804
	MaxRAMUsage_Stream/eval_phase/test_stream/Task000 = 221.5312
	StreamBWT/eval_phase/test_stream = -0.0838
	StreamForgetting/eval_phase/test_stream = 0.0838
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.5418
-- >> Start of training phase << --
100%|██████████| 235/235 [00:03<00:00, 59.66it/s]
Epoch 0 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 3.9401
100%|██████████| 235/235 [00:03<00:00, 59.24it/s]
Epoch 1 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 3.9675
100%|██████████| 235/235 [00:03<00:00, 59.44it/s]
Epoch 2 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 3.9540
100%|██████████| 235/235 [00:03<00:00, 59.57it/s]



-- >> End of eval phase << --
	Accuracy_On_Trained_Experiences/eval_phase/test_stream/Task000 = 0.6698
	CPUUsage_Stream/eval_phase/test_stream/Task000 = 105.5671
	MaxRAMUsage_Stream/eval_phase/test_stream/Task000 = 253.2500
	StreamBWT/eval_phase/test_stream = -0.1295
	StreamForgetting/eval_phase/test_stream = 0.1295
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.5612
-- >> Start of training phase << --
100%|██████████| 235/235 [00:03<00:00, 59.23it/s]
Epoch 0 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0002
	Time_Epoch/train_phase/train_stream/Task000 = 3.9683
100%|██████████| 235/235 [00:03<00:00, 59.83it/s]
Epoch 1 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 3.9278
100%|██████████| 235/235 [00:03<00:00, 59.19it/s]
Epoch 2 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 3.9694
100%|██████████| 235/235 [00:03<00:00, 59.38it/s]



-- >> End of eval phase << --
	Accuracy_On_Trained_Experiences/eval_phase/test_stream/Task000 = 0.6797
	CPUUsage_Stream/eval_phase/test_stream/Task000 = 106.2140
	MaxRAMUsage_Stream/eval_phase/test_stream/Task000 = 220.3594
	StreamBWT/eval_phase/test_stream = -0.1230
	StreamForgetting/eval_phase/test_stream = 0.1230
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.6278
-- >> Start of training phase << --
100%|██████████| 235/235 [00:04<00:00, 57.69it/s]
Epoch 0 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 4.0733
100%|██████████| 235/235 [00:04<00:00, 57.65it/s]
Epoch 1 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 4.0757
100%|██████████| 235/235 [00:03<00:00, 59.23it/s]
Epoch 2 ended.
	RunningTime_Epoch/train_phase/train_stream/Task000 = 0.0001
	Time_Epoch/train_phase/train_stream/Task000 = 3.9667
100%|██████████| 235/235 [00:03<00:00, 59.76it/s]

100%|██████████| 10/10 [11:03<00:00, 66.39s/it]

-- >> End of eval phase << --
	Accuracy_On_Trained_Experiences/eval_phase/test_stream/Task000 = 0.6918
	CPUUsage_Stream/eval_phase/test_stream/Task000 = 105.7018
	MaxRAMUsage_Stream/eval_phase/test_stream/Task000 = 143.6094
	StreamBWT/eval_phase/test_stream = -0.1131
	StreamForgetting/eval_phase/test_stream = 0.1131
	Top1_Acc_Stream/eval_phase/test_stream/Task000 = 0.6918





# Get metrics

## Training Accuracies

In [16]:
# Top-1 accuracy on the train stream, one series per experience plus the
# stream-level series. Keys are the strings "Task<i>" and "Overall", so the
# mapping is annotated as str -> list[float].
# The metric store is fetched once instead of once per lookup; element [1] of
# each entry is the list of recorded values.
all_metrics = eval_plugin.get_all_metrics()

training_accuracies: dict[str, list[float]] = {
    # Zero-padded to three digits, matching the "ExpNNN" naming of the metric keys.
    f"Task{i}": all_metrics[
        f"Top1_Acc_Exp/eval_phase/train_stream/Task000/Exp{i:03d}"
    ][1]
    for i in range(10)
}

training_accuracies["Overall"] = all_metrics[
    "Top1_Acc_Stream/eval_phase/train_stream/Task000"
][1]

## Evaluation Accuracies

In [17]:
# Top-1 accuracy on the test stream, one series per experience plus the
# stream-level series. Keys are the strings "Task<i>" and "Overall", so the
# mapping is annotated as str -> list[float].
# The metric store is fetched once instead of once per lookup; element [1] of
# each entry is the list of recorded values.
all_metrics = eval_plugin.get_all_metrics()

accuracies: dict[str, list[float]] = {
    # Zero-padded to three digits, matching the "ExpNNN" naming of the metric keys.
    f"Task{i}": all_metrics[
        f"Top1_Acc_Exp/eval_phase/test_stream/Task000/Exp{i:03d}"
    ][1]
    for i in range(10)
}

accuracies["Overall"] = all_metrics[
    "Top1_Acc_Stream/eval_phase/test_stream/Task000"
][1]

## Forgetting measure

In [18]:
# Forgetting on the test stream, one series per experience plus the
# stream-level series. Keys are the strings "Task<i>" and "Overall", so the
# mapping is annotated as str -> list[float].
# The metric store is fetched once instead of once per lookup; element [1] of
# each entry is the list of recorded values.
all_metrics = eval_plugin.get_all_metrics()

# Only experiences 0..8 are read — presumably the last experience has no later
# evaluation to be compared against; confirm against the metric definition.
forgetting_measures: dict[str, list[float]] = {
    # Zero-padded to three digits, matching the "ExpNNN" naming of the metric keys.
    f"Task{i}": all_metrics[
        f"ExperienceForgetting/eval_phase/test_stream/Task000/Exp{i:03d}"
    ][1]
    for i in range(9)
}
forgetting_measures["Overall"] = all_metrics[
    "StreamForgetting/eval_phase/test_stream"
][1]

## Backward Transfer

In [19]:
# Backward transfer on the test stream, one series per experience plus the
# stream-level series. Keys are the strings "Task<i>" and "Overall", so the
# mapping is annotated as str -> list[float].
# The metric store is fetched once instead of once per lookup; element [1] of
# each entry is the list of recorded values.
all_metrics = eval_plugin.get_all_metrics()

# Only experiences 0..8 are read — presumably the last experience has no later
# evaluation to be compared against; confirm against the metric definition.
bwts: dict[str, list[float]] = {
    # Zero-padded to three digits, matching the "ExpNNN" naming of the metric keys.
    f"Task{i}": all_metrics[
        f"ExperienceBWT/eval_phase/test_stream/Task000/Exp{i:03d}"
    ][1]
    for i in range(9)
}
bwts["Overall"] = all_metrics[
    "StreamBWT/eval_phase/test_stream"
][1]

# Plotting metrics

## Training accuracies

In [20]:
# One row per evaluation step (0..9), one column per series in
# `training_accuracies`.
train_df = pd.DataFrame(training_accuracies, index=range(10))

# Accuracy axis pinned to [0, 1].
fig = px.line(
    train_df,
    x=train_df.index,
    y=train_df.columns,
    range_y=[0, 1],
    title="Training Accuracy vs Task",
)
fig.show()

## Evaluation accuracies per experience

In [21]:
# One row per evaluation step (0..9), one column per series in `accuracies`.
acc_df = pd.DataFrame(accuracies, index=range(10))

# Accuracy axis pinned to [0, 1].
fig = px.line(
    acc_df,
    x=acc_df.index,
    y=acc_df.columns,
    range_y=[0, 1],
    title="Test Accuracy vs Task",
)
fig.show()

## Forgetting measure / BWT

In [22]:
# Turn the forgetting measures into a DataFrame. The series do not all have
# the same length, so shorter ones are left-padded with None up to the longest
# one. The comprehension builds brand-new lists, so `forgetting_measures`
# itself is never modified and no deep copy is required.
max_len = max(map(len, forgetting_measures.values()))
forgetting_measures_tmp = {
    name: [None] * (max_len - len(values)) + values
    for name, values in forgetting_measures.items()
}

# Index by evaluation step; the length comes from the data rather than a
# hard-coded 10, so the cell keeps working if the number of experiences changes.
forgetting_df = pd.DataFrame(forgetting_measures_tmp, index=range(max_len))

fig = px.line(forgetting_df, x=forgetting_df.index, y=forgetting_df.columns, title="Forgetting vs Task")
fig.show()

# Store metrics

In [23]:
# Persist the test-stream accuracies. The context manager closes (and thereby
# flushes) the file handle even if pickling raises, instead of leaving it to
# the garbage collector.
with open(SAVED_METRICS_PATH / "cts.pkl", "wb") as metrics_file:
    pickle.dump(accuracies, metrics_file)

# Store weights

In [24]:
# Persist the weights held by the first plugin in `model_plugins`. The context
# manager closes (and thereby flushes) the file handle even if pickling raises,
# instead of leaving it to the garbage collector.
with open(SAVED_METRICS_PATH / "cts_weights.pkl", "wb") as weights_file:
    pickle.dump(model_plugins[0].weights, weights_file)