In [1]:
from torchvision.datasets import MNIST, FashionMNIST, CIFAR10
import torchvision
import numpy as np
import random

import torch
import torch.nn.functional as F
import cl_gym as cl

import sys
import os

# Single seed shared by every random number generator used in this notebook.
seed = 1

# Seed each library exactly once: Python's `random`, NumPy, and PyTorch (CPU and all GPUs).
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

# Ask cuDNN for deterministic kernels. Benchmark mode must stay off: it autotunes
# the convolution algorithm per input shape and may pick different kernels between
# runs, which would defeat the seeding above.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

def make_params() -> dict:
    """Build the experiment configuration and create its output directory.

    Reads the module-level ``seed``. As side effects, prints the output
    directory and creates it (including parents) if it does not exist.

    Returns:
        dict: dataset, benchmark, optimizer and sample-selection settings, plus
        ``'trial_id'`` and ``'output_dir'`` derived from them.
    """
    from pathlib import Path

    # The experiments were run on GPU index 3. Fall back to GPU 0 when CUDA is
    # available but fewer than four devices are visible, so 'cuda:3' is never
    # requested on a machine that does not have it.
    if torch.cuda.is_available():
        gpu_index = 3 if torch.cuda.device_count() > 3 else 0
        device = torch.device(f'cuda:{gpu_index}')
    else:
        device = torch.device('cpu')

    params = {
            # dataset
            'dataset': "CIFAR10",
            # 'dataset': "FMNIST",

            # benchmark
            'seed': seed,
            'num_tasks': 5,
            'epochs_per_task': 1,
            # 'per_task_examples': 10000,
            'per_task_examples': np.inf,  # np.inf: no per-task cap (presumably "use all examples" -- confirm in datasets)
            'per_task_memory_examples': 20,
            'batch_size_train': 128,
            'batch_size_memory': 128,
            'batch_size_validation': 256,
            'tau': 1,

            # algorithm
            'optimizer': 'sgd',
            'learning_rate': 0.001,
            'momentum': 0.9,
            'learning_rate_decay': 1.0,
            'criterion': torch.nn.CrossEntropyLoss(),
            'device': device,

            # sample selection
            'alpha': 0.001
              }

    # The trial id encodes the hyper-parameters that distinguish runs, so each
    # configuration writes to its own directory.
    trial_id = f"demo/dataset={params['dataset']}/seed={params['seed']}_epoch={params['epochs_per_task']}_lr={params['learning_rate']}_tau={params['tau']}_alpha={params['alpha']}"
    params['trial_id'] = trial_id
    params['output_dir'] = "./outputs/{}".format(trial_id)
    print(f"output_dir={params['output_dir']}")
    Path(params['output_dir']).mkdir(parents=True, exist_ok=True)

    return params

# Build the run configuration once; later cells read it through the module-level
# `params`. The call also prints and creates the output directory.
params = make_params()

output_dir=./outputs/demo/dataset=CIFAR10/seed=1_epoch=1_lr=0.001_tau=1_alpha=0.001


In [2]:
from datasets import MNIST
from datasets import FashionMNIST
from datasets import CIFAR10, CIFAR100

# Select the continual-learning benchmark named by params['dataset'] and define:
#   benchmark -- benchmark object from the local `datasets` module
#   label_li  -- one human-readable label per class
#   n_feature -- height * width * channels of one input image

# Constructor arguments shared by every benchmark.
_benchmark_kwargs = dict(num_tasks=params['num_tasks'],
                         per_task_memory_examples=params['per_task_memory_examples'],
                         per_task_examples=params['per_task_examples'],
                         random_class_idx=False)

if params['dataset'] == 'MNIST':
    benchmark = MNIST(**_benchmark_kwargs)
    label_li = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    n_feature = 28*28

elif params['dataset'] == 'FMNIST':
    benchmark = FashionMNIST(**_benchmark_kwargs)
    label_li = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag',
                'Ankle boot']
    n_feature = 28*28

elif params['dataset'] == 'CIFAR10':
    label_li = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
    benchmark = CIFAR10(**_benchmark_kwargs)
    n_feature = 32*32*3

elif params['dataset'] == 'CIFAR100':
    # CIFAR100 has 100 classes, so the ten CIFAR10 names cannot label it.
    # Use the class index as a placeholder label, as the MNIST branch does.
    label_li = [str(class_idx) for class_idx in range(100)]
    benchmark = CIFAR100(**_benchmark_kwargs)
    n_feature = 32*32*3

else:
    # Fail here rather than with a NameError on `benchmark` in a later cell.
    raise ValueError(
        f"Unsupported dataset {params['dataset']!r}; "
        "expected one of 'MNIST', 'FMNIST', 'CIFAR10', 'CIFAR100'"
    )

[0 1 2 3 4 5 6 7 8 9]
Files already downloaded and verified
Files already downloaded and verified


In [3]:
from algorithms.imbalance import Heuristic2
from metrics import MetricCollector2

# Alternative backbone kept for reference:
# backbone = cl.backbones.MLP2Layers(input_dim=784, hidden_dim_1=256, hidden_dim_2=256, output_dim=10)

# Build the ResNet backbone first, then move it to the configured device.
backbone = cl.backbones.ResNet18Small(
    num_classes_per_head=2,
    num_classes=10,
    config=params,
)
backbone = backbone.to(params['device'])

# Continual-learning algorithm wrapping the backbone and the benchmark.
algorithm = Heuristic2(
    backbone,
    benchmark,
    params,
    requires_memory=True,
)

# Callback that collects metrics; configured here to evaluate at 'epoch' intervals.
metric_manager_callback = MetricCollector2(
    num_tasks=params['num_tasks'],
    eval_interval='epoch',
    epochs_per_task=params['epochs_per_task'],
)

In [6]:
# Print the shape of every parameter tensor in the backbone, then how many there are.
cnt = 0  # stays 0 if the backbone has no parameters
for cnt, p in enumerate(backbone.parameters(), start=1):
    print(p.shape)
print(cnt)

torch.Size([20, 3, 3, 3])
torch.Size([20])
torch.Size([20])
torch.Size([20, 20, 3, 3])
torch.Size([20])
torch.Size([20])
torch.Size([20, 20, 3, 3])
torch.Size([20])
torch.Size([20])
torch.Size([20, 20, 3, 3])
torch.Size([20])
torch.Size([20])
torch.Size([20, 20, 3, 3])
torch.Size([20])
torch.Size([20])
torch.Size([40, 20, 3, 3])
torch.Size([40])
torch.Size([40])
torch.Size([40, 40, 3, 3])
torch.Size([40])
torch.Size([40])
torch.Size([40, 20, 1, 1])
torch.Size([40, 40, 3, 3])
torch.Size([40])
torch.Size([40])
torch.Size([40, 40, 3, 3])
torch.Size([40])
torch.Size([40])
torch.Size([80, 40, 3, 3])
torch.Size([80])
torch.Size([80])
torch.Size([80, 80, 3, 3])
torch.Size([80])
torch.Size([80])
torch.Size([80, 40, 1, 1])
torch.Size([80, 80, 3, 3])
torch.Size([80])
torch.Size([80])
torch.Size([80, 80, 3, 3])
torch.Size([80])
torch.Size([80])
torch.Size([160, 80, 3, 3])
torch.Size([160])
torch.Size([160])
torch.Size([160, 160, 3, 3])
torch.Size([160])
torch.Size([160])
torch.Size([160, 80, 1, 1

In [7]:
# Print each backbone parameter's name and shape; the cell's value is the total count.
cnt = 0  # stays 0 if the backbone has no parameters
for cnt, (n, p) in enumerate(backbone.named_parameters(), start=1):
    print(n)
    print(p.shape)
cnt

conv1.weight
torch.Size([20, 3, 3, 3])
bn1.weight
torch.Size([20])
bn1.bias
torch.Size([20])
layer1.0.conv1.weight
torch.Size([20, 20, 3, 3])
layer1.0.bn1.weight
torch.Size([20])
layer1.0.bn1.bias
torch.Size([20])
layer1.0.conv2.weight
torch.Size([20, 20, 3, 3])
layer1.0.bn2.weight
torch.Size([20])
layer1.0.bn2.bias
torch.Size([20])
layer1.1.conv1.weight
torch.Size([20, 20, 3, 3])
layer1.1.bn1.weight
torch.Size([20])
layer1.1.bn1.bias
torch.Size([20])
layer1.1.conv2.weight
torch.Size([20, 20, 3, 3])
layer1.1.bn2.weight
torch.Size([20])
layer1.1.bn2.bias
torch.Size([20])
layer2.0.conv1.weight
torch.Size([40, 20, 3, 3])
layer2.0.bn1.weight
torch.Size([40])
layer2.0.bn1.bias
torch.Size([40])
layer2.0.conv2.weight
torch.Size([40, 40, 3, 3])
layer2.0.bn2.weight
torch.Size([40])
layer2.0.bn2.bias
torch.Size([40])
layer2.0.shortcut.0.weight
torch.Size([40, 20, 1, 1])
layer2.1.conv1.weight
torch.Size([40, 40, 3, 3])
layer2.1.bn1.weight
torch.Size([40])
layer2.1.bn1.bias
torch.Size([40])
layer2

56

In [4]:
from trainers import ContinualTrainer

# Run the full task sequence; the metric callback records results while training.
trainer = ContinualTrainer(
    algorithm,
    params,
    callbacks=[metric_manager_callback],
)
trainer.run()

# Report the final accuracy and forgetting figures from the collected meters.
for caption, meter_name in (("final avg-acc", 'accuracy'), ("final avg-forget", 'forgetting')):
    print(caption, metric_manager_callback.meters[meter_name].compute_final())

---------------------------- Task 1 -----------------------
solver=<function LS_solver at 0x7fbc5a7deca0>
[1] Eval metrics for task 1 >> {'accuracy': 83.75, 'loss': 0.0014423785209655763, 'std': 4.449999999999998}
training_task_end
---------------------------- Task 2 -----------------------
solver=<function LS_solver at 0x7fbc5a7deca0>
losses=tensor([[2.0073, 2.1701, 2.6903, 2.2058]])
A_np.shape=(4, 10000)
b_np.shape=(4,)
Elapsed time:46.554
Loss difference:[ 2.52058974e-09 -1.67031461e-09  1.58451763e-09  1.64000519e-09]
len(updated_seq_indices)=2788
[2] Eval metrics for task 1 >> {'accuracy': 63.85000000000001, 'loss': 0.003664962440729141, 'std': 1.250000000000001}
[2] Eval metrics for task 2 >> {'accuracy': 50.0, 'loss': 0.004260771095752716, 'std': 50.0}
training_task_end
---------------------------- Task 3 -----------------------
solver=<function LS_solver at 0x7fbc5a7deca0>
losses=tensor([[1.4675, 1.0663, 1.6982, 3.4718, 2.6347, 1.7467]])
A_np.shape=(6, 10000)
b_np.shape=(6,)
El

In [5]:
# List of recorded 'std' values; the output below has one entry per task.
# NOTE(review): the first entry matches the 'std' field of the task-1 eval log --
# confirm the exact definition in MetricCollector2.
metric_manager_callback.meters['std'].get_std()

[4.449999999999998,
 36.03799210555438,
 38.318866835484,
 13.158261425811542,
 8.806412436401104]

In [6]:
# Accuracy matrix. In the output below, row i holds the accuracies logged after
# training task i+1 and column j the accuracy on task j+1; tasks not yet seen show 0.
metric_manager_callback.meters['accuracy'].get_data()

array([[83.75,  0.  ,  0.  ,  0.  ,  0.  ],
       [63.85, 50.  ,  0.  ,  0.  ,  0.  ],
       [60.4 , 51.5 , 50.  ,  0.  ,  0.  ],
       [58.65, 64.3 , 68.4 , 64.4 ,  0.  ],
       [63.35, 64.1 , 68.3 , 66.1 , 76.1 ]])

In [7]:
# Overall accuracy. The value below is consistent with averaging, over training
# stages, the mean accuracy on the tasks seen so far -- confirm in the meter implementation.
metric_manager_callback.meters['accuracy'].compute_overall()

65.23383333333334

In [8]:
# Final accuracy. The value below equals the mean of the last row of the accuracy
# matrix, i.e. the average over all tasks after the last task was trained.
metric_manager_callback.meters['accuracy'].compute_final()

67.59

In [9]:
# Average of the recorded 'std' values across all tasks.
np.mean(metric_manager_callback.meters['std'].get_std())

20.154306560650205

In [10]:
# NOTE(review): the output below is identical to calling compute_overall() directly,
# so np.mean looks redundant here -- confirm that compute_overall() returns a scalar.
np.mean(metric_manager_callback.meters['accuracy'].compute_overall())

65.23383333333334