In [1]:
from torchvision.datasets import MNIST, FashionMNIST, CIFAR10
import torchvision
import numpy as np
import random

import torch
import torch.nn.functional as F
import cl_gym as cl

import sys
import os
sys.path.append(os.path.abspath(".."))

# dataset = 'MNIST'
seed = 1
dataset = 'FMNIST'
# dataset = 'CIFAR10'

np.random.seed(seed)
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)
torch.cuda.manual_seed_all(seed)
torch.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.enabled = False

def make_params() -> dict:
    import os
    from pathlib import Path
    import uuid

    params = {
            # dataset
            'dataset': "FMNIST",

            # benchmark
            'seed': seed,
            'num_tasks': 5,
            'epochs_per_task': 1,
            'per_task_examples': np.inf,
            'per_task_memory_examples': 64,
            'batch_size_train': 64,
            'batch_size_memory': 64,
            'batch_size_validation': 256,
            'lambda': 10,

            # algorithm
            'optimizer': 'Adam',
            'learning_rate': 0.001,
            'momentum': 0.8,
            'learning_rate_decay': 1.0,
            'criterion': torch.nn.CrossEntropyLoss(),
            'device': torch.device('cuda:6' if torch.cuda.is_available() else 'cpu'), }

#     trial_id = str(uuid.uuid4())
    trial_id = f"seed={params['seed']}_epoch={params['epochs_per_task']}q__"
    params['trial_id'] = trial_id
    params['output_dir'] = os.path.join("./outputs/{}".format(trial_id))
    print(f"output_dir={params['output_dir']}")
    Path(params['output_dir']).mkdir(parents=True, exist_ok=True)

    return params

params = make_params()

output_dir=./outputs/seed=1_epoch=1q__


In [2]:
if params['dataset'] == 'MNIST':
    benchmark = cl.benchmarks.SplitMNIST(num_tasks=params['num_tasks'],
                                        per_task_memory_examples=params['per_task_memory_examples'])
    label_li = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

elif params['dataset'] == 'FMNIST':
    from datasets.FashionMNIST import FashionMNIST
    benchmark = FashionMNIST(num_tasks=params['num_tasks'],
                             per_task_memory_examples=params['per_task_memory_examples'],
                             per_task_examples = min(params['per_task_examples'], 12000))

    label_li = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 
                  'Ankel boot']
    # base_transform = transforms.Compose([transforms.ToTensor()])
    train_dataset = benchmark.fashion_mnist_train
    test_dataset = benchmark.fashion_mnist_test
    
elif params['dataset'] == 'CIFAR10':
    label_li = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
    benchmark = cl.benchmarks.SplitCIFAR10(num_tasks=params['num_tasks'],
                                        per_task_memory_examples=params['per_task_memory_examples'])

[0 1 2 3 4 5 6 7 8 9]


In [3]:
from sklearn.metrics import accuracy_score, confusion_matrix
from torchvision import models
from torchinfo import summary
# from resnet import ResNet18
import random
import copy


from algorithms.mean_std_min import Heuristic2
from metrics.fair_metric_manager import FairMetricCollector

if dataset in ['MNIST', 'FMNIST']:
    n_feature = 28*28
elif dataset in ['CIFAR10']:
    n_feature = 32*32*3
    
params['alpha'] = 0.0501
params['lambda'] = 10

# alpha_li = [0.05]
# lamb_li = [50]

# alpha_li = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1]
# lamb_li = [0.01, 0.05, 0.1, 1, 10, 50, 100]

backbone = cl.backbones.MLP2Layers(input_dim=784, hidden_dim_1=256, hidden_dim_2=256, output_dim=10)
algorithm = Heuristic2(backbone, benchmark, params, requires_memory=True)
metric_manager_callback = cl.utils.callbacks.MetricCollector(num_tasks=params['num_tasks'],
                                                        eval_interval='epoch',
                                                        epochs_per_task=params['epochs_per_task'])

# from trainers.FairContinualTrainer import FairContinualTrainer
# trainer = FairContinualTrainer(algorithm, params, callbacks=[metric_manager_callback])

trainer = cl.trainer.ContinualTrainer(algorithm, params, callbacks=[metric_manager_callback])
trainer.run()
print("final avg-acc", metric_manager_callback.meters['accuracy'].compute_final())
print("final avg-forget", metric_manager_callback.meters['forgetting'].compute_final())

    


---------------------------- Task 1 -----------------------
[1] Eval metrics for task 1 >> {'accuracy': 98.6, 'loss': 0.00015132083231583237}
training_task_end
load_memory_joint: len(train_loader.dataset)=64
---------------------------- Task 2 -----------------------
loss_matrix.shape=torch.Size([12000, 4])
loss_matrix.shape=torch.Size([11999, 4])
loss_matrix.shape=torch.Size([11998, 4])
loss_matrix.shape=torch.Size([11997, 4])
loss_matrix.shape=torch.Size([11996, 4])
loss_matrix.shape=torch.Size([11995, 4])
loss_matrix.shape=torch.Size([11994, 4])
loss_matrix.shape=torch.Size([11993, 4])
loss_matrix.shape=torch.Size([11992, 4])
loss_matrix.shape=torch.Size([11991, 4])
loss_matrix.shape=torch.Size([11990, 4])
loss_matrix.shape=torch.Size([11989, 4])
loss_matrix.shape=torch.Size([11988, 4])
loss_matrix.shape=torch.Size([11987, 4])
loss_matrix.shape=torch.Size([11986, 4])
loss_matrix.shape=torch.Size([11985, 4])
loss_matrix.shape=torch.Size([11984, 4])
loss_matrix.shape=torch.Size([11983

KeyboardInterrupt: 

In [None]:
task_id = 3
inc_num = 2
benchmark.class_idx[(task_id-1)*(inc_num):task_id*inc_num]
benchmark.class_idx[:(task_id-1)*(inc_num)]

array([0, 1, 2, 3])

In [None]:
k=0, loss_.shape=torch.Size([1, 1])
k=0, grads_.shape=torch.Size([1, 2570])
k=1, loss_.shape=torch.Size([1, 1])
k=1, grads_.shape=torch.Size([1, 2570])
k=2, loss_.shape=torch.Size([1, 1])
k=2, grads_.shape=torch.Size([1, 2570])
k=3, loss_.shape=torch.Size([1, 1])
k=3, grads_.shape=torch.Size([1, 2570])
loss_matrix.shape=torch.Size([3783, 4])
3 is missing
k=0, loss_.shape=torch.Size([1, 1])
k=0, grads_.shape=torch.Size([1, 2570])
k=1, loss_.shape=torch.Size([1, 1])
k=1, grads_.shape=torch.Size([1, 2570])
k=2, loss_.shape=torch.Size([1, 1])
k=2, grads_.shape=torch.Size([1, 2570])
k=3, loss_.shape=torch.Size([1, 0])
k=3, grads_.shape=torch.Size([1, 0])


SyntaxError: cannot assign to literal (898532129.py, line 1)

In [None]:
---------------------------- Task 1 -----------------------
[1] Eval metrics for task 1 >> {'accuracy': 98.6, 'loss': 0.00015132083231583237}
training_task_end
load_memory_joint: len(train_loader.dataset)=64
---------------------------- Task 2 -----------------------
len(select_curr_indexes)=5075
[2] Eval metrics for task 1 >> {'accuracy': 85.85, 'loss': 0.0034543287754058836}
[2] Eval metrics for task 2 >> {'accuracy': 86.6, 'loss': 0.0027514055371284487}
training_task_end
load_memory_joint: len(train_loader.dataset)=128
---------------------------- Task 3 -----------------------
len(select_curr_indexes)=9259
[3] Eval metrics for task 1 >> {'accuracy': 82.85, 'loss': 0.005165461122989655}
[3] Eval metrics for task 2 >> {'accuracy': 59.0, 'loss': 0.007712359070777893}
[3] Eval metrics for task 3 >> {'accuracy': 97.55, 'loss': 0.0003111732173711061}
training_task_end
load_memory_joint: len(train_loader.dataset)=192
---------------------------- Task 4 -----------------------
len(select_curr_indexes)=11205
[4] Eval metrics for task 1 >> {'accuracy': 66.7, 'loss': 0.010747700095176697}
[4] Eval metrics for task 2 >> {'accuracy': 48.0, 'loss': 0.012582610368728637}
[4] Eval metrics for task 3 >> {'accuracy': 57.25, 'loss': 0.008546485185623169}
[4] Eval metrics for task 4 >> {'accuracy': 92.6, 'loss': 0.0011509314700961113}
training_task_end
load_memory_joint: len(train_loader.dataset)=256
---------------------------- Task 5 -----------------------
len(select_curr_indexes)=7916
[5] Eval metrics for task 1 >> {'accuracy': 74.3, 'loss': 0.007032525479793548}
[5] Eval metrics for task 2 >> {'accuracy': 56.75, 'loss': 0.011757457256317138}
[5] Eval metrics for task 3 >> {'accuracy': 63.9, 'loss': 0.00855764478445053}
[5] Eval metrics for task 4 >> {'accuracy': 80.9, 'loss': 0.003836171865463257}
[5] Eval metrics for task 5 >> {'accuracy': 92.7, 'loss': 0.0013385150507092475}
training_task_end
load_memory_joint: len(train_loader.dataset)=320
final avg-acc 73.71000000000001
final avg-forget 24.874999999999993


In [None]:
taskd

NameError: name 'taskd' is not defined

In [None]:
np.delete(np.random.randint(0, 10, size=10))


TypeError: delete() missing 1 required positional argument: 'obj'

In [None]:
import numpy as np
a = np.random.choice(10, size=10, replace=False).tolist()
sorted(a)
a

[3, 7, 8, 4, 9, 0, 2, 1, 6, 5]

In [None]:
for x in range(5):
    if len(classwise_loss[x]) == 0:
        del classwise_loss[x]

In [None]:
from torch.utils.data import DataLoader, Dataset, Subset, ConcatDataset

task = 1
trainset = Subset(benchmark.trains[task], range(100))
batch_size = 32
shuffle=False
num_workers = 0
pin_memory = True

train_loader = DataLoader(trainset, batch_size, shuffle, num_workers=num_workers,
                                  pin_memory=pin_memory)

In [None]:
for batch_idx, (inp, targ, t_id, *_) in enumerate(train_loader):
    print(targ)

tensor([0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0,
        1, 1, 1, 1, 1, 1, 1, 1])
tensor([0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0,
        0, 1, 0, 0, 1, 1, 1, 1])
tensor([1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1,
        0, 1, 1, 1, 0, 1, 0, 0])
tensor([1, 0, 0, 0])


In [None]:
benchmark.trains[task].targets[32:64]

tensor([0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0,
        0, 1, 0, 0, 1, 1, 1, 1])

In [None]:
len(benchmark.trains[task].targets)

12000

In [None]:
targets = train_loader.dataset.dataset.targets
targets.unique().cpu().numpy()

array([0, 1])

In [None]:
num_dict = {x:0 for x in targets.unique().cpu().numpy()}
for k in num_dict:
    print(k)

0
1


In [None]:
len(num_dict)

2

In [None]:
(targets==0).nonzero()

tensor([[    0],
        [    1],
        [    2],
        ...,
        [11994],
        [11995],
        [11999]])

In [None]:
targets

tensor([0, 0, 0,  ..., 1, 1, 0])

In [None]:
torch.where(targets==0)[0]

tensor([    0,     1,     2,  ..., 11994, 11995, 11999])

In [None]:
device = params['device']

for i in range(5):
    torch.cuda.empty_cache()
    if i == 0:
        x_train_batch = torch.cat(all_train_data[i*2:i*2+2])
        y_train_batch = torch.cat(all_train_label[i*2:i*2+2])

    elif i > 0:
        x_train_batch = torch.cat(all_train_data[i*2:i*2+2])
        y_train_batch = torch.cat(all_train_label[i*2:i*2+2])
        if i == 1:
            x_prev_buffer1 = torch.cat(all_train_data[i*2-2:i*2-1])
            y_prev_buffer1 = torch.cat(all_train_label[i*2-2:i*2-1])
            indices1 = torch.randperm(len(x_prev_buffer1))[:buffer_size]
            x_prev_buffer1 = x_prev_buffer1[indices1]
            y_prev_buffer1 = y_prev_buffer1[indices1]

            x_prev_buffer2 = torch.cat(all_train_data[i*2-1:i*2])
            y_prev_buffer2 = torch.cat(all_train_label[i*2-1:i*2])
            indices2 = torch.randperm(len(x_prev_buffer2))[:buffer_size]
            x_prev_buffer2 = x_prev_buffer2[indices2]
            y_prev_buffer2 = y_prev_buffer2[indices2]

            x_prev_buffer = torch.cat([x_prev_buffer1, x_prev_buffer2])
            y_prev_buffer = torch.cat([y_prev_buffer1, y_prev_buffer2])

        else:

            # select gradient-based herding buffer
            x_new_buffer = torch.cat(all_train_data[i*2-2:i*2])
            y_new_buffer = torch.cat(all_train_label[i*2-2:i*2])

            x_new_buffer = x_new_buffer[select_buffer_indexes]
            y_new_buffer = y_new_buffer[select_buffer_indexes]

            x_prev_buffer = torch.cat([x_prev_buffer, x_new_buffer])
            y_prev_buffer = torch.cat([y_prev_buffer, y_new_buffer])
            
#                     print('memory2:', torch.cuda.memory_allocated(device=device)/1024/1024/1024)

        # buffer에 있는 class별 loss가 buffer_losses list에 들어감
        buffer_losses = []
        

        # computation of mean gradients and losses for buffers 
        for n in range(i*2):
            buffer_ind = [m for m in range(len(y_prev_buffer)) if y_prev_buffer[m] == n]
            x_buffer = x_prev_buffer[buffer_ind]
            y_buffer = y_prev_buffer[buffer_ind]
            x_buffer = torch.Tensor(x_buffer).to(device, dtype=torch.float32)
            y_buffer = torch.Tensor(y_buffer).to(device, dtype=torch.int64)

            if n == 0:
                model.zero_grad()

                # mean gradient computation
                out, emb = model(x_buffer)

                init_out_buffer = out
                init_emb_buffer = emb
                init_y_buffer = y_buffer.view(-1, 1)


                loss = loss_sample(out, y_buffer).sum()
                buffer_l0_grads = torch.autograd.grad(loss, out)[0] #torch.Size([32, 10])
                if dataset in ['MNIST', 'FMNIST']:
                    buffer_l0_expand = torch.repeat_interleave(buffer_l0_grads, 256, dim=1)
                else:
                    buffer_l0_expand = torch.repeat_interleave(buffer_l0_grads, 512, dim=1)
                buffer_l1_grads = buffer_l0_expand * emb.repeat(1, n_class)

                buffer_l0_grads = buffer_l0_grads.mean(dim=0).view(1, -1)
                buffer_l1_grads = buffer_l1_grads.mean(dim=0).view(1, -1)

                # mean loss computation
                buffer_ds = TensorDataset(x_buffer, y_buffer)
                buffer_loader = DataLoader(dataset=buffer_ds, batch_size=batch_size, shuffle=True)
                buffer_output, buffer_loss = clf.evaluate(buffer_loader)
                buffer_losses.append(buffer_loss)

            else:
                model.zero_grad()

                # mean gradient computation
                out, emb = model(x_buffer)

                init_out_buffer = torch.cat((init_out_buffer, out), dim=0)
                init_emb_buffer = torch.cat((init_emb_buffer, emb), dim=0)
                init_y_buffer = torch.cat((init_y_buffer, y_buffer.view(-1, 1)), dim=0)

                loss = loss_sample(out, y_buffer).sum()
                batch_l0_grads = torch.autograd.grad(loss, out)[0]
                if dataset in ['MNIST', 'FMNIST']:
                    batch_l0_expand = torch.repeat_interleave(batch_l0_grads, 256, dim=1)
                else:
                    batch_l0_expand = torch.repeat_interleave(batch_l0_grads, 512, dim=1)

                batch_l1_grads = batch_l0_expand * emb.repeat(1, n_class)

                batch_l0_grads = batch_l0_grads.mean(dim=0).view(1, -1)
                batch_l1_grads = batch_l1_grads.mean(dim=0).view(1, -1)

                buffer_l0_grads = torch.cat((buffer_l0_grads, batch_l0_grads), dim=0)
                buffer_l1_grads = torch.cat((buffer_l1_grads, batch_l1_grads), dim=0)

                # mean loss computation
                buffer_ds = TensorDataset(x_buffer, y_buffer)
                buffer_loader = DataLoader(dataset=buffer_ds, batch_size=batch_size, shuffle=True)
                buffer_output, buffer_loss = clf.evaluate(buffer_loader)
                buffer_losses.append(buffer_loss)
                
        
        # initialize individual sample gradients of new data
        new_l0_grads = torch.empty((0, n_class), device=device, dtype=torch.float32)
        if dataset in ['MNIST', 'FMNIST']:
            new_l1_grads = torch.empty((0, n_class*256), device=device, dtype=torch.float32)
        else:
            new_l1_grads = torch.empty((0, n_class*512), device=device, dtype=torch.float32)

        # computation of mean and individual gradients and mean losses for new data 
        for n in range(2):
            model.zero_grad()

            # mean gradient computation
            x_new_buffer = torch.cat(all_train_data[i*2+n:i*2+n+1])
            y_new_buffer = torch.cat(all_train_label[i*2+n:i*2+n+1])
            x_new_buffer = torch.Tensor(x_new_buffer).to(device, dtype=torch.float32)
            y_new_buffer = torch.Tensor(y_new_buffer).to(device, dtype=torch.int64)
            
            new_buffer_ds = TensorDataset(x_new_buffer, y_new_buffer)
            new_buffer_loader = DataLoader(dataset=new_buffer_ds, batch_size=batch_size, shuffle=False)
            
            for batch_idx, batch_data in enumerate(new_buffer_loader):
                model.zero_grad()
                if batch_idx == 0:
                    x_batch, y_batch = batch_data

                    out, emb = model(x_batch)

                    init_out_buffer = torch.cat((init_out_buffer, out), dim=0)
                    init_emb_buffer = torch.cat((init_emb_buffer, emb), dim=0)
                    init_y_buffer = torch.cat((init_y_buffer, y_batch.view(-1, 1)), dim=0)

                    loss = loss_sample(out, y_batch).sum()
                    batch_l0_grads = torch.autograd.grad(loss, out)[0]
                    
                    with torch.no_grad():
                        if dataset in ['MNIST', 'FMNIST']:
                            batch_l0_expand = torch.repeat_interleave(batch_l0_grads, 256, dim=1)
                        else:
                            batch_l0_expand = torch.repeat_interleave(batch_l0_grads, 512, dim=1)
                        batch_l1_grads = batch_l0_expand * emb.repeat(1, n_class)
                    
                else:
                    
                    x_batch, y_batch = batch_data

                    out, emb = model(x_batch)

                    init_out_buffer = torch.cat((init_out_buffer, out), dim=0)
                    init_emb_buffer = torch.cat((init_emb_buffer, emb), dim=0)
                    init_y_buffer = torch.cat((init_y_buffer, y_batch.view(-1, 1)), dim=0)

                    loss = loss_sample(out, y_batch).sum()
                    next_batch_l0_grads = torch.autograd.grad(loss, out)[0]
                    
                    with torch.no_grad():
                        if dataset in ['MNIST', 'FMNIST']:
                            next_batch_l0_expand = torch.repeat_interleave(next_batch_l0_grads, 256, dim=1)
                        else:
                            next_batch_l0_expand = torch.repeat_interleave(next_batch_l0_grads, 512, dim=1)
                        next_batch_l1_grads = next_batch_l0_expand * emb.repeat(1, n_class)

                        batch_l0_grads = torch.cat((batch_l0_grads, next_batch_l0_grads), dim=0)
                        batch_l1_grads = torch.cat((batch_l1_grads, next_batch_l1_grads), dim=0)

            # individual gradients
            ind_l0_grads = batch_l0_grads.clone()
            ind_l1_grads = batch_l1_grads.clone()
            
            new_l0_grads = torch.cat((new_l0_grads, ind_l0_grads), dim=0)
            new_l1_grads = torch.cat((new_l1_grads, ind_l1_grads), dim=0)
                    
            # mean gradients
            batch_l0_grads = batch_l0_grads.mean(dim=0).view(1, -1)
            batch_l1_grads = batch_l1_grads.mean(dim=0).view(1, -1)

            buffer_l0_grads = torch.cat((buffer_l0_grads, batch_l0_grads), dim=0)
            buffer_l1_grads = torch.cat((buffer_l1_grads, batch_l1_grads), dim=0)

            # mean loss computation
            buffer_ds = TensorDataset(x_new_buffer, y_new_buffer)
            buffer_loader = DataLoader(dataset=buffer_ds, batch_size=batch_size, shuffle=True)
            buffer_output, buffer_loss = clf.evaluate(buffer_loader)
            buffer_losses.append(buffer_loss)
            
#                     print('memory4:', torch.cuda.memory_allocated(device=device)/1024/1024/1024)
            
        torch.cuda.empty_cache()

        buffer_grads = torch.cat((buffer_l0_grads, buffer_l1_grads), dim=1)
        buffer_grads = f.normalize(buffer_grads, p=2, dim=1)

        ######################################

        torch.cuda.empty_cache()
        
        with torch.no_grad():

            new_grads = torch.cat((new_l0_grads, new_l1_grads), dim=1)

            new_grads_origin = new_grads.clone()
            new_grads = f.normalize(new_grads, p=2, dim=1)
#                         print('new grads:', new_grads.shape)
#                         print('new grads norm:', torch.norm(new_grads, dim=1))
#                         print('new grads shape:', new_grads, new_grads.shape)

            buffer_losses = torch.tensor(buffer_losses).view(1,-1)
#             print('initial loss:', buffer_losses)
#             print('initial mean, std:', buffer_losses.mean(dim=1).item(), buffer_losses.std(dim=1).item())

            loss_matrix = buffer_losses.repeat(len(new_grads), 1).to(device)
            loss_matrix_origin = loss_matrix.clone()
            forget_matrix = torch.matmul(new_grads, torch.transpose(buffer_grads, 0, 1)).to(device)

#                     print('init forget matrix shape:', forget_matrix.shape)

#                     print('init memory:', torch.cuda.memory_allocated(device=device)/1024/1024/1024)

        accumulate_select_indexes = []
        accumulate_mean = []
        accumulate_std = []
        accumulate_sum = []

        select_indexes = []
        non_select_indexes = list(range(len(x_train_batch)))

        num_class1 = 0
        num_class2 = 0

        # current data selection
        for b in range(len(x_train_batch)):
            torch.cuda.empty_cache()
            loss_matrix = loss_matrix - alpha * forget_matrix
            loss_mean = torch.mean(loss_matrix, dim=1, keepdim=True)
            loss_std = torch.std(loss_matrix, dim=1, keepdim=True)

            # select_ind = torch.argmin(loss_mean, dim=0)
            # select_ind = torch.argmin(loss_std, dim=0)
            select_ind = torch.argmin(loss_mean + loss_std, dim=0)

            accumulate_mean.append(copy.deepcopy(loss_mean[select_ind].item()))
            accumulate_std.append(copy.deepcopy(loss_std[select_ind].item()))
            accumulate_sum.append(copy.deepcopy(loss_mean[select_ind].item() + loss_std[select_ind].item()))

            if non_select_indexes[select_ind.item()] < len(x_train_batch)/2:
                num_class1 += 1
            else:
                num_class2 += 1

            # metrics인듯?
            select_indexes.append(non_select_indexes[select_ind.item()])
            accumulate_select_indexes.append(copy.deepcopy(select_indexes))
            del non_select_indexes[select_ind.item()]

            best_buffer_losses = loss_matrix[select_ind].view(1,-1)
            loss_matrix = best_buffer_losses.repeat(len(new_grads)-1, 1).to(device)
            new_grads = torch.cat((new_grads[:select_ind.item()], new_grads[select_ind.item()+1:]))
            forget_matrix = torch.cat((forget_matrix[:select_ind.item()], forget_matrix[select_ind.item()+1:]))
        best_ind = np.argmin(np.array(accumulate_sum))
        select_curr_indexes = accumulate_select_indexes[best_ind]

        # best_ind=11999
        # len(select_curr_indexes)=12000
        # len(accumulate_select_indexes)=12000


        # best_ind=7337
        # len(select_curr_indexes)=7338
        # len(accumulate_select_indexes)=12000




        # buffer data selection
        select_buffer_indexes = []

        class1_indexes = list(range(0, int(len(x_train_batch)/2)))
        class2_indexes = list(range(int(len(x_train_batch)/2), len(x_train_batch)))

        class1_grad_mean = new_grads_origin[class1_indexes].mean(dim=0).view(1, -1)
        class2_grad_mean = new_grads_origin[class2_indexes].mean(dim=0).view(1, -1)

        candidate_class1_indexes = list(range(0, int(len(x_train_batch)/2)))
        class1_buffer_indexes = []
        for m in range(buffer_size):
            buffer_ind = torch.argmin(
                torch.norm(class1_grad_mean.repeat(int(len(new_grads_origin)/2)-m, 1) \
                        - (torch.sum(new_grads_origin[class1_buffer_indexes], dim=0)
                              .repeat(int(len(new_grads_origin)/2)-m,1) \
                        + new_grads_origin[candidate_class1_indexes])/(m+1), dim=1), dim=0)
            class1_buffer_indexes.append(copy.deepcopy(candidate_class1_indexes[buffer_ind]))
            del candidate_class1_indexes[buffer_ind]

        candidate_class2_indexes = list(range(int(len(x_train_batch)/2), len(x_train_batch)))
        class2_buffer_indexes = []
        for m in range(buffer_size):
            buffer_ind = torch.argmin(torch.norm(class2_grad_mean.repeat(int(len(new_grads_origin)/2)-m, 1) \
                            - (torch.sum(new_grads_origin[class2_buffer_indexes], dim=0)
                               .repeat(int(len(new_grads_origin)/2)-m,1) \
                            + new_grads_origin[candidate_class2_indexes])/(m+1), dim=1), dim=0)
            class2_buffer_indexes.append(copy.deepcopy(candidate_class2_indexes[buffer_ind]))
            del candidate_class2_indexes[buffer_ind]

        for ind in class1_buffer_indexes:
            select_buffer_indexes.append(ind)

        for ind in class2_buffer_indexes:
            select_buffer_indexes.append(ind)

        select_curr_indexes = list(set(select_curr_indexes))

        # current data selection
        x_train_batch = x_train_batch[select_curr_indexes]
        y_train_batch = y_train_batch[select_curr_indexes]

    x_train_batch = torch.Tensor(x_train_batch).to(device, dtype=torch.float32)
    y_train_batch = torch.Tensor(y_train_batch).to(device, dtype=torch.int64)

    train_ds = TensorDataset(x_train_batch, y_train_batch)
    train_loader = DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True)

    optimizer_config = {"lr": 0.001}
    
    model.zero_grad()

    # Model training using current data and buffer data
    if i == 0:
        clf = NNClassifier(model, nn.CrossEntropyLoss(reduction='mean'), optim.Adam, optimizer_config)
        clf.fit({"train": train_loader, "val": train_loader}, epochs=1, earlystop_path=f'./ckpt/joint.pt')
    elif i > 0:
#                     print('len prev buffer:', len(x_prev_buffer))
        clf = NNClassifier_CL(model, nn.CrossEntropyLoss(reduction='mean'), optim.Adam, optimizer_config)
        clf.fit({"train": train_loader, "val": train_loader, "buffer": (x_prev_buffer, y_prev_buffer)}, 
                epochs=1, sample_size=64, lamb=lamb, device=device, earlystop_path=f'./ckpt/joint.pt', seed=s)

    # Model evaluation
    all_test_acc = []
    all_test_loss = []

    # evaluation using sci-kit learn tool
    for j in range(n_class):

        if j < i*2+2:

            x_test_batch = all_test_data[j]
            y_test_batch = all_test_label[j]

            x_test_batch = torch.Tensor(x_test_batch).to(device, dtype=torch.float32)
            y_test_batch = torch.Tensor(y_test_batch).to(device, dtype=torch.int64)

            test_ds = TensorDataset(x_test_batch, y_test_batch)
            test_loader = DataLoader(dataset=test_ds, batch_size=batch_size, shuffle=True)

            test_output, test_loss = clf.evaluate(test_loader)
            test_acc = accuracy_score(test_output['true_y'], test_output['output'])

            cf_li = list(confusion_matrix(test_output['true_y'], test_output['output'], labels=range(i*2+2))[j])

            cf_matrix = {}
            for k in range(len(cf_li)):
                cf_matrix[label_li[k]] = cf_li[k]

            all_test_acc.append(test_acc)
            all_test_loss.append(test_loss)
            seq_acc[j].append(test_acc)

        else:
            seq_acc[j].append(0)

#                 print('overall: acc = %.3f, fair = %.3f' %(np.mean(all_test_acc), np.std(all_test_acc)))
    overall_acc.append(np.mean(all_test_acc))
    overall_fair.append(np.std(all_test_acc))


all_overall_acc.append(np.mean(overall_acc))
all_overall_fair.append(np.mean(overall_fair))

alpha_dict[alpha].append([np.mean(all_overall_acc), np.mean(all_overall_fair)])
print(alpha)

print('alpha:', alpha, alpha_dict[alpha])
print('avg:', np.mean([e[0] for e in alpha_dict[alpha]]), np.mean([e[1] for e in alpha_dict[alpha]]))
print('\n')

print('-------------------------------------------------------------------------------------------------')
        
for k, v in alpha_dict.items():
    print(k, v)
    print('avg:', np.mean([e[0] for e in v]), np.mean([e[1] for e in v]))
    print('\n')

NameError: name 'all_train_data' is not defined