In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import sys, os, time
import numpy as np
import torch.nn as nn
import torch.nn.init as init
import torch.nn.functional as F
import torch.optim as optim

import pickle
import torch
import random
import utils

import data_handler
from sklearn.utils import shuffle
import trainer
import networks

import random


In [None]:
# Experiment configuration, read by the cells below.
args = {}

# general experimental setting
args['date'] = '230828'
args['dataset'] = 'MNIST'
args['trainer'] = 'er' # ER or EWC
args['seed'] = 0
args['output_path'] = '' # left empty here; the next cell fills it in

# setting for continual learning
args['tasknum'] = 5

# hyperparameters for optimization
args['batch_size'] = 256
args['lr'] = 0.001
args['epochs'] = 10
args['decay'] = 0 # weight decay (L2 penalty for general regularization)

args['schedule_milestone'] = [7] # decrease the learning rate at these epochs
args['gamma'] = 0.2

# hyperparameters for ER
args['memory_size'] = 500
args['ratio'] = 0.5

# GPU index to use; set device to None to run on the CPU.
device = 0
# Fall back to the CPU automatically instead of crashing in set_device()
# when the machine has no usable CUDA device.
if device is not None and not torch.cuda.is_available():
    print('CUDA is not available; falling back to CPU (device = None)')
    device = None
if device is not None:
    torch.cuda.set_device(device)

In [None]:
# Create the working directories for datasets, result files and trained
# models the first time the notebook runs.
for _dir_name, _notice in (
    ('dat', 'Make directory for dataset'),
    ('result_data', 'Make directory for saving results'),
    ('trained_model', 'Make directory for saving trained models'),
):
    if os.path.isdir(_dir_name):
        continue
    print(_notice)
    os.makedirs(_dir_name)

# Base name for the result log; it is built from the settings listed below.
_log_fields = ('date', 'dataset', 'trainer', 'seed', 'memory_size', 'lr', 'batch_size', 'epochs')
log_name = '{}_{}_{}_{}_memsize_{}_lr_{}_batch_{}_epoch_{}'.format(*(args[_key] for _key in _log_fields))

# Only derive the output path when none was configured explicitly.
if args['output_path'] == '':
    args['output_path'] = './result_data/' + log_name + '.txt'




In [None]:
# Seed NumPy, Python's `random` and PyTorch from the single configured seed.
_seed = args['seed']
for _seed_fn in (np.random.seed, random.seed, torch.manual_seed):
    _seed_fn(_seed)

# Request deterministic cuDNN algorithms.
torch.backends.cudnn.deterministic = True


In [None]:
# Load the dataset and build one loader per task for both splits.
print('Load data...')
data_dict = None
dataset = data_handler.DatasetFactory.get_dataset(args['dataset'], args['tasknum'])
task_info = dataset.task_info
print('\nTask info =', task_info)

# Seeded permutation of the class indices, passed to both splits below.
shuffle_idx = shuffle(np.arange(dataset.classes), random_state=args['seed'])

# Keyword arguments that are identical for the train and the test loaders.
_shared_loader_kwargs = dict(shuffle_idx=shuffle_idx, data_dict=data_dict)

# Lists of per-task loaders, indexed by task id in the training loop.
train_dataset_loaders = data_handler.make_ContinualLoaders(
    dataset.train_data,
    dataset.train_labels,
    task_info,
    transform=dataset.train_transform,
    **_shared_loader_kwargs,
)

test_dataset_loaders = data_handler.make_ContinualLoaders(
    dataset.test_data,
    dataset.test_labels,
    task_info,
    transform=dataset.test_transform,
    **_shared_loader_kwargs,
)



In [None]:
# Build the network for the configured dataset / trainer and move it to the
# selected device when one is configured.
myModel = networks.ModelFactory.get_model(args['dataset'], args['trainer'], task_info)
if device is not None:
    myModel = myModel.to(device)

# Adam optimizer over all model parameters, configured from `args`.
optimizer = torch.optim.Adam(
    myModel.parameters(),
    lr=args['lr'],
    weight_decay=args['decay'],
)

# Evaluator used to measure loss / accuracy of the trained classifier.
t_classifier = trainer.EvaluatorFactory.get_evaluator("trainedClassifier")



In [None]:
class Trainer(trainer.GenericTrainer):
    """Experience-replay (ER) trainer.

    From the second task on, every mini-batch of the current task is mixed with
    samples drawn from a bounded replay memory of previously seen data. The
    memory is refreshed after each task with reservoir sampling.

    NOTE(review): attributes such as ``self.lr``, ``self.batch_size``,
    ``self.epochs``, ``self.model``, ``self.optimizer``, ``self.scheduler``,
    ``self.evaluator``, ``self.ce`` and ``setup_training`` are used here but
    not defined in this class; they are presumably provided by
    ``trainer.GenericTrainer`` -- confirm against that module.
    """

    def __init__(self, model, args, optimizer, evaluator, task_info):
        super().__init__(model, args, optimizer, evaluator, task_info)

        self.memory_size = args['memory_size']
        # Replay buffer; once filled it holds the tensors 'data', 'target'
        # and 'taskid' (one row per stored sample), all kept on the CPU.
        self.replay_memory = {}
        self.memory_ratio = args['ratio']
        # Number of training samples seen so far over all tasks.
        self.N_total_data = 0

    def _current_batch_size(self):
        """Number of current-task samples per mini-batch when replay is active."""
        return round(self.batch_size * (1 - self.memory_ratio))

    def _replay_batch_size(self):
        """Number of memory samples added to each mini-batch when replay is active."""
        return self.batch_size - self._current_batch_size()

    def train(self, train_loader, test_loader, t, device = None):
        """Train the model on task ``t`` and then update the replay memory.

        Arguments:
            train_loader: training dataset of task ``t`` (wrapped in a DataLoader here)
            test_loader: test dataset of task ``t`` (wrapped in a DataLoader here)
            t: index of the task being learned
            device: device the batches are moved to; None leaves them where they are
        """
        self.device = device
        self.setup_training(self.lr)
        # Do not update self.t before setup_training (kept from the original ordering).

        # update the total number of data we've seen
        self.N_total_data += len(train_loader)

        # Now, you can update self.t
        self.t = t
        if self.t != 0:
            # Leave room in every mini-batch for the replayed samples.
            batch_size = self._current_batch_size()
            self.train_iterator = torch.utils.data.DataLoader(train_loader, batch_size=batch_size, shuffle=True)
        else:
            self.train_iterator = torch.utils.data.DataLoader(train_loader, batch_size=self.batch_size, shuffle=True)
        self.test_iterator = torch.utils.data.DataLoader(test_loader, 100, shuffle=False)
        # NOTE(review): not used anywhere in this class; kept in case the base class reads it.
        self.fisher_iterator = torch.utils.data.DataLoader(train_loader, batch_size=20, shuffle=True)

        for epoch in range(self.epochs):
            self.model.train()
            for samples in self.train_iterator:
                data, target = samples
                if device is not None:
                    data, target = data.to(device), target.to(device)
                if self.t != 0:
                    data, target, taskid = self.sample_with_memory(data, target)
                    output = self.model(data)
                    # The model returns one output per task; pick, for every
                    # sample, the row produced by the head of its own task.
                    tmp = torch.zeros_like(output[0])
                    for _t in range(self.t+1):
                        mask = taskid == _t
                        tmp[mask] = output[_t][mask]
                    output = tmp
                else:
                    output = self.model(data)[t]
                loss_CE = self.criterion(output,target)

                self.optimizer.zero_grad()
                (loss_CE).backward()
                self.optimizer.step()
            self.scheduler.step()

            train_loss,train_acc = self.evaluator.evaluate(self.model, self.train_iterator, t, self.device)
            print('| Epoch {:3d} | Train: loss={:.3f}, acc={:5.1f}% |'.format(epoch+1,train_loss,100*train_acc),end='')
            test_loss,test_acc=self.evaluator.evaluate(self.model, self.test_iterator, t, self.device)
            print(' Test: loss={:.3f}, acc={:5.1f}% |'.format(test_loss,100*test_acc),end='')
            print()
        self.update_memory(self.replay_memory, self.train_iterator, self.memory_size, device)

    def criterion(self,output,targets):
        """
        Arguments: output (The output logit of self.model), targets (Ground truth label)
        Return: the loss computed by self.ce on (output, targets); ER adds no
        regularization term to it
        """

        return self.ce(output,targets)


    def sample_with_memory(self, data, target):
        """
        Arguments : data, target that are sampled from a dataset of current task
        Return : (data, target, taskid) where data/target are the current batch
                 followed by samples drawn from the replay memory, and taskid
                 holds the task index of every row (self.t for the current rows).
                 When the memory is empty the current batch is returned as is.
        """
        n_current = data.shape[0]
        taskid = torch.full((n_current,), self.t, dtype=torch.long, device=data.device)

        mem_data = self.replay_memory.get('data')
        if mem_data is None or mem_data.shape[0] == 0:
            return data, target, taskid

        n_replay = min(self._replay_batch_size(), mem_data.shape[0])
        if n_replay <= 0:
            return data, target, taskid

        # Draw distinct memory slots uniformly at random (memory lives on the CPU).
        picked = torch.randperm(mem_data.shape[0])[:n_replay]
        mem_x = mem_data[picked].to(data.device)
        mem_y = self.replay_memory['target'][picked].to(target.device)
        mem_t = self.replay_memory['taskid'][picked].to(data.device)

        data = torch.cat([data, mem_x], dim=0)
        target = torch.cat([target, mem_y], dim=0)
        taskid = torch.cat([taskid, mem_t], dim=0)

        return data, target, taskid

    def update_memory(self, memory, loader, memory_size, device = None):
        """Refresh the replay memory with the data of the task just learned.

        Reservoir sampling over the whole stream seen so far: the buffer is
        filled until it holds ``memory_size`` samples, afterwards the n-th
        sample of the stream replaces a random slot with probability
        ``memory_size / n``.

        Arguments:
            memory: dict updated in place with the tensors 'data', 'target', 'taskid'
            loader: iterable yielding (data, target) batches of the current task
            memory_size: maximum number of stored samples
            device: unused; the memory is kept on the CPU and moved to the batch
                    device in sample_with_memory
        """
        if memory_size > 0:
            data_buf = list(memory['data']) if 'data' in memory else []
            target_buf = list(memory['target']) if 'target' in memory else []
            taskid_buf = memory['taskid'].tolist() if 'taskid' in memory else []

            # Samples seen before this task: train() has already added the
            # current task's size to N_total_data.
            n_seen = max(self.N_total_data - len(loader.dataset), 0)

            with torch.no_grad():
                for data, target in loader:
                    for x, y in zip(data, target):
                        if len(data_buf) < memory_size:
                            slot = len(data_buf)
                            data_buf.append(None)
                            target_buf.append(None)
                            taskid_buf.append(self.t)
                        else:
                            slot = int(torch.randint(0, n_seen + 1, (1,)))
                        if slot < memory_size:
                            # clone() so the stored row does not keep the whole batch alive
                            data_buf[slot] = x.detach().cpu().clone()
                            target_buf[slot] = torch.as_tensor(y).detach().cpu().clone()
                            taskid_buf[slot] = self.t
                        n_seen += 1

            if data_buf:
                memory['data'] = torch.stack(data_buf)
                memory['target'] = torch.stack(target_buf)
                memory['taskid'] = torch.tensor(taskid_buf, dtype=torch.long)

        print('Memory update ended')

            

In [None]:
# Instantiate the ER trainer that drives the task loop below.
myTrainer = Trainer(
    model=myModel,
    args=args,
    optimizer=optimizer,
    evaluator=t_classifier,
    task_info=task_info,
)


In [None]:
########################################################################################################################

utils.print_model_report(myModel)
utils.print_optimizer_config(optimizer)
print('-' * 100)

# acc[t, u] / lss[t, u]: accuracy / loss on task u measured right after training task t.
_n_tasks = len(task_info)
acc = np.zeros((_n_tasks, _n_tasks), dtype=np.float32)
lss = np.zeros((_n_tasks, _n_tasks), dtype=np.float32)

# Learn the tasks one after another.
for t, ncla in task_info:
    print("tasknum:", t)

    # Datasets of the task that is learned now.
    train_loader = train_dataset_loaders[t]
    test_loader = test_dataset_loaders[t]
    myTrainer.train(train_loader, test_loader, t, device)

    # Re-evaluate every task seen so far, including the current one.
    for u in range(t+1):
        test_loader = test_dataset_loaders[u]
        test_iterator = torch.utils.data.DataLoader(test_loader, 100, shuffle=False)
        test_loss, test_acc = t_classifier.evaluate(myTrainer.model, test_iterator, u, device)
        print('>>> Test on task {:2d}: loss={:.3f}, acc={:5.1f}% <<<'.format(u, test_loss, 100 * test_acc))
        acc[t, u], lss[t, u] = test_acc, test_loss

    _seen_acc = acc[t, :t+1]
    print('Average accuracy={:5.1f}%'.format(100 * np.mean(_seen_acc)))

    # Persist the accuracy matrix and a checkpoint after every task.
    print('Save at ' + args['output_path'])
    np.savetxt(args['output_path'], acc, '%.4f')
    torch.save(myModel.state_dict(), './trained_model/' + log_name + '_task_{}.pt'.format(t))


# Final report: one line per training stage, one column per evaluated task.
print('*' * 100)
print('Accuracies =')
for _row in acc:
    print('\t' + ''.join('{:5.1f}% '.format(100 * _value) for _value in _row))
print('*' * 100)
print('Done!')



In [None]:
def avg_acc(file_name):
    """Average accuracy over the tasks seen so far, per training stage.

    Arguments:
        file_name: text file holding an accuracy matrix (as written by
                   np.savetxt); row i is read as the accuracies measured
                   after training stage i.
    Return:
        1-D array whose i-th entry is the mean of the first i+1 values of row i.
    """
    # ndmin=2 keeps a single-task file (one row / one value) two-dimensional.
    acc_arr = np.loadtxt(file_name, ndmin=2)
    n_stages = acc_arr.shape[0]
    avg_acc_arr = np.zeros(n_stages)
    for i in range(n_stages):
        avg_acc_arr[i] = np.mean(acc_arr[i, :i+1])

    return avg_acc_arr
# Result files to compare, one per method. A name left empty falls back to the
# output file of the current run when that run used the matching trainer.
filename_ewc = ''
filename_er = ''

_avg_results = {}
for _label, _path in (('er', filename_er), ('ewc', filename_ewc)):
    if not _path and str(args['trainer']).lower() == _label:
        _path = args['output_path']
    if _path and os.path.isfile(_path):
        _avg_results[_label] = avg_acc(_path)
    else:
        # Keep the notebook runnable: a NaN curve has the expected length and
        # is simply not drawn by matplotlib.
        print('No result file for {!r} ({!r}); using NaN placeholders'.format(_label, _path))
        _avg_results[_label] = np.full(args['tasknum'], np.nan)

results_er = _avg_results['er']
results_ewc = _avg_results['ewc']
print(results_er)
print(results_ewc)


In [None]:
import matplotlib.pyplot as plt

# x axis: task numbers starting at 1.
task_num = args['tasknum']
task = np.arange(1, task_num + 1)
ax = plt.subplot(111)

# One curve per method, drawn in a fixed order.
for _curve, _label in ((results_ewc, 'EWC'), (results_er, 'ER')):
    ax.plot(task, _curve, label = _label, linestyle = '-', marker = '.')

# Shrink the axes horizontally to make room for the legend on the right.
box = ax.get_position()
ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
plt.xticks(task)

plt.xlabel('Task',fontsize = 20)
plt.ylabel('Accuracy',fontsize = 20)

ax.legend(loc = 'center right', bbox_to_anchor=(1.3, 0.5))
plt.show()



