In [136]:
from dataloader_utils import get_conbined_permute_mnist, get_conbined_split_mnist, get_conbined_permute_and_split_mnist
from autoencoder import Autoencoder
from autoencoder_utils import * 

import numpy as np

from tqdm.notebook import tqdm

import matplotlib.pyplot as plt


In [137]:
def train(train_loader):
    auto_list = {}
    expert_list = {}

    #debug
    record = {}

    #https://stats.stackexchange.com/questions/521461/train-a-model-on-batches-with-multiple-epochs-vs-each-batch-with-multiple-epoch
    #for i, data in enumerate(train_loader):
    for i, data in tqdm(enumerate(train_loader), total=len(train_loader)):
        images, labels, indicies = data
        
        #debug
        #show_image = images[0].cpu().detach().numpy().reshape((28,28))
        #plt.imshow(show_image) # Plot the 28x28 image
        #plt.show()
        
        #initial 
        if len(auto_list)==0:
            #debug
            print(f"[@ batch {i}] NEW autoencoder at {len(auto_list)} for Task {indicies[0].item()}")
            record[len(auto_list)] = indicies[0].item()

            #initial autoencoder
            new_autoencoder = Autoencoder(input_dims=28*28, code_dims=CODE_DIM)
            for epoch in range(NEW_AUTOENCODER_EPOCH):
                new_autoencoder.optimize_params(images, images)
            auto_list[len(auto_list)] = new_autoencoder

            #to-do add initial expert
            expert_list[len(auto_list)-1] = BATCH_SIZE
            continue

        #find best autoencoder
        best_index = find_best_autoencoders(images, auto_list)
        best_autoencoder = auto_list[best_index]


        #calculate outliers
        outliers = find_num_of_outliers(images, best_autoencoder)
        #print(f"[@ batch {i}] outliers for best autoencoders {best_index}: {outliers}")

        if outliers > OUTLIER_THRESHOLD:
            #debug
            print(f"[@ batch {i}] outliers for best autoencoders at index: {best_index} : {outliers}")
            print(f"[@ batch {i}] NEW autoencoder at {len(auto_list)} for Task: {indicies[0].item()}")
            if indicies[0].item() in record.values():
                print(f"[@ batch {i}] DUPLICATE autoencoder for Task: {indicies[0].item()}")
                record[len(auto_list)] = indicies[0].item()
            else:
                record[len(auto_list)] = indicies[0].item()

            #add new autoencoder
            best_autoencoder = Autoencoder(input_dims=28*28, code_dims=CODE_DIM)
            for epoch in range(NEW_AUTOENCODER_EPOCH):
                best_autoencoder.optimize_params(images, images)
            auto_list[len(auto_list)] = best_autoencoder

            #to-do add new expert
            expert_list[len(auto_list)-1] = BATCH_SIZE
            
        else:
            #debug
            #print(f"training autoencoder at {best_index} with index: {indicies[0].item()}")
            if not indicies[0].item() in record.values():
                print(f"[@ batch {i}] outliers for best autoencoders at index: {best_index} : {outliers}")
                print(f"[@ batch {i}] MISSING autoencoder with Task {indicies[0].item()}")
                continue
                
            #train best autoencoder
            for epoch in range(TRAIN_AUTOENCODER_EPOCH):
                best_autoencoder.optimize_params(images, images)

            #to-do train exsisting expert
            expert_list[best_index] += BATCH_SIZE

        if i % (len(train_loader)/20) ==0 :
            print(expert_list)
            
    print("Complete!")
    
    #debug
    print(expert_list)
    
    return auto_list, expert_list

In [138]:
BATCH_SIZE = 300

OUTLIER_THRESHOLD = 0.2*BATCH_SIZE
NEW_AUTOENCODER_EPOCH = 100
TRAIN_AUTOENCODER_EPOCH = 10
CODE_DIM = 350

NUM_TASK = 10
RANDOM_SEED = np.random.randint(100)
#RANDOM_SEED = 42

In [139]:
train_loader, test_loader = get_conbined_permute_mnist(NUM_TASK, BATCH_SIZE, RANDOM_SEED)

In [140]:
auto_list, expert_list = train(train_loader)

  0%|          | 0/2000 [00:00<?, ?it/s]

[@ batch 0] NEW autoencoder at 0 for Task 9
[@ batch 1] outliers for best autoencoders at index: 0 : 300
[@ batch 1] NEW autoencoder at 1 for Task: 0
[@ batch 2] outliers for best autoencoders at index: 0 : 300
[@ batch 2] NEW autoencoder at 2 for Task: 8
[@ batch 3] outliers for best autoencoders at index: 0 : 300
[@ batch 3] NEW autoencoder at 3 for Task: 5
[@ batch 5] outliers for best autoencoders at index: 2 : 300
[@ batch 5] NEW autoencoder at 4 for Task: 6
[@ batch 8] outliers for best autoencoders at index: 2 : 300
[@ batch 8] NEW autoencoder at 5 for Task: 4
[@ batch 10] outliers for best autoencoders at index: 4 : 300
[@ batch 10] NEW autoencoder at 6 for Task: 7
[@ batch 11] outliers for best autoencoders at index: 4 : 300
[@ batch 11] NEW autoencoder at 7 for Task: 1
[@ batch 14] outliers for best autoencoders at index: 0 : 300
[@ batch 14] NEW autoencoder at 8 for Task: 3
[@ batch 21] outliers for best autoencoders at index: 6 : 300
[@ batch 21] NEW autoencoder at 9 for Ta

In [None]:
#unsuccess case due to relatedness of different tasks (e.g. [2, 7] and [3, 1])
#BATCH_SIZE = 1000

#OUTLIER_THRESHOLD = 0.1*BATCH_SIZE
#NEW_AUTOENCODER_EPOCH = 500
#TRAIN_AUTOENCODER_EPOCH = 20
#CODE_DIM = 350

#NUM_TASK = 5
#RANDOM_SEED = np.random.randint(100)
#RANDOM_SEED = 42

#train_loader, test_loader = get_conbined_split_mnist(NUM_TASK, BATCH_SIZE, RANDOM_SEED)

#auto_list, expert_list = train(train_loader)

In [147]:
BATCH_SIZE = 1000

OUTLIER_THRESHOLD = 0.1*BATCH_SIZE
NEW_AUTOENCODER_EPOCH = 200
TRAIN_AUTOENCODER_EPOCH = 20
CODE_DIM = 350

NUM_TASK = 4
RANDOM_SEED = np.random.randint(100)
#RANDOM_SEED = 42

In [148]:
train_loader, test_loader = get_conbined_permute_and_split_mnist(NUM_TASK, BATCH_SIZE, RANDOM_SEED)

split classes: [[3, 2], [9, 0], [1, 6], [7, 5]]


In [150]:
auto_list, expert_list = train(train_loader)

  0%|          | 0/49 [00:00<?, ?it/s]

[@ batch 0] NEW autoencoder at 0 for Task 3
[@ batch 1] outliers for best autoencoders at index: 0 : 466
[@ batch 1] NEW autoencoder at 1 for Task: 1
[@ batch 4] outliers for best autoencoders at index: 0 : 175
[@ batch 4] NEW autoencoder at 2 for Task: 2
[@ batch 5] outliers for best autoencoders at index: 0 : 123
[@ batch 5] NEW autoencoder at 3 for Task: 2
[@ batch 5] DUPLICATE autoencoder for Task: 2
[@ batch 6] outliers for best autoencoders at index: 0 : 447
[@ batch 6] NEW autoencoder at 4 for Task: 0
Complete!
{0: 12000, 1: 12000, 2: 1000, 3: 12000, 4: 12000}
