In [None]:
'''Download the files '''
'''Only for colab'''

!wget https://github.com/lacykaltgr/continual-learning-ait/archive/refs/heads/main.zip
!unzip main.zip
!find continual-learning-ait-main -type f ! -name "main.ipynb" -exec cp {} . \;

!rm -r stable_diffusion
!mkdir stable_diffusion
!mv diffusion_model.py stable_diffusion/
!mv autoencoder_kl.py stable_diffusion/
!mv layers.py stable_diffusion/
!mv stable_diffusion.py stable_diffusion/
!mv constants.py stable_diffusion/

--2023-04-11 14:57:07--  https://github.com/lacykaltgr/continual-learning-ait/archive/refs/heads/main.zip
Resolving github.com (github.com)... 140.82.113.4
Connecting to github.com (github.com)|140.82.113.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://codeload.github.com/lacykaltgr/continual-learning-ait/zip/refs/heads/main [following]
--2023-04-11 14:57:07--  https://codeload.github.com/lacykaltgr/continual-learning-ait/zip/refs/heads/main
Resolving codeload.github.com (codeload.github.com)... 140.82.112.10
Connecting to codeload.github.com (codeload.github.com)|140.82.112.10|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/zip]
Saving to: ‘main.zip’

main.zip                [ <=>                ] 154.08K  --.-KB/s    in 0.05s   

2023-04-11 14:57:07 (3.06 MB/s) - ‘main.zip’ saved [157773]

Archive:  main.zip
9d267e6b95e57ad02d2e58c6e1d1f16dcffdefcb
   creating: continual-learning-ait-main/
  i

In [None]:
import gc
import importlib
import os

# TF_CPP_MIN_LOG_LEVEL is read when TensorFlow's native runtime is loaded, so it has to
# be set before the first `import tensorflow`; setting it afterwards does not silence
# the start-up logs.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
import numpy as np
import keras
from tqdm import tqdm

from sklearn.metrics import classification_report

# Project-local modules (kept in their original relative order).
import classifier
from stable_diffusion import stable_diffusion
import utils
from data_preparation import load_dataset, CLDataLoader

# Load the dataset

In [2]:
# load_dataset comes from the project's data_preparation module; its result is unpacked
# into a train part and a test part for the 'cifar-100' dataset name.
dpt_train, dpt_test = load_dataset('cifar-100')

In [3]:
# Batch size shared by both loaders and by the training hyper-parameters.
batch_size = 256
train_loader = CLDataLoader(dpt_train, batch_size, train=True)
# The evaluation loader must wrap the held-out split; it previously wrapped dpt_train,
# so "test" metrics were silently computed on training data.
test_loader = CLDataLoader(dpt_test, batch_size, train=False)

Metal device set to: Apple M2


# Define parameters and agent

In [39]:
# Hyper-parameters for the whole experiment. The dict is stored on the Agent and is also
# handed as a whole to classifier.classifier(...) and agent.gen.initialize(...), so keys
# that are not read in this notebook may still be consumed by those project modules.
params = {
    # --- general ---
    "device": 'cuda:0' if tf.config.list_physical_devices('GPU') else 'cpu',  # not read in this notebook
    "n_runs": 1,  # how many times the training cell calls run(agent)
    "n_tasks": 10,  # not read in this notebook
    "n_epochs": 2,  # run_epoch calls per task (run_task)
    "n_classes": 100,  # not read in this notebook
    "input_shape": (32,32, 3),  # shape of the keras.Input built in Agent.set_models
    "embedding_shape": (4,4,4),  # not read in this notebook
    "samples_per_task": 1000,  # run_epoch stops once more samples than this were seen (only if > 0)
    "batch_size": batch_size,  # used for classifier fit() and passed to gen.get_model_output
    "print_every": 5,  # run_epoch evaluates every this many batches

    # --- classifier ---
    "cls_iters": 10,  # fit passes per batch in train_classifier
    "cls_hiddens": 32,  # not read in this notebook
    "cls_lr": 0.001,  # Adam learning rate for the classifier optimizer

    # --- generator ---
    "gen_depth": 6,  # not read in this notebook
    "num_steps": 10,  # not read in this notebook
    "gen_iters": 1,  # iterations per batch in train_generator
    "input_latent_strength":0.5,  # not read in this notebook
    "gen_temperature":1,  # not read in this notebook
    "gen_lr": 0.001,  # Adam learning rate for the generator optimizer
    "temperature": 1,  # passed to gen.get_x_prev in generate

    # --- MIR (maximally interfered retrieval) ---
    "reuse_samples": True,  # if True, retrieved samples are fetched on the first inner iteration only
    "cls_mir_gen": 1,  # not read in this notebook
    "gen_mir_gen": 1,  # not read in this notebook
    "mem_coeff": 0.12,  # loss coefficient when train_generator trains on retrieved latents
    "n_mem": 10,  # number of retrieval rounds in retrieve_gen_for_cls / retrieve_gen_for_gen
    "mir_init_prior": 10,  # not read in this notebook
    "z_size": 10,  # sqrt(z_size) is the target norm in the shell term
    "mir_iters": 3,  # gradient-ascent steps applied to each retrieved latent batch
    "gen_kl_coeff": 0.5,  # not read in this notebook
    "gen_rec_coeff": 0.5,  # not read in this notebook
    "gen_ent_coeff": 0.5,  # weight of the entropy term in retrieve_gen_for_gen
    "gen_div_coeff": 0.5,  # weight of the diversity term in retrieve_gen_for_gen
    "gen_shell_coeff": 0.5,  # weight of the shell term in retrieve_gen_for_gen
    "cls_xent_coeff": 0.5,  # weight of the cross-entropy (interference) term in retrieve_gen_for_cls
    "cls_ent_coeff": 0.5,  # weight of the entropy term in retrieve_gen_for_cls
    "cls_div_coeff": 0.5,  # weight of the diversity term in retrieve_gen_for_cls
    "cls_shell_coeff": 0.5,  # weight of the shell term in retrieve_gen_for_cls
}

In [5]:
class Agent:
  """Agent to handle models, parameters and states.

  Attributes:
    cls: classifier operating on encoder output (set by set_models).
    gen: generator object; must expose an ``encoder`` attribute (set by set_models).
    encoder: shortcut to ``gen.encoder``.
    opt / opt_gen: Adam optimizers for the classifier and the generator.
    classifier_model: compiled Keras model chaining image input -> encoder -> cls.
    params: hyper-parameter dict passed to the constructor.
    state: mutable dict used by the training functions to share progress.
  """

  def __init__(self, hparams):
    """Store the hyper-parameters; models and optimizers stay unset until set_models."""
    self.cls = None
    self.opt = None
    self.opt_gen = None
    self.gen = None
    self.params = hparams
    self.state = dict()
    self.encoder = None
    self.classifier_model = None

  def set_models(self, generator=None, classifier=None):
    """Attach the generator and classifier and build the end-to-end classifier model.

    Args:
      generator: object exposing an ``encoder`` callable.
      classifier: callable applied to the encoder output.

    Raises:
      ValueError: if either model is missing.
    """
    if generator is None or classifier is None:
      raise ValueError("set_models requires both a generator and a classifier")

    self.cls = classifier
    self.gen = generator
    self.encoder = generator.encoder

    # Use this instance's own hyper-parameters and models; the previous version read
    # the notebook-level globals `params` and `agent`, which only worked by accident
    # when a global `agent` happened to be this very object.
    self.opt = tf.keras.optimizers.legacy.Adam(learning_rate=self.params["cls_lr"])
    self.opt_gen = tf.keras.optimizers.legacy.Adam(learning_rate=self.params["gen_lr"])

    data_input = keras.Input(shape=self.params["input_shape"], name="image")
    encoder_output = self.encoder(data_input)
    cls_output = self.cls(encoder_output)
    self.classifier_model = keras.Model(inputs=data_input, outputs=cls_output)
    self.classifier_model.compile(optimizer=self.opt, loss="categorical_crossentropy", metrics=["accuracy"])

# Functions for training

In [6]:
def generate(agent, cls=None, input_latent=None, train=True, coeff=1.0):
    """Generate samples and, optionally, train the diffusion model at the same time.

    Walks the timesteps returned by ``agent.gen.initialize`` in reverse order and
    updates the latent once per timestep. With ``train=True`` every update is recorded
    on a gradient tape and followed by one optimizer step on the diffusion model.

    Args:
        agent: object providing ``gen``, ``cls``, ``opt_gen`` and ``params``.
        cls: classifier used for the training loss; defaults to ``agent.cls``.
        input_latent: forwarded to ``agent.gen.initialize``.
        train: whether to update the diffusion model while sampling.
        coeff: multiplier applied to the training loss.

    Returns:
        The latent after the last processed timestep.
    """
    print("Generating samples...")

    classify = agent.cls if cls is None else cls
    latent, alphas, alphas_prev, timesteps = agent.gen.initialize(agent.params, input_latent)

    def denoise(current, step_idx, t):
        # One reverse step: predict the model output, then move to the previous latent.
        model_out = agent.gen.get_model_output(current, t, agent.params['batch_size'])
        return agent.gen.get_x_prev(
            current,
            model_out,
            alphas[step_idx],
            alphas_prev[step_idx],
            agent.params["temperature"],
        )

    schedule = tqdm(list(enumerate(timesteps))[::-1])
    for index, timestep in schedule:
        schedule.set_description(f'{index:3d} {timestep:3d}')

        if not train:
            latent = denoise(latent, index, timestep)
            continue

        with tf.GradientTape() as tape:
            latent = denoise(latent, index, timestep)
            pred = classify(latent)
            # Confidence-based loss: cross-entropy of the prediction with itself.
            loss = coeff*tf.keras.losses.categorical_crossentropy(pred, pred)
        weights = agent.gen.diffusion_model.trainable_variables
        grads = tape.gradient(loss, weights)
        agent.opt_gen.apply_gradients(zip(grads, weights))

    return latent

In [29]:
def retrieve_gen_for_cls(agent):
    """Retrieve maximally interfered latent vectors for the classifier.

    For each of ``n_mem`` rounds a latent batch is sampled with ``generate`` (no
    training) and then moved for ``mir_iters`` gradient-ascent steps along a gain made
    of: cross-entropy between the current classifier and a "virtual" classifier
    returned by ``utils.get_next_step_cls`` (+), prediction entropy (-), distance to
    the batches found in earlier rounds (+) and deviation of the latent norm from
    ``sqrt(z_size)`` (-).

    Args:
        agent: object providing ``gen``, ``cls``, ``params`` and ``state`` (the keys
            "data" and "target" of ``state`` are read).

    Returns:
        ``(mem_x, mem_y, mir_worked)``: the retrieved latents, the classifier output
        on them as a NumPy array, and 1/0 depending on whether the search produced
        NaN-free samples. On failure ``mem_x`` falls back to a plain ``generate`` call.
    """
    print("Retrieving latent vector for classifier...")

    # Hyper-parameters come from the agent; the previous version mixed agent.params
    # with the notebook-level global `params`.
    params = agent.params

    latent = agent.gen.encoder(agent.state["data"])
    virtual_cls = classifier.classifier(params)
    virtual_cls = utils.get_next_step_cls(
        agent.cls,
        virtual_cls,
        latent,
        agent.state["target"]
    )

    z_new_max = None

    for i in range(params["n_mem"]):

        z_new = generate(agent, input_latent=latent, train=False, coeff=0.1)
        rows = z_new.shape[0]

        for j in range(params["mir_iters"]):
            # The gradient is taken exactly once per tape, so a persistent tape
            # (which holds its resources until deleted) is not needed.
            with tf.GradientTape() as tape:

                tape.watch(z_new)

                y_pre = agent.cls(z_new)
                y_virtual = virtual_cls(z_new)

                # maximise the interference:
                XENT = tf.constant(0.)
                if params["cls_xent_coeff"] > 0.:
                    XENT = tf.keras.losses.categorical_crossentropy(y_virtual, y_pre)

                # the predictions from the two models should be confident
                ENT = tf.constant(0.)
                if params["cls_ent_coeff"] > 0.:
                    ENT = tf.keras.losses.categorical_crossentropy(y_pre, y_pre)

                # the new-found samples should be different from each others
                DIV = tf.constant(0.)
                if params["cls_div_coeff"] > 0.:
                    for found_z_i in range(i):
                        DIV += tf.keras.losses.MSE(
                            z_new,
                            z_new_max[found_z_i * rows:found_z_i * rows + rows]
                        ) / i

                # (NEW) stay on gaussian shell loss:
                SHELL = tf.constant(0.)
                if params["cls_shell_coeff"] > 0.:
                    SHELL = tf.keras.losses.MSE(
                        tf.norm(z_new, axis=1),
                        tf.ones_like(tf.norm(z_new, axis=1))*np.sqrt(params["z_size"])
                    )

                XENT, ENT, DIV, SHELL = \
                    tf.reduce_mean(XENT), \
                        tf.reduce_mean(ENT), \
                        tf.reduce_mean(DIV), \
                        tf.reduce_mean(SHELL)

                gain = params["cls_xent_coeff"] * XENT + \
                       -params["cls_ent_coeff"] * ENT + \
                       params["cls_div_coeff"] * DIV + \
                       -params["cls_shell_coeff"] * SHELL

            # Gradient ascent on the gain; the gradient is None when the gain does
            # not depend on z_new (e.g. all coefficients are 0).
            z_g = tape.gradient(gain, z_new)
            if z_g is not None:
                z_new = (z_new + 1 * z_g)

        if z_new_max is None:
            z_new_max = z_new.numpy().copy()
        else:
            z_new_max = np.concatenate([z_new_max, z_new.numpy().copy()])

    # z_new_max is a plain NumPy array here, so no stop_gradient call is required
    # (the earlier call discarded its result anyway).
    if z_new_max is None or np.isnan(z_new_max).any():
        # Nothing retrieved (n_mem == 0) or the ascent diverged: fall back to samples
        # drawn without an input latent.
        mir_worked = 0
        mem_x = generate(agent, train=False)
    else:
        mem_x = z_new_max
        mir_worked = 1

    mem_y = agent.cls(mem_x).numpy()

    return mem_x, mem_y, mir_worked

In [27]:
def retrieve_gen_for_gen(agent):
    """Retrieve maximally interfered latent vectors for the generator.

    For each of ``n_mem`` rounds a latent batch is sampled with ``generate`` (no
    training) and then moved for ``mir_iters`` gradient-ascent steps along a gain made
    of: distance to the batches found in earlier rounds (+), classifier prediction
    entropy (-) and deviation of the latent norm from ``sqrt(z_size)`` (-).

    TODO: add some other loss as well (maximise interference).

    Args:
        agent: object providing ``gen``, ``cls``, ``params`` and ``state`` (the key
            "data" of ``state`` is read).

    Returns:
        ``(mem_x, mir_worked)``: the retrieved latents and 1/0 depending on whether
        the search produced NaN-free samples. On failure ``mem_x`` falls back to a
        plain ``generate`` call.
    """
    print("Retrieving latent vector for generator...")

    # Hyper-parameters come from the agent rather than the notebook-level global.
    params = agent.params

    latent = agent.gen.encoder(agent.state["data"])
    z_new_max = None

    for i in range(params["n_mem"]):

        z_new = generate(agent, input_latent=latent, train=False, coeff=0.1)
        rows = z_new.shape[0]

        for j in range(params["mir_iters"]):

            # The gradient is taken exactly once per tape, so it need not be persistent.
            with tf.GradientTape() as tape:
                tape.watch(z_new)
                # the predictions from the two models should be confident
                ENT = tf.constant(0.)
                if params["gen_ent_coeff"]>0.:
                    y_pre = agent.cls(z_new)
                    ENT = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(y_pre, y_pre))

                # the new-found samples should be different from each others
                DIV = tf.constant(0.)
                if params["gen_div_coeff"]>0.:
                    for found_z_i in range(i):
                        DIV += tf.reduce_mean(tf.math.squared_difference(
                            z_new,
                            z_new_max[found_z_i * rows:found_z_i * rows + rows])
                        ) / i

                # (NEW) stay on gaussian shell loss:
                SHELL = tf.constant(0.)
                if params["gen_shell_coeff"]>0.:
                    SHELL = tf.reduce_mean(tf.math.squared_difference(
                        tf.norm(z_new, ord=2, axis=1),
                        tf.ones_like(tf.norm(z_new, ord=2, axis=1))*np.sqrt(params["z_size"])))

                gain = params["gen_div_coeff"] * DIV + \
                       -params["gen_ent_coeff"] * ENT + \
                       -params["gen_shell_coeff"] * SHELL

            # Gradient ascent on the gain. The gradient is None when the gain does not
            # depend on z_new (e.g. all coefficients are 0); skip the update then,
            # as retrieve_gen_for_cls does, instead of failing on `z_new + None`.
            z_g = tape.gradient(gain, z_new)
            if z_g is not None:
                z_new = (z_new + z_g)

        if z_new_max is None:
            z_new_max = tf.identity(z_new)
        else:
            z_new_max = tf.concat([z_new_max, z_new], axis=0)

    # No stop_gradient call here: the earlier one discarded its result, and no tape
    # is active at this point.
    if z_new_max is None or np.isnan(z_new_max).any():
        # Nothing retrieved (n_mem == 0) or the ascent diverged: fall back to samples
        # drawn without an input latent.
        mir_worked = 0
        mem_x = generate(agent, train=False)
    else:
        mem_x = z_new_max
        mir_worked = 1

    return mem_x, mir_worked

In [9]:
def train_generator(agent):
    """Train the generator unit on the current batch and on retrieved latents.

    Each of the ``gen_iters`` iterations trains on the encoded current batch. From the
    second task on (``state["task"] > 0``) latents are retrieved with
    ``retrieve_gen_for_gen`` - on the first iteration only when ``reuse_samples`` is
    set, otherwise on every iteration - and used for a second training pass weighted
    by ``mem_coeff``. The MIR counters in ``agent.state`` are updated per retrieval.
    """
    print("Training generator...")

    cfg, state = agent.params, agent.state
    batch_latent = agent.gen.encoder(state["data"])
    replay_latent = None

    for step in range(cfg["gen_iters"]):
        generate(agent, input_latent=batch_latent)

        if state["task"] > 0 and (step == 0 or not cfg["reuse_samples"]):
            replay_latent, succeeded = retrieve_gen_for_gen(agent)
            state["mir_tries"] += 1
            if succeeded:
                state["mir_success"] += 1

        if replay_latent is None:
            continue
        generate(agent, input_latent=replay_latent, coeff=cfg["mem_coeff"])


In [10]:
def train_classifier(agent):
    """Train the encoder and the classifier unit on the current batch and on replay.

    Each of the ``cls_iters`` iterations fits ``agent.classifier_model`` for one epoch
    on the current batch. From the second task on (``state["task"] > 0``) samples are
    retrieved with ``retrieve_gen_for_cls`` - on the first iteration only when
    ``reuse_samples`` is set, otherwise on every iteration - and fitted as well. The
    MIR counters in ``agent.state`` are updated per retrieval.
    """
    print("Training classifier...")

    cfg, state = agent.params, agent.state
    data, target = state["data"], state["target"]
    mem_x = mem_y = None

    def fit_once(inputs, labels):
        # Single silent epoch with the configured batch size.
        agent.classifier_model.fit(inputs, labels, batch_size=cfg["batch_size"], epochs=1, verbose=0)

    for step in range(cfg["cls_iters"]):
        fit_once(data, target)

        if not (state["task"] > 0):
            continue

        if step == 0 or not cfg["reuse_samples"]:
            mem_x, mem_y, mir_worked = retrieve_gen_for_cls(agent)
            state["mir_tries"] += 1
            if mir_worked:
                state["mir_success"] += 1

        if mem_x is not None:
            # NOTE(review): mem_x comes from retrieve_gen_for_cls, which returns
            # latent-space samples, while classifier_model is built on a raw image
            # input followed by the encoder - confirm this fit should go through
            # classifier_model rather than agent.cls.
            fit_once(mem_x, mem_y)



In [11]:
def run_epoch(agent):
    """Run an epoch over the current task's loader.

    For every batch from ``agent.state["tr_loader"]`` the classifier and the generator
    are trained; every ``print_every`` batches the models are evaluated on that batch
    and the results are printed. The loop stops once more than ``samples_per_task``
    samples have been consumed for the task (only when that limit is positive).

    Reads/writes ``agent.state`` keys: "tr_loader", "sample_amt", "data", "target",
    "i_example", "task".
    """
    gc.collect()

    print("Running epoch...")

    for i, (data, target) in enumerate(agent.state["tr_loader"]):
        if agent.state["sample_amt"] > agent.params["samples_per_task"] > 0: break
        agent.state["sample_amt"] += data.shape[0]

        agent.state["data"] = data
        agent.state["target"] = target
        agent.state["i_example"] = i

        train_classifier(agent)
        train_generator(agent)

        # Evaluate the models in epoch
        if i % agent.params["print_every"] == 0:

            print("Task: ", agent.state["task"], " Epoch: ", i)

            # Evaluate the classifier
            logits = agent.classifier_model(data)
            pred = np.argmax(logits, axis=1)
            report = classification_report(np.argmax(target, axis=1), pred)
            loss = tf.keras.losses.categorical_crossentropy(target, logits)
            print(report)
            print("Mean loss: ", np.mean(loss))

            # Evaluate the generator. The self cross-entropy must be computed on the
            # classifier's output distribution: np.argmax without an axis flattens
            # the whole batch into a single integer, which is not a distribution.
            mem_x = generate(agent, input_latent=agent.encoder(data), train=False)
            mem_probs = agent.cls(mem_x)
            mem_loss = tf.keras.losses.categorical_crossentropy(mem_probs, mem_probs)
            print("Loss on generate: ",  np.mean(mem_loss))

            mem_x_cls, mem_y, mir_worked_cls = retrieve_gen_for_cls(agent)
            mem_loss_cls = tf.keras.losses.categorical_crossentropy(mem_y, mem_y)
            print("Loss on retrieve for cls: ",  np.mean(mem_loss_cls))
            print("MIR worked on retrieve for cls: ", mir_worked_cls)

            mem_x_gen, mir_worked_gen = retrieve_gen_for_gen(agent)
            mem_probs_gen = agent.cls(mem_x_gen)
            mem_loss_gen = tf.keras.losses.categorical_crossentropy(mem_probs_gen, mem_probs_gen)
            print("Loss on retrieve for gen: ",  np.mean(mem_loss_gen))
            print("MIR worked on retrieve for gen: ", mir_worked_gen)

In [12]:
def run_task(agent):
    """Run a task: reset the per-task counters, train for ``n_epochs`` epochs, then
    print a forgetting report on every earlier task and the MIR success rate.

    Reads ``agent.state["task"]`` and ``agent.params["n_epochs"]``; resets the
    "mir_tries", "mir_success" and "sample_amt" entries of ``agent.state``.

    NOTE(review): the forgetting report indexes the notebook-level ``train_loader``
    and assumes each element offers ``.batch(n)`` returning ``(data, target)`` -
    confirm against CLDataLoader.
    """
    print("Running task...")

    agent.state["mir_tries"], agent.state["mir_success"] = 0, 0
    agent.state["sample_amt"] = 0

    for epoch in range(agent.params["n_epochs"]):
        run_epoch(agent)

    # Evaluate forgetting on all previously seen tasks
    print("Task: ", agent.state["task"])
    for i in range(agent.state["task"]):
        print("Task forgetting on task ", i)
        data, target = train_loader[i].batch(124)
        logits = agent.classifier_model(data)
        pred = np.argmax(logits, axis=1)
        report = classification_report(np.argmax(target, axis=1), pred)
        loss = tf.keras.losses.categorical_crossentropy(target, logits)
        print(report)
        print("Mean loss: ", np.mean(loss))

    # No retrieval is attempted during the first task (the training functions only
    # retrieve when task > 0), so the counter can legitimately be zero; dividing
    # unconditionally raised ZeroDivisionError at the end of task 0.
    tries = agent.state["mir_tries"]
    if tries:
        print("MIR success rate: ", agent.state["mir_success"] / tries)
    else:
        print("MIR success rate: ", "n/a (no MIR retrievals were attempted)")

In [72]:
def run(agent):
  """Run the experiment: build fresh models, then train on every task in order.

  A new classifier and a 32x32 StableDiffusion generator (weights downloaded) are
  attached to the agent. Each element of the notebook-level ``train_loader`` is then
  treated as one task: its index and loader are stored in ``agent.state`` under
  "task" and "tr_loader" before ``run_task`` is called.
  """
  cls_model = classifier.classifier(agent.params)
  gen_model = stable_diffusion.StableDiffusion(img_height=32, img_width=32, download_weights=True)
  agent.set_models(classifier=cls_model, generator=gen_model)

  task = 0
  for tr_loader in train_loader:
    agent.state.update(task=task, tr_loader=tr_loader)
    run_task(agent)
    task += 1

# Training

In [None]:
# Full experiment: one Agent is created and run(agent) is called n_runs times.
# run() attaches newly built models on every call, while agent.state is the same dict
# across runs.
agent = Agent(params)
for r in range(agent.params["n_runs"]):
  agent.state["run"] = r  # record the index of the current run in the shared state
  run(agent)

# Testing for development

In [None]:
# Development smoke test: same steps as run(), restricted to the first few tasks.
tasks_to_test = 2

agent = Agent(params)
agent.set_models(
    classifier=classifier.classifier(agent.params),
    generator=stable_diffusion.StableDiffusion(img_height=32, img_width=32, download_weights=True))
# NOTE(review): assumes train_loader supports slicing - confirm against CLDataLoader.
for task, tr_loader in enumerate(train_loader[:tasks_to_test]):
    agent.state["task"] = task
    agent.state["tr_loader"] = tr_loader
    run_task(agent)

# Evaluation, testing

In [None]:
def evaluate(loader):
    """Print a classification report and the mean loss for every task in ``loader``.

    Args:
        loader: iterable of per-task loaders; each must offer ``.batch(n)`` returning
            ``(data, target)`` with one-hot targets (argmax over axis 1 is taken).

    Uses the notebook-level ``agent`` for the classifier model.
    """
    # Iterate the loader that was passed in; the previous version ignored the argument
    # and always walked the global train_loader, so evaluate(test_loader) reported
    # training-set metrics.
    for task, task_loader in enumerate(loader):
        print("Task: ", task)
        data, target = task_loader.batch(124)
        logits = agent.classifier_model(data)
        pred = np.argmax(logits, axis=1)
        report = classification_report(np.argmax(target, axis=1), pred)
        loss = tf.keras.losses.categorical_crossentropy(target, logits)
        print(report)
        print("Mean loss: ", np.mean(loss))

In [None]:
# Call evaluate once with each loader, printing a heading before each call.
# evaluate relies on the notebook-level `agent`, so a training cell must have run first.
print("Evaluation on training set:")
evaluate(train_loader)
print("Evaluation on test set:")
evaluate(test_loader)

# Utils for development

In [87]:
# Reload the project's utils module so edits to utils.py take effect without
# restarting the kernel (development helper).
importlib.reload(utils)

<module 'utils' from '/Users/laszlofreund/PycharmProjects/continual-learning-ait/utils.py'>

In [None]:
# Force a garbage-collection pass (development helper, e.g. after a large run).
gc.collect()