<a href="https://colab.research.google.com/github/lacykaltgr/continual-learning-ait/blob/main/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
'''Download the files '''
'''Only for colab'''

!wget https://github.com/lacykaltgr/continual-learning-ait/archive/refs/heads/main.zip
!unzip main.zip
!find continual-learning-ait-main -type f ! -name "main.ipynb" -exec cp {} . \;

--2023-05-14 16:36:52--  https://github.com/lacykaltgr/continual-learning-ait/archive/refs/heads/main.zip
Resolving github.com (github.com)... 140.82.121.4
Connecting to github.com (github.com)|140.82.121.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://codeload.github.com/lacykaltgr/continual-learning-ait/zip/refs/heads/main [following]
--2023-05-14 16:36:53--  https://codeload.github.com/lacykaltgr/continual-learning-ait/zip/refs/heads/main
Resolving codeload.github.com (codeload.github.com)... 140.82.121.10
Connecting to codeload.github.com (codeload.github.com)|140.82.121.10|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/zip]
Saving to: ‘main.zip’

main.zip                [  <=>               ]   1.67M  4.95MB/s    in 0.3s    

2023-05-14 16:36:53 (4.95 MB/s) - ‘main.zip’ saved [1751742]

Archive:  main.zip
7628fe5a1ea465ed4b25e68ee85736ac92ef6d09
   creating: continual-lea

In [7]:
import numpy as np
import tensorflow as tf
import keras

from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
#from keras.metrics import Accuracy

#import classifier
#from generator import Generator
#from classifier import Encoder, Classifier
import utils
from data_preparation import load_dataset, CLDataLoader

import gc
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import importlib

# Load the dataset

In [8]:
dpt_train, dpt_test = load_dataset('cifar-10', n_classes_first_task=4, n_classes_other_task=3)

In [9]:
batch_size = 256
train_loader = CLDataLoader(dpt_train, batch_size , train=True)
test_loader = CLDataLoader(dpt_train, batch_size, train=False)

Metal device set to: Apple M2


# Define parameters and agent

In [41]:
params = {
    #general
    "n_runs": 1,
    "n_tasks": 3,
    "n_classes": 10,
    "input_shape": (32, 32, 3),
    "embedding_shape": (6, 6, 8),
    "samples_per_task": 10000,
    "batch_size": batch_size,
    "eval_batch_size": 1,
    "print_every": 1,

    #classifier
    "cls_iters": 1,
    "cls_lr": 1e-2,
    "cls_epochs": 5,

    #generator
    "gen_epochs": 1,
    "num_steps": 3,
    "gen_lr": 2e-4,
    "gen_iters": 1,
    "input_latent_strength": 0.9,
    "temperature": 0.9,

    #mir
    "n_mem": 2,
    "mir_iters": 3,
    "reuse_samples": True,
    "mem_coeff": 0.12,
    "z_size": 10,
    
    "gen_ent_coeff": 0.5,
    "gen_div_coeff": 0.5,
    "gen_shell_coeff": 0.5,
    "cls_xent_coeff": 0.5,
    "cls_ent_coeff": 0.5,
    "cls_div_coeff": 0.5,
    "cls_shell_coeff": 0.5,
}

In [29]:
'''Agent to handle models, parameters and states'''

class Agent:
  def __init__(self, hparams):
    self.params = hparams
    self.state = dict()

    self.classifier = None
    self.generator = None
    self.encoder = None
    self.encoder_classifier = None

    self.eval = accuracy_score


  def set_models(
          self,
          _generator=None,
          _classifier=None,
          _encoder  = None,
    ):
    cls = _classifier #classifier
    gen = _generator  #generator
    enc = _encoder #encoder

    self.optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=params["cls_lr"])
    self.optimizer_gen = tf.keras.optimizers.legacy.Adam(learning_rate=params["gen_lr"])

    #encoder pipeline
    data_input = keras.Input(shape=self.params["input_shape"], name="image")
    enc_output = enc(data_input)
    self.encoder = keras.Model(inputs=data_input, outputs=enc_output)

    # classifier pipeline
    latent_input = keras.Input(shape=self.params["embedding_shape"], name="latent")
    cls_output = cls(latent_input)
    self.classifier = keras.Model(inputs=latent_input, outputs=cls_output)
    self.classifier.compile(optimizer=self.optimizer, loss="categorical_crossentropy", metrics=["accuracy"])

    # encoder - classifier pipeline
    data_input = keras.Input(shape=self.params["input_shape"], name="image")
    enc_output = enc(data_input)
    enc_cls_output = cls(enc_output)
    self.encoder_classifier = keras.Model(inputs= data_input, outputs = enc_cls_output)
    self.encoder_classifier.compile(optimizer=self.optimizer, loss="categorical_crossentropy", metrics=["accuracy"])
    
    # generator pipeline
    self.generator = gen
    #latent_input = keras.Input(shape=self.params["embedding_shape"], name="latent")
    #gen_output = gen(latent_input)
    #self.generator = keras.Model(inputs = latent_input, outputs = gen_output)

# Functions for training

In [30]:
'''Generate samples and train the diffusion model at the same time'''

#plusz lehetne itt még kritérium hogy ne menjen olyan messze az alaptól --- similarity loss
#plusz még lehetne talán egy discriminator is, hogy valós reprezentációkat tanuljon meg


def generate(agent, cls=None, input_latent=None, train=True, coeff=1.0):

    if cls is None:
      cls = agent.classifier

    batch_size = input_latent.shape[0]
    latent, alphas, alphas_prev, timesteps = agent.generator.initialize(params, input_latent, batch_size)


    for index, timestep in reversed(list(enumerate(timesteps))):
        if train:
            with tf.GradientTape() as tape:
                e_t = agent.generator.get_model_output(
                    latent,
                    timestep,
                    batch_size,
                )
                a_t, a_prev = alphas[index], alphas_prev[index]
                latent = agent.generator.get_x_prev(latent, e_t,  a_t, a_prev, params["temperature"])

                pred = cls(latent)
                pred_true = utils.get_one_hot_predictions(pred) #ezt nem fixen kell mecsinálni
                confidence_loss = coeff*tf.reduce_mean(tf.keras.losses.categorical_crossentropy(pred_true, pred))
                #print(confidence_loss)
                similarity_loss = 0.1 * tf.reduce_mean(tf.square(latent - e_t))
                #print(similarity_loss)
                loss = confidence_loss + similarity_loss
                agent.state["epoch_eval"]["gen_loss"].append(loss)
            grads = tape.gradient(loss, agent.generator.trainable_variables)
            agent.optimizer_gen.apply_gradients(zip(grads, agent.generator.trainable_variables))
        else:
            e_t = agent.generator.get_model_output(
                latent,
                timestep,
                batch_size,
            )
            a_t, a_prev = alphas[index], alphas_prev[index]
            latent = agent.generator.get_x_prev(latent, e_t,  a_t, a_prev, params["temperature"])

    return latent


In [58]:
'''Retrive maximally interferred latent vector for classifier'''

def retrieve_gen_for_cls(agent):

    print("Retrieving latent vector for classifier...")

    latent = agent.encoder(agent.state["data"])
    virtual_cls = Classifier()
    virtual_cls = utils.get_next_step_cls(
        agent.cls,
        virtual_cls,
        latent,
        agent.state["target"]
    )

    #mean_latent = tf.cast(tf.reduce_mean(latent, axis=0), tf.float64)
    final_latent = None

    for i in range(agent.params["n_mem"]):

        generated = generate(agent, input_latent=latent, train=False)

        for j in range(params["mir_iters"]):
            with tf.GradientTape(persistent=True) as tape:

                tape.watch(generated)

                y_pre = agent.classifier(generated)
                y_virtual = virtual_cls(generated)

                # maximise the interference:
                XENT = tf.constant(0.)
                if params["cls_xent_coeff"] > 0.:
                    XENT = tf.keras.losses.categorical_crossentropy(y_virtual, y_pre)

                # the predictions from the two models should be confident
                ENT = tf.constant(0.)
                if params["cls_ent_coeff"] > 0.:
                    ENT = tf.keras.losses.categorical_crossentropy(y_pre, y_pre)

                # the new-found samples should be different from each others
                DIV = tf.constant(0.)
                if params["cls_div_coeff"] > 0.:
                    for found_generated in range(i):
                        DIV += tf.keras.losses.MSE(
                            generated,
                            final_latent[found_generated * generated.shape[0]:found_generated * generated.shape[0] + generated.shape[0]]
                        ) / i

                # (NEW) stay on gaussian shell loss:
                SHELL = tf.constant(0.)
                if params["cls_shell_coeff"] > 0.:
                    SHELL = tf.keras.losses.MSE(
                        tf.norm(generated, axis=1),
                        tf.ones_like(tf.norm(generated, axis=1))*np.sqrt(params["z_size"])
                    )

                XENT, ENT, DIV, SHELL = \
                    tf.reduce_mean(XENT), \
                        tf.reduce_mean(ENT), \
                        tf.reduce_mean(DIV), \
                        tf.reduce_mean(SHELL)

                gain = params["cls_xent_coeff"] * XENT + \
                       -params["cls_ent_coeff"] * ENT + \
                       params["cls_div_coeff"] * DIV + \
                       -params["cls_shell_coeff"] * SHELL

            gen_grad = tape.gradient(gain, generated)
            if gen_grad is not None:
                generated = (generated + 1 * gen_grad)

        if final_latent is None:
            final_latent = generated.numpy().copy()
        else:
            final_latent = np.concatenate([final_latent, generated.numpy().copy()])

    tf.stop_gradient(final_latent)

    mir_worked = not np.isnan(final_latent).any()
    mem_x = final_latent if mir_worked else generate(agent, train=False)
    mem_y = agent.classifier(mem_x).numpy()

    return mem_x, mem_y, mir_worked

In [46]:
'''Retrive maximally interferred latent vector for generator'''


def retrieve_gen_for_gen(agent):

    print("Retrieving latent vector for generator...")

    latent = agent.encoder(agent.state["data"])
    #mean_latent = tf.cast(tf.reduce_mean(latent, axis=0), tf.float64)
    final_latent = None

    for i in range(params["n_mem"]):

        generated = generate(agent, input_latent=latent, train=False)

        for j in range(params["mir_iters"]):

            with tf.GradientTape(persistent=True) as tape:
                tape.watch(generated)

                # the predictions from the two models should be confident
                ENT = tf.constant(0.)
                if params["gen_ent_coeff"]>0.:
                    y_pre = agent.classifier(generated)
                    y_pre_true = utils.get_one_hot_predictions(y_pre)
                    ENT = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(y_pre_true, y_pre))

                # the new-found samples should be different from each others
                DIV = tf.constant(0.)
                if params["gen_div_coeff"]>0.:
                    for found_generated in range(i):
                        DIV += tf.reduce_mean(tf.math.squared_difference(
                            generated,
                            final_latent[found_generated * generated.shape[0]:found_generated * generated.shape[0] + generated.shape[0]])
                        ) / i

                # (NEW) stay on gaussian shell loss:
                SHELL = tf.constant(0.)
                if params["gen_shell_coeff"]>0.:
                    SHELL = tf.reduce_mean(tf.math.squared_difference(
                        tf.norm(generated, ord=2, axis=1),
                        tf.ones_like(tf.norm(generated, ord=2, axis=1))*np.sqrt(params["z_size"])))


                gain =params["gen_div_coeff"] * DIV + \
                      -params["gen_ent_coeff"] * ENT + \
                       -params["gen_shell_coeff"] * SHELL

            grad = tape.gradient(gain, generated)
            generated = (generated + grad)

        if final_latent is None:
            final_latent = tf.identity(generated)
        else:
            final_latent = tf.concat([final_latent, generated], axis=0)


    tf.stop_gradient(final_latent)

    mir_worked = not np.isnan(final_latent).any()
    mem_x = final_latent if mir_worked else generate(agent, train=False)

    return mem_x, mir_worked

In [44]:
'''Train the generator unit'''

def train_generator(agent):

    data = agent.state["data"]
    latent = agent.encoder(data)

    mem_x = None
    for it in range(agent.params["gen_iters"]):
        generate(agent, input_latent=latent)

        if agent.state["task"] > 0:
            if it == 0 or not agent.params["reuse_samples"]:
                mem_x, mir_worked = retrieve_gen_for_gen(agent)

                agent.state["mir_tries"] += 1
                if mir_worked:
                    agent.state["mir_success"] += 1

        if mem_x is not None:
          if len(mem_x.shape) == 3:
            mem_x = tf.expand__dims(mem_x, axis=-1)
          generate(agent, input_latent=mem_x, coeff=agent.params["mem_coeff"])


In [56]:
'''Train the encoder and the classifier unit'''

def train_classifier(agent):

    data = agent.state["data"]
    target = agent.state["target"]
    mem_x, mem_y = None, None

    for it in range(agent.params["cls_iters"]):
        history = agent.encoder_classifier.fit(data, target, batch_size=agent.params["batch_size"], epochs=1, verbose=0)
        if agent.state["task"] > 0:
            if it == 0 or not agent.params["reuse_samples"]:
                mem_x, mem_y, mir_worked = retrieve_gen_for_cls(agent)
                agent.state["mir_tries"] += 1
                if mir_worked:
                    agent.state["mir_success"] += 1

            if mem_x is not None:
                mem_history = agent.classifier.fit(mem_x, utils.get_one_hot_predictions(mem_y), batch_size=agent.params["batch_size"], epochs=1, verbose=1)

                agent.state["epoch_eval"]["retr_cls_loss"].append(mem_history.history["loss"][0])
                agent.state["epoch_eval"]["retr_cls_accuracy"].append(mem_history.history["accuracy"][0])
        agent.state["epoch_eval"]["cls_loss"].append(history.history["loss"][0])
        agent.state["epoch_eval"]["cls_acc"].append(history.history["accuracy"][0])

In [48]:
'''Train model'''

def train_model(train_function, agent):
    agent.state["sample_amt"] = 0
    loader = agent.state["tr_loader"]
    for i, (data, target) in enumerate(loader):
        #if agent.state["sample_amt"] > agent.params["samples_per_task"] > 0: break
        agent.state["sample_amt"] += data.shape[0]
        agent.state["data"] = data
        agent.state["target"] = target
        agent.state["i_example"] = i
        train_function(agent)

In [53]:
'''Run an epoch'''

def run_cls_epoch(agent):

    print(f"Running Task {agent.state['task']}, Epoch: {agent.state['epoch']} on Classifier")

    agent.state["epoch_eval"]["cls_loss"] = []
    agent.state["epoch_eval"]["cls_acc"] = []
    agent.state["epoch_eval"]["retr_cls_loss"] = []
    agent.state["epoch_eval"]["retr_cls_accuracy"] = []

    train_model(train_classifier, agent)

    '''Evaluate the models in epoch'''
    if agent.state["epoch"] % agent.params["print_every"] == 0:
        print(f"    Classifier loss: {np.mean(agent.state['epoch_eval']['cls_loss'])}"
              f"    Classifier accuracy: {np.mean(agent.state['epoch_eval']['cls_acc'])}")

In [54]:
'''Run an epoch'''

def run_gen_epoch(agent):
  
    print(f"Running Task {agent.state['task']}, Epoch: {agent.state['epoch']} on Generator")
    
    agent.state["epoch_eval"]["gen_loss"] = []
    agent.state["epoch_eval"]["retr_gen_loss"] = []
    agent.state["epoch_eval"]["retr_gen_accuracy"] = []

    train_model(train_generator, agent)

    '''Evaluate the models in epoch'''
    if agent.state["epoch"] % agent.params["print_every"] == 0:
        print(f"    Generator loss: {np.mean(agent.state['epoch_eval']['gen_loss'])}")
              #f"    Loss on gen retrieve for cls: {np.mean(agent.state['epoch_eval']['retr_cls_loss'])}"
              #f"    Loss on gen retrieve for gen: {np.mean(agent.state['epoch_eval']['retr_gen_loss'])}")

In [50]:
'''Run a task'''

def run_task(agent):

    agent.state["mir_tries"], agent.state["mir_success"] = 0, 0
    agent.state["epoch_eval"] = dict()

    for epoch in range(agent.params["cls_epochs"]):
        agent.state["epoch"] = epoch
        run_cls_epoch(agent)

    for epoch in range(agent.params["gen_epochs"]):
        agent.state["epoch"] = epoch
        run_gen_epoch(agent)

    '''Evaluate forgetting'''
    if (agent.state['task']) > 0:
      print("Evaluate Task: ", agent.state["task"])
    for i in range(agent.state["task"]):
        task_loss = []
        task_eval = []
        for data, target in test_loader[i]:
            logits = agent.encoder_classifier(data)
            pred = np.argmax(logits, axis=1)
            y = np.argmax(target, axis=1)
            eval = agent.eval(y, pred)
            task_eval.append(eval)
            loss = tf.keras.losses.categorical_crossentropy(target, logits)
            task_loss.append(np.mean(loss))
        print(f"    Task {agent.state['task']} forgetting on task {i} : "
              f"        Loss: {np.mean(task_loss)}"
              f"        ACC: {np.mean(task_eval)}")

    #print("MIR success rate: ", agent.state["mir_success"] / agent.state["mir_tries"])

In [51]:
'''Run the experiment'''

def run(agent):

  agent.set_models(
      _classifier=Classifier(),
      _generator=Generator(img_height=32, img_width=32),
      _encoder=Encoder(),
  )

  for task, (tr_loader, ts_loader) in enumerate(zip(train_loader, test_loader)):
    agent.state["task"] = task
    agent.state["tr_loader"] = tr_loader
    agent.state["ts_loader"] = ts_loader
    run_task(agent)

# Training

In [None]:
agent = Agent(params)
for r in range(agent.params["n_runs"]):
  agent.state["run"] = r
  run(agent)

Generator init
Running Task 0, Epoch: 0 on Classifier
    Classifier loss: 1.1025629296141155    Classifier accuracy: 0.6429527209976972
Running Task 0, Epoch: 1 on Classifier
    Classifier loss: 0.529264626361556    Classifier accuracy: 0.7715767167382321
Running Task 0, Epoch: 2 on Classifier
    Classifier loss: 0.46851174811185414    Classifier accuracy: 0.7981712757530859
Running Task 0, Epoch: 3 on Classifier
    Classifier loss: 0.42587326492293404    Classifier accuracy: 0.8232395739878638
Running Task 0, Epoch: 4 on Classifier
    Classifier loss: 0.3907396576162112    Classifier accuracy: 0.8342335244356576
Running Task 0, Epoch: 0 on Generator


# Eval

In [None]:
''' Evaluation'''
for task, loader in enumerate(test_loader):
    print("Task: ", task)
    LOSS = []
    ACC = []
    for data, target in loader:
      logits = agent.classifier_model(data)
      pred = np.argmax(logits, axis=1)
      report = agent.eval(np.argmax(target, axis=1), pred)
      loss = tf.keras.losses.categorical_crossentropy(target, logits)
      #print(report)
      ACC.append(report)
      LOSS.append(loss)
    print("Mean loss: ", np.mean(LOSS))
    print("Mean accuracy: ", np.mean(ACC))
    print("\n")

# Evaluation, testing

In [None]:
def evaluate(loader, first_n_tasks=None):
    for task, tr_loader in enumerate(loader):
        print("Task: ", task)
        data, target = tr_loader.batch(124)
        logits = agent.classifier_model(data)
        pred = np.argmax(logits, axis=1)
        report = agent.eval(np.argmax(target, axis=1), pred)
        loss = tf.keras.losses.categorical_crossentropy(target, logits)
        print(report)
        print("Mean loss: ", np.mean(loss))

In [None]:
print("Evaluation on training set:")
evaluate(train_loader)
print("Evaluation on test set:")
evaluate(test_loader)

# Utils for development

In [None]:
# Reload modules
importlib.reload()

<module 'stable_diffusion.stable_diffusion' from '/Users/laszlofreund/PycharmProjects/continual-learning-ait/stable_diffusion/stable_diffusion.py'>

In [None]:
# Garbage collection
gc.collect()

21547

# Generator

In [20]:
import torchvision
import torch

AttributeError: module 'numpy' has no attribute 'ndarray'

In [21]:
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True)

NameError: name 'torchvision' is not defined

In [6]:
(X_train, y_train), (X_test, y_test) = load_cifar_10()

AttributeError: module 'tensorflow' has no attribute 'keras'

In [None]:
!wget -U https://nvlabs-fi-cdn.nvidia.com/edm/pretrained/edm-cifar10-32x32-cond-vp.pkl -P /Users/laszlofreund/PycharmProjects/continual-learning-ait/checkpoints/
!wget -U

In [2]:
import classifier_lib
import pickle
from train import train_generator

In [3]:
encoder =       classifier_lib.load_encoder('/checkpoints/32x32_classifier.pt', 32, "cpu", eval=False)
discriminator = classifier_lib.load_discriminator(None, "cpu", eval=False)

# Load pretrained score network.
#with open('/Users/laszlofreund/PycharmProjects/continual-learning-ait#/checkpoints/edm-cifar10-32x32-cond-vp.pkl', 'rb') as f:
  #  scorenet = pickle.load(f)['ema'].to("cpu")

In [5]:
train_generator(None, None, None, None, None, None)

ValueError: zero-dimensional arrays cannot be concatenated