<a href="https://colab.research.google.com/github/lacykaltgr/continual-learning-ait/blob/experiment/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
'''Download the files '''
'''Only for colab'''

!wget https://github.com/lacykaltgr/continual-learning-ait/archive/refs/heads/experiment.zip
!unzip experiment.zip
#!find continual-learning-ait-experiment -type f ! -name "main.ipynb" -exec cp {} . \;
!cd continual-learning-ait-experiment

import sys
sys.path.append('/content/continual-learning-ait-experiment')

--2023-05-15 14:05:41--  https://github.com/lacykaltgr/continual-learning-ait/archive/refs/heads/experiment.zip
Resolving github.com (github.com)... 140.82.113.3
Connecting to github.com (github.com)|140.82.113.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://codeload.github.com/lacykaltgr/continual-learning-ait/zip/refs/heads/experiment [following]
--2023-05-15 14:05:41--  https://codeload.github.com/lacykaltgr/continual-learning-ait/zip/refs/heads/experiment
Resolving codeload.github.com (codeload.github.com)... 140.82.112.9
Connecting to codeload.github.com (codeload.github.com)|140.82.112.9|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/zip]
Saving to: ‘experiment.zip.1’

experiment.zip.1        [ <=>                ]   1.62M  --.-KB/s    in 0.1s    

2023-05-15 14:05:41 (11.1 MB/s) - ‘experiment.zip.1’ saved [1696824]

Archive:  experiment.zip
81a3470d8644a0f0af548380128accc41c1ffbf4
replac

In [3]:
import numpy as np
import tensorflow as tf
import keras

import torch
import torchvision.transforms as transforms

from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
#from keras.metrics import Accuracy

#import classifier
from generator import Generator
from classifier import Classifier
import utils
from data_preparation import load_dataset, CLDataLoader, RealFakeConditionalDataset

import gc
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import importlib

# Load the dataset

In [4]:
# Build the train/test task splits for CIFAR-10. Going by the keyword names,
# the first task gets 4 classes and every later task 3 (semantics live in
# data_preparation.load_dataset).
dpt_train, dpt_test = load_dataset(
    'cifar-10',
    n_classes_first_task=4,
    n_classes_other_task=3,
)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [6]:
# Mini-batch size shared by both loaders.
batch_size = 256

# Wrap each split in a CLDataLoader: train split first, then the test split.
train_loader, test_loader = (
    CLDataLoader(split, batch_size, train=is_train)
    for split, is_train in ((dpt_train, True), (dpt_test, False))
)

# Define parameters and agent

In [7]:
# Hyper-parameters of the experiment. The dict is handed to Agent and read back
# through agent.params. Keys without a trailing note are not read by the cells
# visible in this notebook (they may be consumed by the project modules).
params = {
    # --- general ---
    "n_runs": 1,  # number of outer repetitions in run()
    "n_tasks": 3,
    "n_classes": 10,  # width of the one-hot class labels built in generate()
    "input_shape": (32, 32, 3),  # shape of the keras.Input feeding the classifier
    "embedding_shape": (6, 6, 8),  # only referenced by commented-out code
    "samples_per_task": 10000,
    "batch_size": batch_size,  # same value the CLDataLoaders were built with
    "eval_batch_size": 1,
    "print_every": 1,  # run_cls_epoch reports when epoch % print_every == 0
    "device": "cuda" if torch.cuda.is_available() else "cpu",  # torch device string

    # --- classifier ---
    "cls_iters": 1,  # fit() calls per mini-batch in train_classifier
    "cls_lr": 1e-2,  # Adam learning rate of the classifier optimizer
    "cls_epochs": 5,  # classifier epochs per task in run_task

    # --- generator ---
    "gen_epochs": 1,  # generator epochs per task in run_task
    "num_steps": 3,
    "gen_lr": 2e-4,  # Adam learning rate used in train_generator
    "gen_iters": 1,
    "input_latent_strength": 0.9,
    "temperature": 0.9,

    # --- MIR (retrieve_gen_for_cls / retrieve_gen_for_gen) ---
    "n_mem": 2,  # number of sample batches searched per retrieval
    "mir_iters": 3,  # gradient steps applied to each batch
    "reuse_samples": True,  # only referenced by commented-out code
    "mem_coeff": 0.12,
    "z_size": 10,  # sqrt(z_size) is the target norm of the "shell" term
    # Weights of the individual terms combined into `gain` by the retrieval functions.
    "gen_ent_coeff": 0.5,
    "gen_div_coeff": 0.5,
    "gen_shell_coeff": 0.5,
    "cls_xent_coeff": 0.5,
    "cls_ent_coeff": 0.5,
    "cls_div_coeff": 0.5,
    "cls_shell_coeff": 0.5,
}

In [8]:
'''Agent to handle models, parameters and states'''

class Agent:
  """Holds the models, the hyper-parameters and the mutable run state.

  Attributes:
    params: hyper-parameter dict passed to the constructor (kept by reference).
    state: scratch dict that the training functions read and write.
    classifier: compiled keras.Model, available after set_models().
    generator: generator object handed to set_models(), stored as-is.
    encoder, encoder_classifier: placeholders; nothing in this class assigns them.
    optimizer: Adam optimizer of the classifier, available after set_models().
    eval: metric callable (sklearn's accuracy_score).
  """

  def __init__(self, hparams):
    """Store the hyper-parameters and create empty slots for the models.

    Args:
      hparams: dict of hyper-parameters; set_models() reads "cls_lr" and
        "input_shape" from it.
    """
    self.params = hparams
    self.state = dict()

    self.classifier = None
    self.generator = None
    self.encoder = None
    self.encoder_classifier = None
    # Declared here so the attribute exists before set_models() is called.
    self.optimizer = None

    self.eval = accuracy_score


  def set_models(
          self,
          _generator=None,
          _classifier=None,
    ):
    """Attach the generator and wrap the classifier into a compiled Keras model.

    Args:
      _generator: generator object; stored unchanged in self.generator.
      _classifier: callable applied to a keras.Input of shape
        params["input_shape"]; its output becomes the model output.
    """
    # Read the learning rate from this agent's own hyper-parameters rather than
    # from a module-level `params`, so an Agent built from a different dict
    # behaves as configured.
    self.optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=self.params["cls_lr"])

    # Classifier pipeline: image -> classifier output.
    data_input = keras.Input(shape=self.params["input_shape"], name="image")
    cls_output = _classifier(data_input)
    self.classifier = keras.Model(inputs=data_input, outputs=cls_output)
    self.classifier.compile(optimizer=self.optimizer, loss="categorical_crossentropy", metrics=["accuracy"])

    # Generator pipeline: the generator is used directly, without a Keras wrapper.
    self.generator = _generator

# Functions for training

In [9]:
'''Generate samples and train the diffusion model at the same time'''

def generate(
        agent,
        boosting, time_min, time_max,
        dg_weight_1st_order, dg_weight_2nd_order,
        batch_size,
        class_idx=None,
):
    """Draw a batch of class-conditional samples from the agent's generator.

    Args:
        agent: object exposing `params` (dict with "device", "n_classes",
            "input_shape") and `generator` (with `net.img_channels` and `sample`).
        boosting, time_min, time_max, dg_weight_1st_order, dg_weight_2nd_order:
            forwarded unchanged to `agent.generator.sample`.
        batch_size: number of samples to draw.
        class_idx: when given, every sample is conditioned on this class
            instead of a randomly drawn one.

    Returns:
        (images, class_labels): whatever `agent.generator.sample` returns, and
        the one-hot labels of shape (batch_size, n_classes) used as condition.
    """
    device = agent.params['device']
    n_classes = agent.params['n_classes']

    # agent.params is a plain dict, so attribute access (params.img_resolution)
    # raises AttributeError. The spatial size is taken from "input_shape",
    # which this notebook stores as (height, width, channels).
    height, width = agent.params['input_shape'][:2]

    # Pick latents and labels.
    latents = torch.randn([batch_size, agent.generator.net.img_channels, height, width], device=device)

    # "classes_learned" is honoured when the caller configured it; the
    # hyper-parameters in this notebook do not define it, so fall back to
    # sampling from every class instead of failing with a KeyError.
    # NOTE(review): confirm that sampling from all classes is the intended default.
    n_sampled_classes = min(int(agent.params.get('classes_learned', n_classes)), n_classes)
    label_ids = torch.randint(n_sampled_classes, size=[batch_size], device=device)
    class_labels = torch.eye(n_classes, device=device)[label_ids]
    if class_idx is not None:
        class_labels[:, :] = 0
        class_labels[:, class_idx] = 1

    # Generate images.
    images = agent.generator.sample(boosting, time_min, time_max, dg_weight_1st_order, dg_weight_2nd_order, latents, class_labels, randn_like=torch.randn_like)

    return images, class_labels


In [None]:
'''Retrieve maximally interfered latent vector for classifier'''

def retrieve_gen_for_cls(agent):
    """Search for generated samples that maximally interfere with the classifier.

    For each of params["n_mem"] batches, a generated batch is refined with
    params["mir_iters"] gradient-ascent steps (step size 1) on

        gain = cls_xent_coeff * XENT - cls_ent_coeff * ENT
               + cls_div_coeff * DIV - cls_shell_coeff * SHELL

    where XENT compares the current classifier with a "virtual" next-step
    classifier, ENT is the entropy of the current prediction, DIV is the
    distance to batches found earlier and SHELL penalises norms that differ
    from sqrt(z_size).

    Args:
        agent: Agent whose `state` holds the current "data" and "target".

    Returns:
        (mem_x, mem_y, mir_worked): retrieved samples, the classifier's
        predictions on them, and whether the search finished without NaNs.
    """
    # Hyper-parameters come from the agent, not from a module-level `params`
    # (the outer loop already read them from the agent).
    hp = agent.params

    print("Retrieving latent vector for classifier...")

    # NOTE(review): Agent.set_models never assigns agent.encoder, so an encoder
    # has to be attached before this function can run.
    latent = agent.encoder(agent.state["data"])
    virtual_cls = Classifier()
    # Agent exposes its model as `classifier`; it has no `cls` attribute.
    # NOTE(review): confirm utils.get_next_step_cls accepts the compiled keras.Model.
    virtual_cls = utils.get_next_step_cls(
        agent.classifier,
        virtual_cls,
        latent,
        agent.state["target"]
    )

    final_latent = None

    for i in range(hp["n_mem"]):

        # NOTE(review): generate() in this notebook requires sampling arguments
        # and returns an (images, labels) pair of torch tensors; this call and
        # the fallback below still have to be adapted to that interface.
        generated = generate(agent)

        for _ in range(hp["mir_iters"]):
            # The gradient is requested once per tape, so a persistent tape is
            # not needed.
            with tf.GradientTape() as tape:

                tape.watch(generated)

                y_pre = agent.classifier(generated)
                y_virtual = virtual_cls(generated)

                # maximise the interference:
                XENT = tf.constant(0.)
                if hp["cls_xent_coeff"] > 0.:
                    XENT = tf.keras.losses.categorical_crossentropy(y_virtual, y_pre)

                # the predictions from the two models should be confident
                ENT = tf.constant(0.)
                if hp["cls_ent_coeff"] > 0.:
                    ENT = tf.keras.losses.categorical_crossentropy(y_pre, y_pre)

                # the new-found samples should be different from each other;
                # averaged over the i batches found so far (loop is empty for i == 0)
                DIV = tf.constant(0.)
                if hp["cls_div_coeff"] > 0.:
                    n_rows = generated.shape[0]
                    for found_generated in range(i):
                        start = found_generated * n_rows
                        DIV += tf.keras.losses.MSE(
                            generated,
                            final_latent[start:start + n_rows]
                        ) / i

                # (NEW) stay on gaussian shell loss:
                SHELL = tf.constant(0.)
                if hp["cls_shell_coeff"] > 0.:
                    norms = tf.norm(generated, axis=1)
                    SHELL = tf.keras.losses.MSE(
                        norms,
                        tf.ones_like(norms) * np.sqrt(hp["z_size"])
                    )

                XENT, ENT, DIV, SHELL = \
                    tf.reduce_mean(XENT), \
                    tf.reduce_mean(ENT), \
                    tf.reduce_mean(DIV), \
                    tf.reduce_mean(SHELL)

                gain = hp["cls_xent_coeff"] * XENT + \
                       -hp["cls_ent_coeff"] * ENT + \
                       hp["cls_div_coeff"] * DIV + \
                       -hp["cls_shell_coeff"] * SHELL

            # tape.gradient returns None when gain does not depend on
            # `generated` (e.g. every coefficient is zero).
            gen_grad = tape.gradient(gain, generated)
            if gen_grad is not None:
                generated = (generated + 1 * gen_grad)

        if final_latent is None:
            final_latent = generated.numpy().copy()
        else:
            final_latent = np.concatenate([final_latent, generated.numpy().copy()])

    # final_latent is a NumPy array here and carries no gradient; the former
    # tf.stop_gradient(final_latent) call discarded its result and was dropped.
    mir_worked = not np.isnan(final_latent).any()
    mem_x = final_latent if mir_worked else generate(agent, train=False)
    mem_y = agent.classifier(mem_x).numpy()

    return mem_x, mem_y, mir_worked

In [None]:
'''Retrieve maximally interfered latent vector for generator'''


def retrieve_gen_for_gen(agent):
    """Search for generated samples to replay to the generator.

    For each of params["n_mem"] batches, a generated batch is refined with
    params["mir_iters"] gradient-ascent steps (step size 1) on

        gain = gen_div_coeff * DIV - gen_ent_coeff * ENT - gen_shell_coeff * SHELL

    where ENT is the cross-entropy between the classifier's prediction and its
    own one-hot decision, DIV is the distance to batches found earlier and
    SHELL penalises norms that differ from sqrt(z_size).

    Args:
        agent: Agent providing `params` and a callable `classifier`.

    Returns:
        (mem_x, mir_worked): the retrieved samples and whether the search
        finished without producing NaNs.
    """
    # Hyper-parameters come from the agent, not from a module-level `params`.
    hp = agent.params

    print("Retrieving latent vector for generator...")

    # The former `latent = agent.encoder(...)` call was removed: its result was
    # never used, and Agent.set_models never assigns agent.encoder.
    final_latent = None

    for i in range(hp["n_mem"]):

        # NOTE(review): generate() in this notebook requires sampling arguments
        # and returns an (images, labels) pair of torch tensors; this call and
        # the fallback below still have to be adapted to that interface.
        generated = generate(agent)

        for _ in range(hp["mir_iters"]):

            # The gradient is requested once per tape, so a persistent tape is
            # not needed.
            with tf.GradientTape() as tape:
                tape.watch(generated)

                # the predictions from the two models should be confident
                ENT = tf.constant(0.)
                if hp["gen_ent_coeff"] > 0.:
                    y_pre = agent.classifier(generated)
                    y_pre_true = utils.get_one_hot_predictions(y_pre)
                    ENT = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(y_pre_true, y_pre))

                # the new-found samples should be different from each other;
                # averaged over the i batches found so far (loop is empty for i == 0)
                DIV = tf.constant(0.)
                if hp["gen_div_coeff"] > 0.:
                    n_rows = generated.shape[0]
                    for found_generated in range(i):
                        start = found_generated * n_rows
                        DIV += tf.reduce_mean(tf.math.squared_difference(
                            generated,
                            final_latent[start:start + n_rows])
                        ) / i

                # (NEW) stay on gaussian shell loss:
                SHELL = tf.constant(0.)
                if hp["gen_shell_coeff"] > 0.:
                    norms = tf.norm(generated, ord=2, axis=1)
                    SHELL = tf.reduce_mean(tf.math.squared_difference(
                        norms,
                        tf.ones_like(norms) * np.sqrt(hp["z_size"])))

                gain = hp["gen_div_coeff"] * DIV + \
                       -hp["gen_ent_coeff"] * ENT + \
                       -hp["gen_shell_coeff"] * SHELL

            # tape.gradient returns None when gain does not depend on
            # `generated` (e.g. every coefficient is zero); adding None would
            # raise, so the step is skipped, as retrieve_gen_for_cls does.
            grad = tape.gradient(gain, generated)
            if grad is not None:
                generated = (generated + grad)

        if final_latent is None:
            final_latent = tf.identity(generated)
        else:
            final_latent = tf.concat([final_latent, generated], axis=0)

    # The former tf.stop_gradient(final_latent) call discarded its result and
    # had no effect, so it was dropped.
    mir_worked = not np.isnan(final_latent).any()
    mem_x = final_latent if mir_worked else generate(agent, train=False)

    return mem_x, mir_worked

In [10]:
'''Train the generator unit'''

def train_generator(agent):
    """Train the generator's discriminator for one pass over the real/fake loader.

    Batches are read from agent.state["real_fake_loader"], which run_gen_epoch
    builds right before calling this function and which yields
    (inputs, labels, cond) triples. Iterating the module-level `train_loader`
    instead would yield the per-task loaders, not such triples.

    Args:
        agent: Agent providing `params` ("device", "gen_lr"), `state`
            ("real_fake_loader") and `generator` (with `encoder` and
            `discriminator`).

    Returns:
        (outs, cors): per-batch BCE loss values and per-batch fraction of
        correct real/fake decisions (threshold 0.5), as Python floats, with
        one entry for every batch of the loader.
    """
    device = agent.params["device"]
    optimizer = torch.optim.Adam(agent.generator.discriminator.parameters(), lr=agent.params["gen_lr"], weight_decay=1e-7)
    loss = torch.nn.BCELoss()

    # Training
    outs = []
    cors = []
    for inputs, labels, cond in agent.state["real_fake_loader"]:
        optimizer.zero_grad()

        cond = cond.to(device)
        inputs = inputs.to(device)
        labels = labels.to(device)
        # Affine rescale x -> 2x - 1 (maps [0, 1] to [-1, 1], assuming the
        # inputs arrive in [0, 1] - TODO confirm).
        inputs = 2. * inputs - 1.

        # Data perturbation: noise the inputs with the mean/std that
        # utils.vpsde reports for the sampled diffusion times t.
        t, _ = utils.vpsde.get_diffusion_time(inputs.shape[0], inputs.device)
        mean, std = utils.vpsde.marginal_prob(t)
        z = torch.randn_like(inputs)
        perturbed_inputs = mean[:, None, None, None] * inputs + std[:, None, None, None] * z

        # Forward: the encoder only provides features, so no gradient is
        # tracked through it; only the discriminator is optimised.
        with torch.no_grad():
            pretrained_feature = agent.generator.encoder(perturbed_inputs, timesteps=t, feature=True)
        label_prediction = agent.generator.discriminator(pretrained_feature, t, sigmoid=True, condition=cond).view(-1)

        # Backward
        out = loss(label_prediction, labels)
        out.backward()
        optimizer.step()

        # Report
        cor = ((label_prediction > 0.5).float() == labels).float().mean()
        outs.append(out.item())
        cors.append(cor.item())

    # Return only after the whole loader has been consumed; returning from
    # inside the loop stopped training after the first batch.
    return outs, cors

In [11]:
'''Train the encoder and the classifier unit'''

def train_classifier(agent):
    """Fit the classifier on the batch currently stored in the agent's state.

    Runs params["cls_iters"] single-epoch fit() calls on
    (state["data"], state["target"]) and appends the loss and accuracy of each
    call to state["epoch_eval"]["cls_loss"] / ["cls_acc"].

    The replay of retrieved (MIR) samples that used to follow each fit is
    currently disabled.
    """
    batch_x = agent.state["data"]
    batch_y = agent.state["target"]

    for _ in range(agent.params["cls_iters"]):
        fit_result = agent.classifier.fit(
            batch_x,
            batch_y,
            batch_size=agent.params["batch_size"],
            epochs=1,
            verbose=0,
        )
        epoch_log = agent.state["epoch_eval"]
        epoch_log["cls_loss"].append(fit_result.history["loss"][0])
        epoch_log["cls_acc"].append(fit_result.history["accuracy"][0])

In [12]:
'''Run an epoch'''

def run_cls_epoch(agent):
    """Run one classifier epoch over the current task's training loader.

    Resets the classifier metric lists in state["epoch_eval"], feeds every
    (data, target) batch of state["tr_loader"] to train_classifier and, every
    params["print_every"] epochs, prints the mean loss and accuracy.
    """
    print(f"Running Task {agent.state['task']}, Epoch: {agent.state['epoch']} on Classifier")

    # Fresh metric lists for this epoch.
    for metric in ("cls_loss", "cls_acc", "retr_cls_loss", "retr_cls_accuracy"):
        agent.state["epoch_eval"][metric] = []

    agent.state["sample_amt"] = 0
    for data, target in agent.state["tr_loader"]:
        agent.state["data"] = data
        agent.state["target"] = target
        train_classifier(agent)

    # Evaluate the models in epoch
    if agent.state["epoch"] % agent.params["print_every"] != 0:
        return
    print(f"    Classifier loss: {np.mean(agent.state['epoch_eval']['cls_loss'])}"
          f"    Classifier accuracy: {np.mean(agent.state['epoch_eval']['cls_acc'])}")

In [13]:
'''Run an epoch'''

def _to_numpy_batch(batch):
    """Return `batch` as a NumPy array, moving torch tensors to the CPU first."""
    if isinstance(batch, torch.Tensor):
        return batch.detach().cpu().numpy()
    return np.asarray(batch)


def run_gen_epoch(agent):
    """Run one generator epoch over the current task's training loader.

    For every real batch, an equally configured batch is drawn from the
    generator, both are merged into a real (label 1) / generated (label 0)
    dataset conditioned on the class labels, and train_generator is run on it
    through agent.state["real_fake_loader"].

    Args:
        agent: Agent providing `params` ("batch_size"), `state` ("task",
            "epoch", "epoch_eval", "tr_loader") and the generator.
    """
    print(f"Running Task {agent.state['task']}, Epoch: {agent.state['epoch']} on Generator")

    agent.state["epoch_eval"]["gen_loss"] = []
    agent.state["epoch_eval"]["retr_gen_loss"] = []
    agent.state["epoch_eval"]["retr_gen_accuracy"] = []

    agent.state["sample_amt"] = 0
    real_loader = agent.state["tr_loader"]

    for i, (X_real, y_real) in enumerate(real_loader):
        X_gen, y_gen = generate(agent, 0, 0.01, 1.0, 0, 0, agent.params["batch_size"])

        # generate() creates torch tensors on params["device"]; NumPy cannot
        # consume CUDA tensors, so everything is brought to CPU arrays first.
        X_real, y_real = _to_numpy_batch(X_real), _to_numpy_batch(y_real)
        X_gen, y_gen = _to_numpy_batch(X_gen), _to_numpy_batch(y_gen)

        # generate() builds its latents channels-first; if the generated batch
        # only matches the real one after moving the channel axis last, do so,
        # otherwise np.concatenate would fail on the shape mismatch.
        # NOTE(review): the value range of generated vs. real images is not
        # aligned here - confirm whether the samples need rescaling.
        if X_gen.ndim == 4 and X_gen.shape[1:] != X_real.shape[1:]:
            channels_last = np.moveaxis(X_gen, 1, -1)
            if channels_last.shape[1:] == X_real.shape[1:]:
                X_gen = channels_last

        train_data = np.concatenate((X_real, X_gen))
        # Real samples come first and are labelled 1, generated ones 0.
        train_label = torch.zeros(train_data.shape[0])
        train_label[:X_real.shape[0]] = 1.
        transform = transforms.Compose([transforms.ToTensor()])
        condition_label = np.concatenate((y_real, y_gen))
        train_dataset = RealFakeConditionalDataset(train_data, train_label, condition_label, transform)

        # Use the agent's own batch size rather than the module-level variable.
        agent.state["real_fake_loader"] = torch.utils.data.DataLoader(
            dataset=train_dataset, batch_size=agent.params["batch_size"], shuffle=True)
        outs, cors = train_generator(agent)
        # Keep the losses so an epoch-level summary can be computed from "gen_loss".
        agent.state["epoch_eval"]["gen_loss"].extend(outs)
        print(f"{i}-th batch BCE loss: {np.mean(outs)}, correction rate: {np.mean(cors)}")

In [14]:
'''Run a task'''

def run_task(agent):
    """Train on the current task: all classifier epochs, then all generator epochs.

    Resets the MIR counters and the per-epoch metric store in agent.state, then
    runs params["cls_epochs"] classifier epochs followed by
    params["gen_epochs"] generator epochs, publishing the epoch index in
    state["epoch"] before each one.

    The evaluation of forgetting on earlier tasks and the MIR success-rate
    report that used to follow are currently disabled.
    """
    agent.state["mir_tries"] = 0
    agent.state["mir_success"] = 0
    agent.state["epoch_eval"] = {}

    for cls_epoch in range(agent.params["cls_epochs"]):
        agent.state["epoch"] = cls_epoch
        run_cls_epoch(agent)

    for gen_epoch in range(agent.params["gen_epochs"]):
        agent.state["epoch"] = gen_epoch
        run_gen_epoch(agent)

In [None]:
'''Run the experiment'''

def run(agent):
  """Run the whole experiment: params["n_runs"] passes over all tasks.

  Each task pairs one training loader with one test loader (taken in order
  from the module-level train_loader / test_loader); both are published in
  agent.state before run_task is called. state["classes_learned"] is reset to
  an empty list and is not updated afterwards.
  """
  agent.state["classes_learned"] = []
  for run_idx in range(agent.params["n_runs"]):
    agent.state["run"] = run_idx
    print(f"Run {run_idx}")
    task_loaders = zip(train_loader, test_loader)
    for task, (tr_loader, ts_loader) in enumerate(task_loaders):
      agent.state.update(task=task, tr_loader=tr_loader, ts_loader=ts_loader)
      run_task(agent)

# Training

In [15]:
# Checkpoint locations (presumably on a mounted Google Drive - verify the
# mount point before running).
encoder_f = '/drive/MyDrive/continual-learning-ait/checkpoints/32x32_classifier.pt'
# NOTE(review): scorenet_f is defined here but not passed to anything in this
# cell - confirm whether Generator should receive it.
scorenet_f = '/drive/MyDrive/continual-learning-ait/checkpoints/edm-cifar10-32x32-cond-vp.pkl'

# Build the agent and attach a fresh classifier and the generator.
# NOTE(review): the saved output of this cell is a TypeError; which call raised
# it cannot be told from the recorded output.
agent = Agent(params)
agent.set_models(
    _classifier=Classifier(),
    _generator=Generator(encoder_path=encoder_f),
)
# The experiment itself is currently disabled; uncomment to start training.
#run(agent)

TypeError: ignored

# Eval

In [None]:
# Evaluation: per-task mean loss and accuracy of the classifier on the test split.
for task, loader in enumerate(test_loader):
    print("Task: ", task)
    LOSS = []  # one entry per sample
    ACC = []   # one entry per batch
    for data, target in loader:
      # Agent stores its model as `classifier`; it has no `classifier_model`.
      logits = agent.classifier(data)
      pred = np.argmax(logits, axis=1)
      report = agent.eval(np.argmax(target, axis=1), pred)
      loss = tf.keras.losses.categorical_crossentropy(target, logits)
      ACC.append(report)
      # Flatten the per-sample losses into the list: appending whole batch
      # tensors yields a ragged list as soon as one batch is shorter, which
      # np.mean cannot average.
      LOSS.extend(np.ravel(loss))
    print("Mean loss: ", np.mean(LOSS))
    print("Mean accuracy: ", np.mean(ACC))
    print("\n")

# Evaluation, testing

In [None]:
def evaluate(loader, first_n_tasks=None):
    """Print accuracy and mean loss of the agent's classifier for each task.

    Args:
        loader: iterable of per-task loaders; each must offer `.batch(n)`
            returning a (data, target) pair.
        first_n_tasks: when given, only the first `first_n_tasks` tasks are
            evaluated; None (the default) evaluates every task.
    """
    for task, tr_loader in enumerate(loader):
        # Honour the task limit; the parameter used to be accepted but ignored.
        if first_n_tasks is not None and task >= first_n_tasks:
            break
        print("Task: ", task)
        # One batch of 124 samples is requested per task.
        data, target = tr_loader.batch(124)
        # Agent stores its model as `classifier`; it has no `classifier_model`.
        logits = agent.classifier(data)
        pred = np.argmax(logits, axis=1)
        report = agent.eval(np.argmax(target, axis=1), pred)
        loss = tf.keras.losses.categorical_crossentropy(target, logits)
        print(report)
        print("Mean loss: ", np.mean(loss))

In [None]:
# Report the classifier's metrics on the training tasks, then on the test tasks.
for banner, split_loader in (
    ("Evaluation on training set:", train_loader),
    ("Evaluation on test set:", test_loader),
):
    print(banner)
    evaluate(split_loader)

# Utils for development

In [None]:
# Reload modules
# importlib.reload() needs the module object to reload; calling it without an
# argument raises TypeError. Reload every project module this notebook uses.
for _module_name in ("utils", "generator", "classifier", "data_preparation"):
    importlib.reload(importlib.import_module(_module_name))

# reload() does not update names that were bound with `from ... import ...`,
# so bind them again to pick up the reloaded definitions.
from generator import Generator
from classifier import Classifier
from data_preparation import load_dataset, CLDataLoader, RealFakeConditionalDataset

<module 'stable_diffusion.stable_diffusion' from '/Users/laszlofreund/PycharmProjects/continual-learning-ait/stable_diffusion/stable_diffusion.py'>

In [None]:
# Garbage collection: force a full collection pass. As the last expression of
# the cell, the integer returned by gc.collect() is shown as the cell output.
gc.collect()

21547