<a href="https://colab.research.google.com/github/lacykaltgr/continual-learning-ait/blob/experiment/generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
'''Download the files '''
'''Only for colab'''

!wget https://github.com/lacykaltgr/continual-learning-ait/archive/refs/heads/experiment.zip
!unzip experiment.zip
!find continual-learning-ait-experiment -type f ! -name "main.ipynb" -exec cp {} . \;

!rm -r stable_diffusion
!rm -r models
!mkdir stable_diffusion
!mkdir models
!mv diffusion_model.py stable_diffusion/
!mv autoencoder_kl.py stable_diffusion/
!mv layers.py stable_diffusion/
!mv stable_diffusion.py stable_diffusion/
!mv constants.py stable_diffusion/
!mv encoder.h5 models/
!mv classifier.h5 models/

--2023-05-10 13:48:43--  https://github.com/lacykaltgr/continual-learning-ait/archive/refs/heads/experiment.zip
Resolving github.com (github.com)... 20.205.243.166
Connecting to github.com (github.com)|20.205.243.166|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://codeload.github.com/lacykaltgr/continual-learning-ait/zip/refs/heads/experiment [following]
--2023-05-10 13:48:43--  https://codeload.github.com/lacykaltgr/continual-learning-ait/zip/refs/heads/experiment
Resolving codeload.github.com (codeload.github.com)... 20.205.243.165
Connecting to codeload.github.com (codeload.github.com)|20.205.243.165|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/zip]
Saving to: ‘experiment.zip’

experiment.zip          [       <=>          ]   1.58M  1.18MB/s    in 1.3s    

2023-05-10 13:48:45 (1.18 MB/s) - ‘experiment.zip’ saved [1657860]

Archive:  experiment.zip
e7e6f0b439c19f52204ca7f0b7b16f0dfdb98b06
  

In [6]:
from keras.models import load_model
import tensorflow as tf
import keras
import numpy as np
from keras.layers import Conv2D
import math
from stable_diffusion.constants import _ALPHAS_CUMPROD

TypeError: Unable to convert function return value to a Python type! The signature was
	() -> handle

In [3]:
def load_cifar_10():
    """Load CIFAR-10 through Keras and prepare it for the models in this notebook.

    Images are rescaled with ``x / 127.5 - 1`` (which maps 0..255 pixel values
    onto -1..1) and the integer labels are one-hot encoded over 10 classes.

    Returns:
        ``(X_train, y_train), (X_test, y_test)``
    """
    n_classes = 10
    (train_images, train_labels), (test_images, test_labels) = (
        tf.keras.datasets.cifar10.load_data()
    )

    # Same affine rescaling for both splits.
    X_train = (train_images / 127.5) - 1
    X_test = (test_images / 127.5) - 1

    # Integer class ids -> one-hot rows.
    y_train = tf.keras.utils.to_categorical(train_labels, n_classes)
    y_test = tf.keras.utils.to_categorical(test_labels, n_classes)

    return (X_train, y_train), (X_test, y_test)

In [4]:
# Restore the saved encoder and classifier from the models/ directory.
encoder = load_model("models/encoder.h5")
classifier = load_model("models/classifier.h5")

# Both networks are compiled with the same settings; each one gets its own
# optimizer and loss instance.
for _net in (encoder, classifier):
    _net.compile(
        optimizer=keras.optimizers.Adam(learning_rate=5e-3),
        loss=keras.losses.CategoricalCrossentropy(),
        metrics=['accuracy'],
    )



In [None]:
def apply_seq(x: object, layers: object) -> object:
    """Feed ``x`` through every callable in ``layers``, in order.

    Each callable receives the previous callable's result; with an empty
    ``layers`` the input is returned unchanged.
    """
    result = x
    for layer in layers:
        result = layer(result)
    return result

In [None]:
class ResBlock(keras.layers.Layer):
    """Residual block that adds a projected embedding between two conv stacks.

    ``call`` takes ``[x, emb]``. ``x`` goes through norm -> swish -> conv, the
    embedding goes through swish -> dense and is added to every spatial
    position, then a second norm -> swish -> conv stack is applied. The result
    is summed with ``x`` (passed through a 3x3 convolution when ``channels``
    differs from ``out_channels``, unchanged otherwise).
    """

    def __init__(self, channels, out_channels):
        super().__init__()
        # First stack, applied to the feature map.
        self.in_layers = [
            keras.layers.GroupNormalization(epsilon=1e-5),
            keras.activations.swish,
            Conv2D(out_channels, 3, strides=(1, 1), padding='same'),
        ]
        # Projects the embedding to `out_channels` features.
        self.emb_layers = [
            keras.activations.swish,
            keras.layers.Dense(out_channels),
        ]
        # Second stack, applied after the embedding has been added.
        self.out_layers = [
            keras.layers.GroupNormalization(epsilon=1e-5),
            keras.activations.swish,
            Conv2D(out_channels, 3, strides=(1, 1), padding='same'),
        ]
        # The residual path only needs a convolution when the channel count changes.
        if channels != out_channels:
            self.skip_connection = Conv2D(out_channels, 3, strides=(1, 1), padding='same')
        else:
            self.skip_connection = lambda x: x

    def call(self, inputs):
        x, emb = inputs
        hidden = apply_seq(x, self.in_layers)
        projected_emb = apply_seq(emb, self.emb_layers)
        # Insert two singleton axes so the embedding broadcasts over height and width.
        hidden = hidden + projected_emb[:, None, None]
        hidden = apply_seq(hidden, self.out_layers)
        return self.skip_connection(x) + hidden

In [27]:
class UNetModel(keras.Model):
    """U-Net noise predictor plus the DDIM-style sampling helpers used by `generate`.

    The encoder is three groups of ResBlocks (32 / 64 / 128 channels) separated
    by stride-2 convolutions, followed by a fourth 128-channel group, a
    two-ResBlock bottleneck and a decoder that concatenates the saved encoder
    activations (skip connections) in front of every output block. The head
    produces 8 channels.

    NOTE(review): each stride-2 convolution is paired with a 2x upsampling, so
    the skip concatenations presumably need a latent whose height and width are
    divisible by 8. The default random latent built in
    `get_starting_parameters` is only ``img_height // 8 = 4`` pixels per side --
    confirm against the encoder's output shape.
    """

    def __init__(self):
        print("UNetModel init")
        super().__init__()
        self.img_height = 32
        self.img_width = 32
        self.ntype = tf.float32
        # MLP applied to the sinusoidal timestep embedding.
        self.time_embed = [
            keras.layers.Dense(128),
            keras.activations.swish,
            keras.layers.Dense(128),
        ]
        # Encoder: the output of every entry is saved as a skip connection.
        self.input_blocks = [
            [Conv2D(32, 3, strides=(1, 1), padding='same')],
            [ResBlock(32, 32)],
            [ResBlock(32, 32)],
            [Conv2D(32, 3, strides=(2, 2), padding='same')],  # downsample
            [ResBlock(32, 64)],
            [ResBlock(64, 64)],
            [Conv2D(64, 3, strides=(2, 2), padding='same')],  # downsample
            [ResBlock(64, 128)],
            [ResBlock(128, 128)],
            [Conv2D(128, 3, strides=(2, 2), padding='same')],  # downsample
            [ResBlock(128, 128)],
            [ResBlock(128, 128)],
        ]
        # Bottleneck. The encoder ends with 128 channels, so the blocks must be
        # declared with 128 channels: a ResBlock whose `channels` equals
        # `out_channels` uses an identity skip, which cannot add a 128-channel
        # input to a 64-channel branch.
        self.middle_block = [
            ResBlock(128, 128),
            ResBlock(128, 128),
        ]
        # Decoder: the first ResBlock argument is the channel count after the
        # skip tensor has been concatenated to the running activation.
        self.output_blocks = [
            [ResBlock(256, 128)],
            [ResBlock(256, 128)],
            [
                ResBlock(256, 128),
                keras.layers.UpSampling2D(size=(2, 2)),
                # Keras only accepts string paddings; 'same' is what padding=1
                # means for a 3x3, stride-1 convolution.
                Conv2D(128, 3, strides=(1, 1), padding='same'),
            ],
            [ResBlock(256, 128)],
            [ResBlock(256, 128)],
            [
                ResBlock(192, 128),
                keras.layers.UpSampling2D(size=(2, 2)),
                Conv2D(128, 3, strides=(1, 1), padding='same'),
            ],
            [ResBlock(192, 64)],
            [ResBlock(128, 64)],
            [
                ResBlock(96, 64),
                keras.layers.UpSampling2D(size=(2, 2)),
                Conv2D(64, 3, strides=(1, 1), padding='same'),
            ],
            [ResBlock(96, 32)],
            [ResBlock(64, 32)],
            [ResBlock(64, 32)],
        ]
        # Output head: norm -> swish -> 3x3 conv down to 8 channels.
        self.out = [
            keras.layers.GroupNormalization(epsilon=1e-5),
            keras.activations.swish,
            Conv2D(8, 3, strides=(1, 1), padding='same'),
        ]

    def call(self, inputs):
        """Run the U-Net on ``[x, t_emb]`` and return the head's output.

        ``t_emb`` is first passed through `time_embed`; the result is handed to
        every ResBlock, while plain layers only receive the feature map.
        """
        x, t_emb = inputs
        emb = apply_seq(t_emb, self.time_embed)

        def apply(x, layer):
            return layer([x, emb]) if isinstance(layer, ResBlock) else layer(x)

        saved_inputs = []
        for b in self.input_blocks:
            for layer in b:
                x = apply(x, layer)
            saved_inputs.append(x)

        for layer in self.middle_block:
            x = apply(x, layer)

        # Skips are consumed in reverse order of creation (deepest first).
        for b in self.output_blocks:
            x = tf.concat([x, saved_inputs.pop()], axis=-1)
            for layer in b:
                x = apply(x, layer)

        return apply_seq(x, self.out)

    def initialize(self, params, input_latent=None, batch_size=64):
        """Prepare the starting latent and the schedule for one sampling run.

        Args:
            params: dict providing ``"num_steps"`` and ``"input_latent_strength"``.
            input_latent: optional latent to noise and start from; when ``None``
                a random latent is drawn instead.
            batch_size: batch size of the random latent.

        Returns:
            ``(latent, alphas, alphas_prev, timesteps)`` where ``timesteps`` is
            truncated to the first ``int(num_steps * input_latent_strength)``
            entries and ``alphas`` / ``alphas_prev`` cover the full schedule.
        """
        timesteps = np.arange(1, params['num_steps'] + 1)
        n_kept = int(len(timesteps) * params["input_latent_strength"])
        # Clamp the index so a strength of 1.0 selects the last timestep instead
        # of indexing one past the end of the schedule.
        input_lat_noise_t = timesteps[min(n_kept, len(timesteps) - 1)]
        latent, alphas, alphas_prev = self.get_starting_parameters(
            timesteps, batch_size, input_latent=input_latent, input_lat_noise_t=input_lat_noise_t
        )
        timesteps = timesteps[:n_kept]
        return latent, alphas, alphas_prev, timesteps

    def get_x_prev(self, x, e_t, a_t, a_prev, temperature):
        """Deterministic DDIM update (sigma_t = 0) from ``x`` to the previous step.

        Args:
            x: current latent.
            e_t: noise predicted by the model for ``x``.
            a_t: cumulative alpha of the current timestep.
            a_prev: cumulative alpha of the previous timestep.
            temperature: accepted for interface compatibility; unused because
                the stochastic noise term is disabled.

        Returns:
            ``sqrt(a_prev) * pred_x0 + sqrt(1 - a_prev) * e_t``.
        """
        sigma_t = 0
        sqrt_one_minus_at = math.sqrt(1 - a_t)
        # Predicted clean sample: the whole difference is divided by sqrt(a_t),
        # not only the noise term.
        pred_x0 = (x - sqrt_one_minus_at * e_t) / math.sqrt(a_t)

        # Direction pointing to x_t
        dir_xt = math.sqrt(1.0 - a_prev - sigma_t**2) * e_t
        #noise = sigma_t * tf.random.normal(x.shape, seed=seed) * temperature
        x_prev = math.sqrt(a_prev) * pred_x0 + dir_xt
        return x_prev

    def get_model_output(self, latent, timestep, batch_size):
        """Predict the noise for ``latent`` at a single scalar ``timestep``.

        The timestep embedding is computed once and repeated ``batch_size``
        times along axis 0, so ``batch_size`` has to match the latent's batch.
        """
        timesteps = tf.convert_to_tensor([timestep], dtype=tf.float32)
        t_emb = self.timestep_embedding(timesteps)
        t_emb = tf.repeat(t_emb, repeats=batch_size, axis=0)
        latent = self.call([latent, t_emb])
        return latent

    def timestep_embedding(self, timesteps, dim=320, max_period=10000):
        """Sinusoidal embedding of one timestep, returned with shape ``(1, dim)``.

        The first half of the vector holds cosines and the second half sines of
        ``timestep * freq`` for ``dim // 2`` geometrically spaced frequencies.
        """
        half = dim // 2
        freqs = np.exp(
            -math.log(max_period) * np.arange(0, half, dtype="float32") / half
        )
        args = np.array(timesteps) * freqs
        embedding = np.concatenate([np.cos(args), np.sin(args)])
        return tf.convert_to_tensor(embedding.reshape(1, -1), dtype=self.ntype)

    # for model with input latent

    def add_noise(self, x, t, noise=None):
        """Noise ``x`` to timestep ``t``: ``sqrt(a_t) * x + sqrt(1 - a_t) * noise``.

        A rank-3 ``x`` gets a leading batch axis. When ``noise`` is ``None`` it
        is drawn from a standard normal with the same shape as ``x``.
        """
        if len(x.shape) == 3:
            x = tf.expand_dims(x, axis=0)
        batch_size, w, h, c = x.shape[0], x.shape[1], x.shape[2], x.shape[3]
        if noise is None:
            noise = tf.random.normal((batch_size, w, h, c), dtype=tf.float32)
        # Cast both coefficients so neither product depends on the dtype of the
        # entries stored in _ALPHAS_CUMPROD.
        sqrt_alpha_prod = tf.cast(_ALPHAS_CUMPROD[t] ** 0.5, tf.float32)
        sqrt_one_minus_alpha_prod = tf.cast((1 - _ALPHAS_CUMPROD[t]) ** 0.5, tf.float32)

        return sqrt_alpha_prod * x + sqrt_one_minus_alpha_prod * noise

    def get_starting_parameters(self, timesteps, batch_size,  input_latent=None, input_lat_noise_t=None):
        """Return ``(latent, alphas, alphas_prev)`` for the given schedule.

        ``alphas`` looks up ``_ALPHAS_CUMPROD`` at every timestep and
        ``alphas_prev`` is the same list shifted by one with a leading 1.0.
        The latent is either random noise of shape
        ``(batch_size, img_height // 8, img_width // 8, 8)`` or ``input_latent``
        noised to ``input_lat_noise_t``.
        """
        n_h = self.img_height // 8
        n_w = self.img_width // 8
        alphas = [_ALPHAS_CUMPROD[t] for t in timesteps]
        alphas_prev = [1.0] + alphas[:-1]
        if input_latent is None:
            latent = tf.random.normal((batch_size, n_h, n_w, 8))
        else:
            input_latent = tf.cast(input_latent, self.ntype)
            #latent = tf.repeat(input_latent , batch_size , axis=0)
            latent = self.add_noise(input_latent, input_lat_noise_t)
        return latent, alphas, alphas_prev


In [6]:
def get_one_hot_predictions(mem_pred):
    """Turn per-class scores into one-hot rows marking each row's arg-max.

    Args:
        mem_pred: 2-D array-like of shape ``(n_samples, n_classes)``. Anything
            ``np.asarray`` accepts works, not only ``ndarray`` instances.

    Returns:
        A new array with the shape and dtype of ``np.asarray(mem_pred)`` holding
        1 at the position of each row's maximum (first one on ties) and 0
        elsewhere. The input is not modified.
    """
    scores = np.asarray(mem_pred)
    max_indices = np.argmax(scores, axis=1)

    mem_true = np.zeros_like(scores)
    # Pair every row index with its winning column and set that cell.
    mem_true[np.arange(len(max_indices)), max_indices] = 1

    return mem_true

In [15]:
'''Generate samples and train the diffusion model at the same time'''

def generate(cls=classifier, input_latent=None, train=True, coeff=1.0, optimizer=None):
    """Denoise a latent with the global `model`, optionally training it on the way.

    Args:
        cls: classifier applied to the latent after every step in training mode.
        input_latent: latent to start from (it is noised by `model.initialize`);
            when ``None`` a random latent is used.
        train: when true, every denoising step is followed by one gradient
            update of `model`; otherwise the steps only run forward.
        coeff: scale factor applied to the training loss.
        optimizer: optimizer used for the updates. When ``None`` a legacy Adam
            with ``params["gen_lr"]`` is created once for this call; pass a
            long-lived instance to keep its state across calls.

    Returns:
        The latent after the last denoising step.

    Reads the module-level ``model`` and ``params``.
    """
    if input_latent is not None:
        # The timestep embedding is tiled to `batch_size`, so it has to match
        # the latent actually being denoised (e.g. a smaller final batch).
        # A rank-3 latent is a single sample that gains a batch axis later.
        latent_shape = input_latent.shape
        batch_size = 1 if len(latent_shape) == 3 else int(latent_shape[0])
    else:
        batch_size = params['batch_size'] if train else 64
    latent, alphas, alphas_prev, timesteps = model.initialize(params, input_latent, batch_size)

    if train and optimizer is None:
        # Built once per call: a fresh optimizer per step would discard Adam's
        # moment estimates after every update.
        optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=params["gen_lr"])

    def denoise_step(current, index, timestep):
        """One reverse step: predict the noise, then move to the previous timestep."""
        e_t = model.get_model_output(current, timestep, batch_size)
        a_t, a_prev = alphas[index], alphas_prev[index]
        return model.get_x_prev(current, e_t, a_t, a_prev, params["temperature"])

    for index, timestep in reversed(list(enumerate(timesteps))):
        if not train:
            latent = denoise_step(latent, index, timestep)
            continue

        with tf.GradientTape() as tape:
            latent = denoise_step(latent, index, timestep)
            pred = cls(latent)
            # Loss based on confidence: cross-entropy of the prediction with itself.
            loss = coeff * tf.keras.losses.categorical_crossentropy(pred, pred)
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

    return latent

In [7]:
# Disabled draft of an agent-driven epoch loop, kept as a string literal so it
# never runs. It refers to `agent`, `tqdm` and `batch_size`, none of which are
# defined in the visible cells, and calls generate() with an `agent` argument
# that the current generate() signature does not take.
'''
def run_gen_epoch():


    for i, (data, target) in tqdm(enumerate(agent.state["tr_loader"])):
        #if agent.state["sample_amt"] > agent.params["samples_per_task"] > 0: break
        if data.shape[0] != batch_size: break
        agent.state["sample_amt"] += data.shape[0]

        agent.state["data"] = data
        agent.state["target"] = target
        agent.state["i_example"] = i

        data = agent.state["data"]
        latent = agent.gen.encoder(data)
        mem_x = None

        for it in range(agent.params["gen_iters"]):
            generate(agent, input_latent=latent)
    if agent.state["epoch"] % agent.params["print_every"] == 0:

        print("\nEvaluate generator on Task: ", agent.state["task"], " Epoch: ", agent.state["epoch"])
        loss = []
        for i, (data, target) in tqdm(enumerate(agent.state["ts_loader"])):

            mem_x = generate(agent, input_latent=agent.encoder(data), train=False)
            mem_pred = agent.cls(mem_x)
            mem_loss = tf.keras.losses.categorical_crossentropy(mem_pred, mem_pred)
            loss.append(np.mean(mem_loss))

        print("Loss on generate: ",  np.mean(mem_loss))
'''

'\ndef run_gen_epoch():\n\n\n    for i, (data, target) in tqdm(enumerate(agent.state["tr_loader"])):\n        #if agent.state["sample_amt"] > agent.params["samples_per_task"] > 0: break\n        if data.shape[0] != batch_size: break\n        agent.state["sample_amt"] += data.shape[0]\n\n        agent.state["data"] = data\n        agent.state["target"] = target\n        agent.state["i_example"] = i\n\n        data = agent.state["data"]\n        latent = agent.gen.encoder(data)\n        mem_x = None\n\n        for it in range(agent.params["gen_iters"]):\n            generate(agent, input_latent=latent)\n    if agent.state["epoch"] % agent.params["print_every"] == 0:\n\n        print("\nEvaluate generator on Task: ", agent.state["task"], " Epoch: ", agent.state["epoch"])\n        loss = []\n        for i, (data, target) in tqdm(enumerate(agent.state["ts_loader"])):\n\n            mem_x = generate(agent, input_latent=agent.encoder(data), train=False)\n            mem_pred = agent.cls(mem_x

In [28]:
# Instantiate the U-Net that generate() reads as the global `model`.
model = UNetModel()

UNetModel init


In [25]:
# Hyperparameters read by UNetModel.initialize(), generate() and the training loop.
params = dict(
    num_steps=2,                # length of the timestep schedule (1..num_steps)
    input_latent_strength=0.8,  # fraction of the schedule kept when starting from a latent
    temperature=0.9,            # forwarded to get_x_prev(), which currently ignores it
    batch_size=256,             # samples per training batch
    gen_lr=2e-5,                # Adam learning rate for the generator updates
    n_epoch=3,                  # passes over the training set
)

In [10]:
# Load CIFAR-10 via load_cifar_10(): images rescaled with x / 127.5 - 1, labels one-hot.
(X_train, y_train), (X_test, y_test) = load_cifar_10()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [29]:
# Train the generator: encode every batch, run generate() in training mode on
# the resulting latent, and track the classifier's self cross-entropy on the
# generated latents as the per-epoch metric.
for epoch in range(params["n_epoch"]):
    loss = []
    batch_starts = range(0, X_train.shape[0], params["batch_size"])
    for i in batch_starts:
        batch_slice = slice(i, i + params["batch_size"])
        X_batch, y_batch = X_train[batch_slice], y_train[batch_slice]

        latent = encoder(X_batch)
        mem_x = generate(input_latent=latent, train=True)

        mem_pred = classifier(mem_x)
        mem_loss = tf.keras.losses.categorical_crossentropy(mem_pred, mem_pred)
        loss.append(np.mean(mem_loss))
    print("Loss on generate: ",  np.mean(loss))

InvalidArgumentError: ignored