# Aula 1 - Criando um gerador

## Vídeo 1.2 - Carregando o fmnist

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Load Fashion MNIST; the test split is discarded.
(train_images, train_labels), _ = tf.keras.datasets.fashion_mnist.load_data()

# Add an explicit channel axis and cast to float32, then rescale each pixel
# with (p - 127.5) / 127.5, which maps 0..255 onto [-1, 1].
train_images = train_images.reshape(-1, 28, 28, 1).astype('float32')
train_images = (train_images - 127.5) / 127.5

# Number of images per training batch.
batch_size = 256

# Input pipeline: one element per image, shuffled with a 60000-element buffer,
# then grouped into batches.
train_ds = tf.data.Dataset.from_tensor_slices(train_images)
train_ds = train_ds.shuffle(60000)
train_ds = train_ds.batch(batch_size)

In [None]:
# Preview the first few training images side by side on a single row.
num_images_to_show = 10
plt.figure(figsize=(10, 10))
for posicao, imagem in enumerate(train_images[:num_images_to_show], start=1):
    plt.subplot(1, num_images_to_show, posicao)
    plt.imshow(imagem.reshape(28, 28), cmap='gray')
    plt.axis('off')
plt.show()

## Vídeo 1.3 - Estruturando um gerador

In [None]:
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

In [None]:
def constroi_gerador():
    """Build the generator network.

    The model takes a 100-dimensional vector, projects it with a bias-free
    Dense layer, reshapes the result to a 7x7x256 map and upsamples it with
    three bias-free transposed convolutions (strides 1, 2 and 2, 'same'
    padding), giving a 28x28x1 output squashed by `tanh`.

    Returns:
        An uncompiled Keras `Sequential` model.
    """
    camadas = [
        layers.Input(shape=(100,)),

        # Dense projection of the input vector.
        layers.Dense(7 * 7 * 256, use_bias=False),
        layers.BatchNormalization(),
        layers.LeakyReLU(),

        # 7x7x256 -> 7x7x128 (stride 1 keeps the spatial size).
        layers.Reshape((7, 7, 256)),
        layers.Conv2DTranspose(128, (5, 5), strides=(1, 1), padding='same', use_bias=False),
        layers.BatchNormalization(),
        layers.LeakyReLU(),

        # 7x7 -> 14x14.
        layers.Conv2DTranspose(64, (5, 5), strides=(2, 2), padding='same', use_bias=False),
        layers.BatchNormalization(),
        layers.LeakyReLU(),

        # 14x14 -> 28x28, single channel, tanh output.
        layers.Conv2DTranspose(1, (5, 5), strides=(2, 2), padding='same', use_bias=False, activation='tanh'),
    ]

    return Sequential(camadas)


## Vídeo 1.4 - Gerando uma imagem

In [None]:
# Instantiate the generator
gerador = constroi_gerador()

In [None]:
# Build a single noise vector of length `dimensao_ruido` to feed the generator
dimensao_ruido = 100
ruido = tf.random.normal([1, dimensao_ruido])

In [None]:
# Produce an image from the noise vector; training=False runs the layers in inference mode
imagem_gerada = gerador(ruido, training=False)

In [None]:
# Display the generated image.
# The channel axis is indexed away explicitly so imshow receives a plain 2-D
# array, and the grayscale colormap matches how the dataset images are shown
# in the other cells of this notebook.
import matplotlib.pyplot as plt
plt.imshow((imagem_gerada[0, :, :, 0] * 127.5 + 127.5).numpy(), cmap='gray')
plt.axis('off')
plt.show()

# Aula 2 - Implementando um discriminador

## Vídeo 2.1 - Criando a função do discriminador

In [None]:
def constroi_discriminador():
    """Build the discriminator network.

    The model takes a 28x28x1 image and applies two stride-2 convolution
    stages (64 then 128 filters), each followed by LeakyReLU and 30% dropout.
    The feature maps are then flattened into a Dense layer with a single
    output unit and no activation.

    Returns:
        An uncompiled Keras `Sequential` model.
    """
    camadas = [
        layers.Input(shape=(28, 28, 1)),

        # First strided convolution stage.
        layers.Conv2D(64, (5, 5), strides=(2, 2), padding='same'),
        layers.LeakyReLU(),
        layers.Dropout(0.3),

        # Second strided convolution stage.
        layers.Conv2D(128, (5, 5), strides=(2, 2), padding='same'),
        layers.LeakyReLU(),
        layers.Dropout(0.3),

        # Flatten the feature maps so the Dense head can consume them.
        layers.Flatten(),
        layers.Dense(1),
    ]

    return Sequential(camadas)

In [None]:
# Smoke test: build the discriminator and score the image produced earlier
# by the generator, then print the raw output.
discriminador = constroi_discriminador()
decisao = discriminador(imagem_gerada)
print(decisao)

## Vídeo 2.2 - Definindo as funções de custo

In [None]:
# Shared binary cross-entropy loss; from_logits=True means it expects raw
# (pre-sigmoid) scores as predictions.
cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)

In [None]:
def custo_discriminador(real_output, fake_output):
    """Compute the discriminator loss.

    Args:
        real_output: discriminator scores for real images.
        fake_output: discriminator scores for generated images.

    Returns:
        Cross-entropy of `real_output` against all-ones targets plus
        cross-entropy of `fake_output` against all-zeros targets.
    """
    alvo_real = tf.ones_like(real_output)
    alvo_falso = tf.zeros_like(fake_output)
    return cross_entropy(alvo_real, real_output) + cross_entropy(alvo_falso, fake_output)

In [None]:
def custo_gerador(fake_output):
    """Compute the generator loss.

    Args:
        fake_output: discriminator scores for generated images.

    Returns:
        Cross-entropy of `fake_output` against all-ones targets, i.e. the
        loss is low when the discriminator scores the fakes as real.
    """
    alvo = tf.ones_like(fake_output)
    return cross_entropy(alvo, fake_output)

In [None]:
# One Adam optimizer per network (both constructed with 1e-4), so each
# network keeps its own optimizer state.
otimizador_gerador = tf.keras.optimizers.Adam(1e-4)
otimizador_discriminador = tf.keras.optimizers.Adam(1e-4)

In [None]:
import os

In [None]:
# Checkpoint object that tracks both optimizers and both models.
# Files are written under `checkpoint_dir` using the "ckpt" prefix.
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(generator_optimizer=otimizador_gerador,
                                 discriminator_optimizer=otimizador_discriminador,
                                 generator=gerador,
                                 discriminator=discriminador)

## Vídeo 2.3 - Definindo um loop de treinamento

In [None]:
# Training configuration: number of epochs, noise vector length, and how many
# preview images are generated after each epoch.
epocas = 50
dimensao_ruido = 100
numero_exemplos_treinamento = 16

# Fixed batch of noise vectors, created once so the per-epoch preview images
# are generated from the same inputs every time.
seed = tf.random.normal([numero_exemplos_treinamento, dimensao_ruido])

In [None]:
@tf.function
def passo_treino(images):
    """Run one adversarial training step on a batch of real images.

    Generates one fake image per real image, scores both sets with the
    discriminator, computes the generator and discriminator losses, and
    applies one optimizer update to each network.

    Args:
        images: batch of real images fed to the discriminator.

    Returns:
        None. The weights of `gerador` and `discriminador` are updated in place.
    """
    # Size the noise batch from the incoming images rather than the global
    # `batch_size`: the last batch of an epoch can be smaller, and the global
    # name is rebound to other values later in the notebook.
    tamanho_lote = tf.shape(images)[0]
    ruido = tf.random.normal([tamanho_lote, dimensao_ruido])

    # Two tapes, one per network, because each loss is differentiated with
    # respect to a different set of variables.
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        generated_images = gerador(ruido, training=True)

        real_output = discriminador(images, training=True)
        fake_output = discriminador(generated_images, training=True)

        gen_loss = custo_gerador(fake_output)
        disc_loss = custo_discriminador(real_output, fake_output)

    gradients_of_generator = gen_tape.gradient(gen_loss, gerador.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, discriminador.trainable_variables)

    otimizador_gerador.apply_gradients(zip(gradients_of_generator, gerador.trainable_variables))
    otimizador_discriminador.apply_gradients(zip(gradients_of_discriminator, discriminador.trainable_variables))

In [None]:
import time
from IPython.display import clear_output

In [None]:
def treino(dataset, epocas):
    """Train the GAN for `epocas` epochs.

    After each epoch the preview grid is regenerated and saved, a checkpoint
    is written every 15 epochs, and the elapsed time is printed. A final
    preview is produced once the loop ends.

    Args:
        dataset: iterable of image batches, each passed to `passo_treino`.
        epocas: number of full passes over `dataset`.
    """
    for epoca in range(1, epocas + 1):
        start = time.time()

        for image_batch in dataset:
            passo_treino(image_batch)

        # Produce the images used later to build a GIF
        clear_output(wait=True)
        gera_e_salva_imagens(gerador, epoca, seed)

        # Save the model every 15 epochs
        if epoca % 15 == 0:
            checkpoint.save(file_prefix=checkpoint_prefix)

        print('Tempo por época {} é {} s'.format(epoca, time.time()-start))

    # Generate once more after the final epoch. `epocas` is used instead of the
    # loop variable, which does not exist when the loop body never ran
    # (epocas == 0).
    clear_output(wait=True)
    gera_e_salva_imagens(gerador, epocas, seed)

In [None]:
def gera_e_salva_imagens(modelo, epoca, entrada):
    """Generate images with `modelo`, plot them on a grid and save the figure.

    Args:
        modelo: callable model; invoked as `modelo(entrada, training=False)`.
        epoca: integer used in the output file name
            (`image_at_epoch_XXXX.png`, zero-padded to 4 digits).
        entrada: batch of inputs for the model; one image is plotted per entry.
    """
    # Local import: `math` is not imported before this cell in the notebook.
    import math

    # Note that `training` is set to False so that all layers run in
    # inference mode (this matters for batchnorm).
    previsao = modelo(entrada, training=False)

    # Keep the original 4x4 layout for up to 16 images and grow the square
    # grid for larger batches, instead of failing on the 17th subplot.
    total = previsao.shape[0]
    lado = max(4, math.ceil(math.sqrt(total)))

    fig = plt.figure(figsize=(lado, lado))

    for i in range(total):
        plt.subplot(lado, lado, i + 1)
        # Undo the (p - 127.5) / 127.5 scaling before display.
        plt.imshow(previsao[i, :, :, 0] * 127.5 + 127.5, cmap='gray')
        plt.axis('off')

    plt.savefig('image_at_epoch_{:04d}.png'.format(epoca))
    plt.show()
    # Release the figure explicitly: this function is called once per epoch.
    plt.close(fig)

## Vídeo 2.4 - Aplicando o treinamento

In [None]:
# Run the full training loop
treino(train_ds, epocas)

In [None]:
# Restore the most recent checkpoint found in `checkpoint_dir`.
# NOTE(review): `treino` only saves every 15 epochs, so with epocas = 50 the
# latest checkpoint predates the end of training; restoring here presumably
# replaces the final weights with that checkpoint's - confirm this is intended.
checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))


In [None]:
# Persist the generator as a standalone .keras file
gerador.save('modelo_gerador.keras')

In [None]:
from tensorflow.keras.models import load_model

# Reload the generator from disk
gerador_carregado = load_model('modelo_gerador.keras')

In [None]:
# Sample a new noise vector
new_noise = tf.random.normal([1, dimensao_ruido])

# Generate a new image with the reloaded model
new_generated_image = gerador_carregado(new_noise, training=False)

# Display the new image. The channel axis is indexed away so imshow receives a
# 2-D array, and the grayscale colormap matches the other image cells.
plt.imshow((new_generated_image[0, :, :, 0] * 127.5 + 127.5).numpy(), cmap='gray')
plt.axis('off')
plt.show()


# Aula 3 - Construindo um difusor

## Vídeo 3.1 - Adicionando ruído

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from keras import layers
from tqdm.auto import trange, tqdm

In [None]:
# Load the Fashion MNIST dataset
(X_treino, y_treino), (X_teste, y_teste) = tf.keras.datasets.fashion_mnist.load_data()
# Rescale the training images with p / 127.5 - 1, which maps 0..255 onto [-1, 1]
X_treino = (X_treino / 127.5) - 1.0

# Append a trailing channel axis to the 28x28 images
X_treino = np.expand_dims(X_treino, axis=-1)

In [None]:
IMG_SIZE = 28     # Side length of the input images; Fashion MNIST is 28x28
BATCH_SIZE = 128  # Batch size used for training
timesteps = 16    # Number of steps for a noisy image to become clean
time_bar = 1 - np.linspace(0, 1.0, timesteps + 1) # timesteps + 1 evenly spaced weights going from 1.0 down to 0.0

In [None]:
def cvtImg(img):
    """Min-max rescale an array to [0, 1] and return it as float32.

    Args:
        img: NumPy array of pixel values (any shape).

    Returns:
        A new float32 array where the smallest input value maps to 0 and the
        largest to 1. A constant input maps to all zeros.
    """
    img = img - img.min()
    pico = img.max()
    # A constant image has zero range; skip the division so the result is
    # all zeros instead of NaN from 0 / 0.
    if pico > 0:
        img = img / pico
    return img.astype(np.float32)

In [None]:
def show_examples(x):
    """Plot up to the first 25 entries of `x` on a 5x5 grayscale grid.

    Args:
        x: array of images indexed along its first axis; each entry is
            rescaled with `cvtImg` and squeezed before being drawn.
    """
    plt.figure(figsize=(10, 10))
    quantidade = min(25, x.shape[0])  # never draw more than the grid can hold
    for posicao, exemplo in enumerate(x[:quantidade], start=1):
        plt.subplot(5, 5, posicao)
        plt.imshow(cvtImg(exemplo).squeeze(), cmap='gray')  # grayscale display
        plt.axis('off')

In [None]:
# Preview the first training images (show_examples draws at most 25)
show_examples(X_treino)

In [None]:
def forward_noise(x, t):
    """Blend images with Gaussian noise at two consecutive timesteps.

    For a weight `w` read from `time_bar`, the blended image is
    `x * (1 - w) + noise * w`. The same noise sample is used for both
    returned images.

    Args:
        x: array of images; the weights are reshaped to (-1, 1, 1, 1) to
            broadcast against it.
        t: integer index (or array of indices) into `time_bar`; `t + 1` must
            also be a valid index.

    Returns:
        Tuple `(img_a, img_b)`: the blend at index `t` and at index `t + 1`.
    """
    ruido = np.random.normal(size=x.shape)  # one noise mask shared by both blends

    def misturar(indice):
        # Look up the noise weight and reshape it for broadcasting.
        peso = time_bar[indice].reshape((-1, 1, 1, 1))
        return x * (1 - peso) + ruido * peso

    return misturar(t), misturar(t + 1)

In [None]:
def generate_ts(num):
    """Draw `num` random integer timesteps from the range [0, timesteps)."""
    return np.random.randint(low=0, high=timesteps, size=num)

In [None]:
# Build a few noisy training examples
t = generate_ts(3)  # Draw one random timestep for each of the 3 examples
a, b = forward_noise(X_treino[:3], t)
show_examples(a)

## Vídeo 3.2 - Implementando uma U-net

In [None]:
def block(x):
    """Apply Conv2D(128, 3x3, 'same') -> LayerNormalization -> ReLU to `x`."""
    convolved = layers.Conv2D(128, kernel_size=3, padding='same')(x)
    normalized = layers.LayerNormalization()(convolved)
    return layers.Activation('relu')(normalized)

In [None]:
def make_model():
    """Build the U-Net-style denoising model.

    The model has two inputs, an image (`x_input`, 28x28x1) and a timestep
    (`x_ts_input`, one value per sample), and one output with a single
    channel at the input resolution. The image goes through a downsampling
    path, is flattened and merged with the timestep embedding in a small MLP,
    then is upsampled again with skip connections from the downsampling path.

    Returns:
        An uncompiled Keras functional `Model`.
    """
    # Image input sized for Fashion MNIST
    x = x_input = layers.Input(shape=(28, 28, 1), name='x_input')

    # Timestep input, embedded with Dense -> LayerNormalization -> ReLU
    x_ts = x_ts_input = layers.Input(shape=(1,), name='x_ts_input')
    x_ts = layers.Dense(192)(x_ts)
    x_ts = layers.LayerNormalization()(x_ts)
    x_ts = layers.Activation('relu')(x_ts)

    # ----- left ( down ) -----
    # Each stage's output is kept (x28, x14, x7, x4) for the skip connections.
    x = x28 = block(x)  # 28x28 -> 28x28
    x = layers.MaxPool2D(2, padding='same')(x)  # 28x28 -> 14x14

    x = x14 = block(x)  # 14x14 -> 14x14
    x = layers.MaxPool2D(2, padding='same')(x)  # 14x14 -> 7x7

    x = x7 = block(x)  # 7x7 -> 7x7
    x = layers.MaxPool2D(2, padding='same')(x)  # 7x7 -> 4x4

    x = x4 = block(x)  # 4x4 -> 4x4

    # ----- MLP -----
    # Flatten the bottleneck features and merge them with the timestep embedding.
    x = layers.Flatten()(x)
    x = layers.Concatenate()([x, x_ts])
    x = layers.Dense(128)(x)
    x = layers.LayerNormalization()(x)
    x = layers.Activation('relu')(x)

    # Project back to a 4x4 map with 32 channels.
    x = layers.Dense(4 * 4 * 32)(x)
    x = layers.LayerNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.Reshape((4, 4, 32))(x)

    # ----- right ( up ) -----
    x = layers.Concatenate()([x, x4])
    x = block(x)
    x = layers.Conv2DTranspose(128, kernel_size=3, strides=2, padding='same')(x)  # 4x4 -> 8x8

    # Crop one row and one column so the map lines up with the 7x7 skip tensor
    x = layers.Cropping2D(((0, 1), (0, 1)))(x)  # 8x8 -> 7x7

    x = layers.Concatenate()([x, x7])
    x = block(x)
    x = layers.Conv2DTranspose(128, kernel_size=3, strides=2, padding='same')(x)  # 7x7 -> 14x14

    x = layers.Concatenate()([x, x14])
    x = block(x)
    x = layers.Conv2DTranspose(128, kernel_size=3, strides=2, padding='same')(x)  # 14x14 -> 28x28

    x = layers.Concatenate()([x, x28])
    x = block(x)

    # ----- output -----
    # 1x1 convolution down to a single output channel.
    x = layers.Conv2D(1, kernel_size=1, padding='same')(x)
    model = tf.keras.models.Model([x_input, x_ts_input], x)
    return model

In [None]:
# Instantiate the denoising model
model = make_model()

In [None]:
# Show the installed TensorFlow version
tf.__version__

In [None]:
# Compile with mean absolute error and Adam (learning rate 0.0008)
model.compile(loss=tf.keras.losses.MeanAbsoluteError(), optimizer=tf.keras.optimizers.Adam(learning_rate=0.0008))

## Vídeo 3.3 - Criando funções de previsão

In [None]:
def predict(model, timesteps=50, batch_size=32):
    """Generate images by repeatedly applying `model` to Gaussian noise.

    Args:
        model: model called as `model.predict([images, t])`.
        timesteps: number of model applications; step `i` is passed as the
            time input.
        batch_size: number of images generated.

    NOTE(review): training draws timestep indices from `generate_ts`, which
    stay below the global `timesteps`; confirm that the larger default of 50
    steps here is intended.
    """
    # Start from pure Gaussian noise.
    amostras = np.random.normal(size=(batch_size, 28, 28, 1))

    for passo in trange(timesteps):
        coluna_tempo = np.full((batch_size, 1), passo)  # time as a column vector
        amostras = model.predict([amostras, coluna_tempo], verbose=0)

    # Rescale the whole batch to [0, 1] using its global min and max.
    minimo = amostras.min()
    maximo = amostras.max()
    amostras = (amostras - minimo) / (maximo - minimo)

    show_examples(amostras)

In [None]:
def predict_step(model, timesteps=50, num_samples=8):
    """Plot snapshots of the first sample as the model is applied step by step.

    Args:
        model: model called as `model.predict([images, t])`.
        timesteps: number of model applications.
        num_samples: number of noise images processed in parallel; only the
            first one is plotted.
    """
    x = np.random.normal(size=(num_samples, 28, 28, 1))  # sized for Fashion MNIST
    capturas = []

    for i in trange(timesteps):
        t = np.full((num_samples, 1), i)  # time as a column vector
        x = model.predict([x, t], verbose=0)
        # Keep one snapshot every 5 steps to limit the number of panels.
        if i % 5 == 0:
            capturas.append(x[0])

    # Rescale each snapshot to [0, 1] independently.
    capturas = [(c - c.min()) / (c.max() - c.min()) for c in capturas]

    plt.figure(figsize=(20, 3))
    total = len(capturas)
    for coluna, img in enumerate(capturas, start=1):
        plt.subplot(1, total, coluna)
        plt.imshow(cvtImg(img), cmap='gray')
        plt.title(f'Step {(coluna - 1) * 5}')
        plt.axis('off')
    plt.tight_layout()
    plt.show()

## Vídeo 3.4 - Treinando a U-Net

In [None]:
def train_one(x_img):
    """Run one training step on a batch of clean images.

    Draws one random timestep per image, builds the blends at `t` and `t + 1`
    with `forward_noise`, and trains the model to map the first blend (plus
    its timestep) to the second.

    Args:
        x_img: batch of clean images.

    Returns:
        The value returned by `model.train_on_batch`.
    """
    passos = generate_ts(len(x_img))
    entrada, alvo = forward_noise(x_img, passos)
    return model.train_on_batch([entrada, passos], alvo)

In [None]:
def train(R=50):
    """Train the model for `R` rounds of 100 random batches each.

    Args:
        R: number of outer rounds shown on the progress bar.
    """
    total = 100  # batches per round
    bar = trange(R)
    for _ in bar:
        for j in range(total):
            # Sample BATCH_SIZE random training images (with replacement).
            indices = np.random.randint(len(X_treino), size=BATCH_SIZE)
            loss = train_one(X_treino[indices])
            # Refresh the progress-bar text every 5 batches.
            if j % 5 == 0:
                pg = (j / total) * 100
                bar.set_description(f'loss: {loss:.5f}, p: {pg:.2f}%')

In [None]:
# Train the denoising model with the default number of rounds
train()

In [None]:
# Generate a batch of images from noise and show them on a grid
predict(model)

In [None]:
# Show intermediate snapshots of the generation process
predict_step(model)

# Aula 4 - Usando o Stable Diffusion

## Vídeo 4.1 - Gerando com o Stable Diffusion

In [None]:
#%pip uninstall keras keras-core keras-cv tensorflow

In [None]:
#%pip install tensorflow==2.15.1 keras==2.15.0 keras-core==0.1.7 keras-cv==0.9.0 

In [None]:
import time
import keras_cv
from tensorflow import keras
import matplotlib.pyplot as plt

In [None]:
# Instantiate Stable Diffusion for 512x512 images
modelo = keras_cv.models.StableDiffusion(img_width=512, img_height=512)

In [None]:
# Generate 3 images from a short text prompt
imagens = modelo.text_to_image(
  "Humanoid dog playing guitar ",
  batch_size=3)


In [None]:
def plot_images(imagens):
    """Show every image in `imagens` side by side on a single row, axes hidden.

    Args:
        imagens: sequence of images accepted by `plt.imshow`.
    """
    plt.figure(figsize=(20, 20))
    total = len(imagens)
    for posicao, imagem in enumerate(imagens, start=1):
        plt.subplot(1, total, posicao)
        plt.imshow(imagem)
        plt.axis("off")




In [None]:
# Show the images generated from the first prompt
plot_images(imagens)

Referência: [Gerando imagens com o Stable Diffusion (tutorial do TensorFlow)](https://www.tensorflow.org/tutorials/generative/generate_images_with_stable_diffusion)

## Vídeo 4.2 - Melhorando a precisão

In [None]:
# Same model, but with a longer prompt that adds style and quality keywords
imagens = modelo.text_to_image(
    "Humanoid cat wearing golden jeans, dark fantasy art, "
    "high quality, highly detailed, elegant, sharp focus, "
    "concept art, character concepts, digital painting, mystery, adventure",
    batch_size=3,
)


In [None]:
plot_images(imagens)

In [None]:
# Switch the global Keras policy to mixed precision (float16 compute)
keras.mixed_precision.set_global_policy("mixed_float16")

In [None]:
# Re-create the model so it is built under the new policy
modelo = keras_cv.models.StableDiffusion(img_width=512, img_height=512)

In [None]:
imagens = modelo.text_to_image(
    "Panda wearing a blue hat, dark fantasy art, "
    "high quality, highly detailed, elegant, sharp focus, "
    "concept art, character concepts, digital painting, mystery, adventure",
    batch_size=3,
)

In [None]:
plot_images(imagens)

In [None]:
# Back to full float32 precision, this time with jit_compile=True (XLA compilation)
keras.mixed_precision.set_global_policy("float32")
modelo = keras_cv.models.StableDiffusion(img_width=512, img_height=512,jit_compile=True)

In [None]:
imagens = modelo.text_to_image(
    "Cat wearing a furry hat, dark fantasy art, "
    "high quality, highly detailed, elegant, sharp focus, "
    "concept art, character concepts, digital painting, mystery, adventure",
    batch_size=3,
)

In [None]:
plot_images(imagens)

# Aula 5 - Gerando animações com o Stable Diffusion

## Vídeo 5.1 - Interpolando entre prompts de texto

In [None]:
import keras_cv
import keras
import matplotlib.pyplot as plt
from keras import optimizers
import numpy as np
import math
from PIL import Image
import tensorflow as tf

In [None]:
# Use mixed precision for the models created below
keras.mixed_precision.set_global_policy("mixed_float16")

In [None]:
# Instantiate the Stable Diffusion model
model = keras_cv.models.StableDiffusion(jit_compile=True)

In [None]:
# Two prompts that differ only in the subject, and the number of
# interpolation points between them
prompt_1 = "Panda wearing a blue hat, dark fantasy art, "
prompt_2 = "Cat wearing a blue hat, dark fantasy art, "
interpolation_steps = 5

# Encode each prompt and drop the singleton dimensions
encoding_1 = tf.squeeze(model.encode_text(prompt_1))
encoding_2 = tf.squeeze(model.encode_text(prompt_2))

# Evenly spaced encodings from the first prompt to the second
interpolated_encodings = tf.linspace(encoding_1, encoding_2, interpolation_steps)

# Show the size of the latent manifold
print(f"Encoding shape: {encoding_1.shape}")

In [None]:
# One fixed noise tensor, passed as `diffusion_noise` so every interpolated
# encoding starts from the same noise
seed = 12345
noise = tf.random.normal((512 // 8, 512 // 8, 4), seed=seed)

# Generate one image per interpolated encoding
images = model.generate_image(
    interpolated_encodings,
    batch_size=interpolation_steps,
    diffusion_noise=noise,
)

In [None]:
def export_as_gif(filename, images, frames_per_second=10, rubber_band=False):
    """Save a sequence of frames as a looping animated GIF.

    Args:
        filename: output path passed to the first frame's `save` method.
        images: sequence of frame objects exposing a PIL-style `save` method.
            The sequence itself is not modified.
        frames_per_second: playback rate; each frame is shown for
            `1000 // frames_per_second` milliseconds.
        rubber_band: when True, `images[2:-1]` in reverse order is appended
            after the last frame so the animation plays back towards the start.

    Raises:
        IndexError: if `images` is empty.
    """
    # Work on a copy: `images += ...` would extend the caller's list in place.
    frames = list(images)
    if rubber_band:
        frames += frames[2:-1][::-1]
    frames[0].save(
        filename,
        save_all=True,
        append_images=frames[1:],
        duration=1000 // frames_per_second,
        loop=0,
    )


In [None]:
# Convert each generated array to a PIL image and export the sequence as a
# GIF at 2 frames per second, with the back-and-forth effect enabled
export_as_gif(
    "panda-cat.gif",
    [Image.fromarray(img) for img in images],
    frames_per_second=2,
    rubber_band=True,
)

In [None]:
# Display the GIF inline (IPython's Image is aliased to avoid clashing with PIL's Image)
from IPython.display import Image as IImage
IImage("panda-cat.gif")

## Vídeo 5.2 - Interpolação manual dos resultados

In [None]:
# Finer interpolation: 150 points, generated 3 at a time (50 batches)
interpolation_steps = 150
batch_size = 3
batches = interpolation_steps // batch_size

# Interpolate between the two prompt encodings and split the result into
# `batches` equal chunks
interpolated_encodings = tf.linspace(encoding_1, encoding_2, interpolation_steps)
batched_encodings = tf.split(interpolated_encodings, batches)

# Generate each chunk with 25 diffusion steps, reusing the same noise tensor,
# and collect the frames as PIL images
images = []
for batch in range(batches):
    images += [
        Image.fromarray(img)
        for img in model.generate_image(
            batched_encodings[batch],
            batch_size=batch_size,
            num_steps=25,
            diffusion_noise=noise,
        )
    ]

# Export at the default frame rate with the back-and-forth effect
export_as_gif("panda-cat-fino.gif", images, rubber_band=True)

In [None]:
#IImage("panda-cat-fino.gif")

## Vídeo 5.3 - Criando um caminho circular com ruído

In [None]:
import tensorflow as tf
import math

# Set the global TensorFlow seed so the noise is consistent between runs
seed = 42
tf.random.set_seed(seed)

# Fix the prompt and its encoding
prompt = "A majestic cat wearing an ornate golden hat, surrounded by floating orbs of light, in a dark illuminist painting, high detail, cinematic lighting, surreal background, elegant fur texture"
encoding = tf.squeeze(model.encode_text(prompt))  # The same encoding is used for every frame

# Circular-walk parameters: 150 frames, generated 3 at a time
walk_steps = 150
batch_size = 3
batches = walk_steps // batch_size

# Initial noise tensor; below it is only read for its shape before `noise` is reassigned
noise = tf.random.normal((512 // 8, 512 // 8, 4), seed=seed)  # Fixed initial noise

# Two float64 noise tensors acting as the axes of the walk.
# NOTE(review): both draws pass the same op-level seed - confirm the two
# tensors actually differ, otherwise the walk collapses onto a single direction.
walk_noise_x = tf.random.normal(noise.shape, dtype="float64", seed=seed)
walk_noise_y = tf.random.normal(noise.shape, dtype="float64", seed=seed)

# Cosine and sine weights over angles from 0 to 4*pi (two full turns)
walk_scale_x = tf.cos(tf.linspace(0, 4, walk_steps) * math.pi)
walk_scale_y = tf.sin(tf.linspace(0, 4, walk_steps) * math.pi)

# Outer products give one weighted noise tensor per step; their sum is the noise for that step
noise_x = tf.tensordot(walk_scale_x, walk_noise_x, axes=0)
noise_y = tf.tensordot(walk_scale_y, walk_noise_y, axes=0)
noise = tf.add(noise_x, noise_y)

# Split the per-step noise into batches
batched_noise = tf.split(noise, batches)

# Generate the frames; no seed is passed because the noise is supplied explicitly
images = []
for batch in range(batches):
    images += [
        Image.fromarray(img)
        for img in model.generate_image(
            encoding,
            batch_size=batch_size,
            num_steps=25,
            diffusion_noise=batched_noise[batch],  # Only the precomputed noise is used
        )
    ]

# Export as a GIF with the back-and-forth effect
export_as_gif("cat_hat_variation.gif", images, rubber_band=True)
