In [None]:
!pip3 install -r requirements.txt

In [80]:
from chad_score import ChadPredictor
import torch
from typing import List
import hashlib
import json
import math

from stable_diffusion.model.clip_text_embedder import CLIPTextEmbedder
from stable_diffusion.model.clip_image_encoder import CLIPImageEncoder
from stable_diffusion import StableDiffusion
from stable_diffusion.constants import ModelsPathTree
from stable_diffusion.utils.utils import (
    get_device,
    get_memory_status,
    to_pil,
    save_image_grid,
    show_image_grid,
)

from os.path import join
import os, sys

# Base directory of the project; it is prepended to the module search path.
base_dir = "./"
sys.path.insert(0, base_dir)

# --- Run configuration ---
SEED = 1337                   # seed passed to image generation
NOISE_MAX_MULTIPLIER = 0.5
BATCH_SIZE = 1
POPULATION_SIZE = 12          # default population size of the GA loop
GEN_IMAGE_N_GENERATIONS = 50
N_STEPS = 10                  # passed to StableDiffusion as `n_steps`

# --- Directories (all stored as absolute paths) ---
EMBEDDED_PROMPTS_DIR = os.path.abspath(join(base_dir, "./input/embedded_prompts/"))
OUTPUT_DIR = os.path.abspath(join(base_dir, "./output/ga/"))
IMAGES_DIR = os.path.abspath(join(OUTPUT_DIR, "images/"))
FEATURES_DIR = os.path.abspath(join(OUTPUT_DIR, "features/"))

# Empty prompt; it is the default `null_prompt` argument of `embed_and_save_prompts`.
NULL_PROMPT = ""
# Text prompts (one interior scene each). They are embedded by `embed_and_save_prompts`
# and the resulting embeddings are handed to the genetic algorithm as its initial population.
PROMPT = [
    "Bedroom interior, mid century modern, retro, vintage, designer furniture made of wood and plastic, concrete nightstand, wood walls, potted plant on a shelf, large window, outdoor cityscape view, beautiful sunset, cinematic, concept art, sustainable architecture, octane render, utopia, ethereal, cinematic light.",
    "Dining room interior, mid century modern, retro, vintage, designer dining set made of wood and plastic, concrete floor, wood paneling, flower vase on a sideboard, large window overlooking a garden, outdoor countryside landscape, beautiful sunset, cinematic, concept art, sustainable architecture, octane render, utopia, ethereal, cinematic light.",
    "Office interior, mid century modern, retro, vintage, designer desk made of wood and plastic, concrete bookshelf, wood-paneled walls, potted plant on a window sill, large window with a view of the ocean, outdoor coastal landscape, beautiful sunset, cinematic, concept art, sustainable architecture, octane render, utopia, ethereal, cinematic light.",
    "Kitchen interior, mid century modern, retro, vintage, designer kitchen island made of wood and plastic, concrete countertops, wood cabinets, flowerpot on a windowsill, large window overlooking a lush garden, outdoor tropical landscape, beautiful sunset, cinematic, concept art, sustainable architecture, octane render, utopia, ethereal, cinematic light.",
    "Study room interior, mid century modern, retro, vintage, designer study desk made of wood and plastic, concrete floor, wood-paneled walls, potted bonsai tree on a shelf, large window with a view of the mountains, outdoor snowy landscape, beautiful sunset, cinematic, concept art, sustainable architecture, octane render, utopia, ethereal, cinematic light.",
    "Bathroom interior, mid century modern, retro, vintage, designer bathtub and sink made of wood and plastic, concrete walls, potted fern on a ledge, large window overlooking a tranquil lake, outdoor lakeside landscape, beautiful sunset, cinematic, concept art, sustainable architecture, octane render, utopia, ethereal, cinematic light.",
    "Studio interior, mid century modern, retro, vintage, designer studio set-up made of wood and plastic, concrete flooring, wood accent walls, flowerpot on a table, large window with a view of a futuristic cityscape, outdoor urban landscape, beautiful sunset, cinematic, concept art, sustainable architecture, octane render, utopia, ethereal, cinematic light.",
    "Lounge interior, mid century modern, retro, vintage, designer lounge chairs made of wood and plastic, concrete coffee table, wood-paneled walls, potted cactus on a shelf, large window overlooking a desert landscape, beautiful sunset, cinematic, concept art, sustainable architecture, octane render, utopia, ethereal, cinematic light."
]

# Passing None lets `get_device` choose the device itself.
# To ask the user instead, use:
# DEVICE = input("Set device: 'cuda:i' or 'cpu'")
DEVICE = get_device(None)


# Report the directories used by this run, one per line.
print(EMBEDDED_PROMPTS_DIR, OUTPUT_DIR, IMAGES_DIR, FEATURES_DIR, sep="\n")

# Model path lookup rooted at the base directory.
pt = ModelsPathTree(base_directory=base_dir)

# Create every working directory; already-existing ones are left alone.
for _path in (EMBEDDED_PROMPTS_DIR, OUTPUT_DIR, IMAGES_DIR, FEATURES_DIR):
    os.makedirs(_path, exist_ok=True)

INFO: `device` is None. Using device  cuda:0.
INFO: Using CUDA device 0: NVIDIA GeForce RTX 3090.
/root/repo-personal/kcg-ml-sd1p4/input/embedded_prompts
/root/repo-personal/kcg-ml-sd1p4/output/ga
/root/repo-personal/kcg-ml-sd1p4/output/ga/images
/root/repo-personal/kcg-ml-sd1p4/output/ga/features


In [52]:
def calculate_sha256(tensor):
    """Return the SHA-256 hex digest of a tensor's raw element bytes.

    Args:
        tensor: A ``torch.Tensor`` on any device. It may require grad.

    Returns:
        The hexadecimal SHA-256 digest (``str``) of the tensor data as laid
        out by ``numpy().tobytes()``.
    """
    # `detach()` allows tensors that require grad to be converted to numpy;
    # `cpu()` is a no-op for tensors already on the CPU, so no device check is needed.
    tensor_bytes = tensor.detach().cpu().numpy().tobytes()
    return hashlib.sha256(tensor_bytes).hexdigest()


def embed_and_save_prompts(prompts: list, null_prompt=NULL_PROMPT):
    """Embed `prompts` and `null_prompt` with CLIP and save both embeddings.

    The null-prompt embedding is written to ``null_cond.pt`` and the prompt
    embeddings to ``embedded_prompts.pt``, both inside ``EMBEDDED_PROMPTS_DIR``.
    The embedder is moved to the CPU and deleted afterwards, and the CUDA cache
    is emptied; memory status is reported before and after that cleanup.

    Returns:
        A tuple ``(embedded_prompts, null_cond)``.
    """
    null_cond_path = join(EMBEDDED_PROMPTS_DIR, "null_cond.pt")
    prompts_path = join(EMBEDDED_PROMPTS_DIR, "embedded_prompts.pt")

    embedder = CLIPTextEmbedder(device=get_device())
    embedder.load_submodels(**pt.embedder_submodels)

    # Null prompt first, then the actual prompts.
    null_cond = embedder(null_prompt)
    torch.save(null_cond, null_cond_path)
    print("Null prompt embedding saved at: ", f"{null_cond_path}")

    embedded_prompts = embedder(prompts)
    torch.save(embedded_prompts, prompts_path)
    print("Prompts embeddings saved at: ", f"{prompts_path}")

    # Release the embedder and report memory before/after.
    get_memory_status()
    embedder.to("cpu")
    del embedder
    torch.cuda.empty_cache()
    get_memory_status()

    return embedded_prompts, null_cond

def normalized(a, axis=-1, order=2):
    """Divide `a` by its `order`-norm taken along `axis`.

    Entries whose norm is exactly zero are divided by 1 instead, so they are
    returned unchanged rather than producing a division by zero.
    """
    import numpy as np

    norms = np.atleast_1d(np.linalg.norm(a, ord=order, axis=axis))
    zero_norm = norms == 0
    norms[zero_norm] = 1  # keep zero vectors as they are
    divisor = np.expand_dims(norms, axis)
    return a / divisor

In [53]:
# Embed every prompt in PROMPT; the second return value is the embedding of the null prompt.
embedded_prompts, null_prompt = embed_and_save_prompts(PROMPT)
# Size of the first dimension of the prompt embeddings.
num_images = embedded_prompts.shape[0]
# `embedding` is another name for the same tensor.
embedding = embedded_prompts

INFO: `device` is None. Using device  cuda:0.
INFO: Using CUDA device 0: NVIDIA GeForce RTX 3090.
INFO: Device given. Using device cuda:0.
INFO: Using CUDA device 0: NVIDIA GeForce RTX 3090.
Null prompt embedding saved at:  /root/repo-personal/kcg-ml-sd1p4/input/embedded_prompts/null_cond.pt
Prompts embeddings saved at:  /root/repo-personal/kcg-ml-sd1p4/input/embedded_prompts/embedded_prompts.pt
Total: 24259 MiB
Free: 14544 MiB
Used: 9715 MiB
Total: 24259 MiB
Free: 15700 MiB
Used: 8559 MiB


In [87]:
# generate noise and add to the embedded prompt
def add_noise_to_embeds(embedded_prompts, noise_multiplier):
    """Return `embedded_prompts` plus scaled Gaussian noise.

    The mean and standard deviation are computed over the last of the three
    dimensions (``dim=2``). For every position of the first two dimensions a
    normal distribution with that mean and standard deviation is sampled once
    per element of the last dimension, so the noise has the same shape as the
    input.

    Args:
        embedded_prompts: A 3-D floating-point ``torch.Tensor``.
        noise_multiplier: Scalar factor applied to the noise before it is added.

    Returns:
        ``embedded_prompts + noise_multiplier * noise`` (a new tensor).

    Note:
        Sampling uses PyTorch's global random state; call ``torch.manual_seed``
        beforehand when reproducible noise is needed.
    """
    row_mean = embedded_prompts.mean(dim=2)
    row_std = embedded_prompts.std(dim=2)
    dist = torch.distributions.normal.Normal(loc=row_mean, scale=row_std)

    # One sample per element of the last dimension, taken from the input shape
    # instead of a hard-coded size.
    feature_dim = embedded_prompts.shape[2]
    samples = dist.sample(sample_shape=torch.Size([feature_dim]))

    # `samples` is (feature, d0, d1); move the sample axis last -> (d0, d1, feature).
    noise = samples.permute(1, 2, 0)

    return embedded_prompts + noise_multiplier * noise

In [55]:
# CLIP is imported here because it is only needed from this cell onwards.
import clip

# Load Stable Diffusion: autoencoder, decoder and UNet weights come from the path tree `pt`.
sd = StableDiffusion(device=DEVICE, n_steps=N_STEPS)
sd.quick_initialize().load_autoencoder(**pt.autoencoder).load_decoder(**pt.decoder)
sd.model.load_unet(**pt.unet)

# Device used for the aesthetic scorer and the CLIP image model.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the chad (aesthetic) score predictor.
# `map_location` lets a checkpoint that was saved from a GPU load on a CPU-only host.
# NOTE(review): this is an absolute path from the filesystem root, unlike the other
# model paths which are resolved through `pt` — confirm it is intended.
chad = ChadPredictor(768)
chad.load_state_dict(
    torch.load(
        "/input/models/aesthetic_scorer/sac+logos+ava1-l14-linearMSE.pth",
        map_location=device,
    )
)
chad.to(device)
chad.eval()

# CLIP model and its image preprocessing, used to compute image features for scoring.
image_features_clip_model, preprocess = clip.load("ViT-L/14", device=device)  #RN50x64

INFO: Device given. Using device cuda:0.
INFO: Using CUDA device 0: NVIDIA GeForce RTX 3090.
INFO: Device given. Using device cuda:0.
INFO: Using CUDA device 0: NVIDIA GeForce RTX 3090.
Autoencoder loaded from: /root/repo-personal/kcg-ml-sd1p4/input/model/autoencoder/autoencoder.ckpt
Decoder loaded from: /root/repo-personal/kcg-ml-sd1p4/input/model/autoencoder/decoder.ckpt
INFO: `device` is None. Using device  cuda:0.
INFO: Using CUDA device 0: NVIDIA GeForce RTX 3090.


In [62]:
def generate_images_from_embeddings(embedded_prompts_array, null_prompt):
    """Generate image(s) from prompt embeddings with the module-level `sd` model.

    Args:
        embedded_prompts_array: Prompt embedding tensor; the callers in this
            notebook pass a tensor reshaped to (1, 77, 768).
        null_prompt: Embedding of the null prompt, forwarded unchanged.

    Returns:
        Whatever ``sd.generate_images_from_embeddings`` returns for the fixed
        seed ``SEED``.
    """
    # The embeddings are forwarded as given. The previous version sliced them with
    # an index `i` that is not defined in this function.
    return sd.generate_images_from_embeddings(
        seed=SEED, embedded_prompt=embedded_prompts_array, null_prompt=null_prompt
    )

In [None]:
import pygad
import numpy as np
import random

# Function to calculate the chad score for batch of images
def calculate_chad_score(ga_instance, solution, solution_idx):
    """Fitness function for PyGAD: chad score of the image generated from `solution`.

    Args:
        ga_instance: The running ``pygad.GA`` instance (not used here).
        solution: Flat numpy array that is reshaped to (1, 77, 768).
        solution_idx: Index of the solution in the population (not used here).

    Returns:
        The predicted chad score as a Python float.
    """
    # Rebuild the embedding tensor from the flat gene vector and move it to `device`.
    embedding = torch.tensor(solution.reshape(1, 77, 768), dtype=torch.float32).to(device)

    # Generate an image for this embedding and prepare it for the CLIP image model.
    image = generate_images_from_embeddings(embedding, null_prompt)
    pil_image = to_pil(image[0])
    clip_input = preprocess(pil_image).unsqueeze(0).to(device)

    with torch.no_grad():
        image_features = image_features_clip_model.encode_image(clip_input)

        # Normalise the CLIP features before scoring them.
        im_emb_arr = normalized(image_features.cpu().detach().numpy())

        # Device-agnostic float32 conversion (a CUDA-only tensor type would fail on CPU).
        scorer_input = torch.from_numpy(im_emb_arr).to(device=device, dtype=torch.float32)
        prediction = chad(scorer_input)

    return prediction.item()

# Define the GA loop function
def genetic_algorithm_loop(
    sd,
    embedded_prompts,
    null_prompt,
    generations=10,
    population_size=POPULATION_SIZE,
    mutation_rate=0.4,
    num_parents_mating=2,
    save_every=1,
):
    """Evolve prompt embeddings with PyGAD, using the chad score as fitness.

    The embeddings are flattened to one gene vector per individual and used as
    the initial population. After each generation the best fitness is printed
    and, every `save_every` generations, the best individual's image is shown
    and saved to ``IMAGES_DIR``. When the run ends the best individual's image
    is saved as ``final.png``.

    Args:
        sd: Model object providing ``generate_images_from_embeddings``.
        embedded_prompts: 3-D ``torch.Tensor`` of prompt embeddings; the first
            dimension indexes the individuals.
        null_prompt: Null-prompt embedding forwarded to image generation.
        generations: Number of generations to run.
        population_size: Passed to PyGAD as ``sol_per_pop``.
        mutation_rate: Fraction of genes to mutate (converted to a percentage).
        num_parents_mating: Number of parents selected for mating.
        save_every: Save the best image every this many generations; a falsy
            value disables the per-generation images.

    Returns:
        The best solution as a numpy array reshaped to one embedding
        (``(1,) + embedded_prompts.shape[1:]``).
    """
    embedded_prompts_array = embedded_prompts.detach().cpu().numpy()
    num_individuals = embedded_prompts_array.shape[0]
    num_genes = embedded_prompts_array.shape[1] * embedded_prompts_array.shape[2]
    single_shape = (1,) + embedded_prompts_array.shape[1:]
    initial_population = embedded_prompts_array.reshape(num_individuals, num_genes).tolist()

    def _render(solution):
        """Generate the image of one flat solution and return it as a PIL image."""
        embedding = torch.tensor(solution.reshape(single_shape), dtype=torch.float32).to(device)
        image = sd.generate_images_from_embeddings(
            seed=SEED, embedded_prompt=embedding, null_prompt=null_prompt
        )
        return to_pil(image[0])

    def _report_generation(ga):
        """Print the best score of the finished generation and optionally save its image."""
        generation = ga.generations_completed - 1  # zero-based, used in the file name
        # Reuse the fitness values PyGAD already computed; each evaluation renders an image.
        best, best_fitness, _ = ga.best_solution(pop_fitness=ga.last_generation_fitness)

        print("Generation:", generation)
        print("The current best chadscore for our youngsters is:", best_fitness)

        if save_every and generation % save_every == 0:
            pil_image = _render(best)
            filename = f"{IMAGES_DIR}/{generation}.png"
            pil_image.show()
            pil_image.save(filename)

    ga_instance = pygad.GA(
        num_generations=generations,
        num_parents_mating=num_parents_mating,
        fitness_func=calculate_chad_score,
        sol_per_pop=population_size,
        num_genes=num_genes,
        initial_population=initial_population,
        mutation_percent_genes=mutation_rate * 100,
        on_generation=_report_generation,
    )

    # Actually evolve the population; selection, crossover and mutation are PyGAD's own.
    ga_instance.run()

    # `best_solution()` returns (solution, fitness, index).
    best_solution, best_solution_fitness, _ = ga_instance.best_solution(
        pop_fitness=ga_instance.last_generation_fitness
    )

    # Save the image of the final best solution.
    final_filename = f"{IMAGES_DIR}/final.png"
    pil_final = _render(best_solution)
    pil_final.show()
    pil_final.save(final_filename)

    return best_solution.reshape(single_shape)

# Call the GA loop function with your initialized StableDiffusion model
best_solution = genetic_algorithm_loop(sd, embedded_prompts, null_prompt)

print("Best solution found!")

print("Saving solution...")
# Save the solution found by the GA (not the initial prompt embeddings).
# `as_tensor` accepts either a numpy array or a tensor.
torch.save(
    torch.as_tensor(best_solution, dtype=torch.float32),
    join(EMBEDDED_PROMPTS_DIR, "embedded_final_solution.pt"),
)

In [50]:
# Clean unused loaded models, then release the GPU memory PyTorch still caches for them
# (same del + empty_cache pattern as in `embed_and_save_prompts`).
del preprocess, image_features_clip_model, sd
torch.cuda.empty_cache()