In [1]:
%pip install Levenshtein
%pip install transformers
%pip install accelerate


Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
# IMPORTS

import psutil
import json
import os
import random
import time
import torch
import uuid

from datetime import datetime
from Levenshtein import distance
from jinja2 import Template
from PIL import Image 


import os
import re
import torch
from IPython.display import display
from diffusers import StableDiffusionPipeline
from diffusers import DPMSolverMultistepScheduler
from diffusers import AutoencoderKL


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# VARIOUS HELPER FUNCTIONS

### SIMILARITY CALCULATIONS FOR TRACK NAMES

# Two track names count as "similar" when their edit distance is strictly below this value.
SIMILARITY_DISTANCE = 3
def are_tracks_similar(tracks):
    """Tell whether at least one pair of tracks has near-identical names.

    Args:
        tracks: Indexable sequence of mappings, each exposing a 'name' entry.

    Returns:
        True as soon as one pair of names has a Levenshtein distance strictly
        below SIMILARITY_DISTANCE, False otherwise (including for fewer than
        two tracks).
    """
    total = len(tracks)
    # Lazily walk every unordered pair; any() stops at the first match.
    return any(
        distance(tracks[first]['name'], tracks[second]['name']) < SIMILARITY_DISTANCE
        for first in range(total)
        for second in range(first + 1, total)
    )

def similarity_groups(tracks):
    """Partition tracks into groups of similarly named tracks.

    Tracks are scanned in order. The first track not yet assigned to a group
    starts a new group and acts as its representative; every later unassigned
    track whose name is within SIMILARITY_DISTANCE (exclusive) of the
    representative's name joins that group.

    Args:
        tracks: Indexable sequence of mappings, each exposing a 'name' entry.

    Returns:
        A list of groups, each a non-empty list of tracks whose first element
        is the group's representative. Empty input yields an empty list.
    """
    assigned = [False] * len(tracks)
    groups = []

    for i, anchor in enumerate(tracks):
        if assigned[i]:
            continue

        assigned[i] = True
        group = [anchor]

        for j in range(i + 1, len(tracks)):
            if assigned[j]:
                continue
            if distance(anchor['name'], tracks[j]['name']) < SIMILARITY_DISTANCE:
                assigned[j] = True
                # Record the member; previously it was only flagged and then dropped.
                group.append(tracks[j])

        groups.append(group)

    return groups

### OUTPUT FILE ID NAME
def get_run_id(run_in):
    """Build the identifier string for a run from its parameters.

    Args:
        run_in: Mapping holding 'album_id', 'positive-prompt',
            'negative-prompt', 'inference_steps', 'guidance_scale' and
            'batch_size'.

    Returns:
        The six values, in that order, joined with underscores.
    """
    # model = run_in['model_id'].replace('/', '-')
    id_fields = (
        'album_id',
        'positive-prompt',
        'negative-prompt',
        'inference_steps',
        'guidance_scale',
        'batch_size',
    )
    return "_".join(f"{run_in[field]}" for field in id_fields)


In [5]:
# GET COMPUTER SPECS

# CPU core counts reported by psutil: physical cores only, then all logical cores.
platform_info = {
    'physical_cpu_cores': psutil.cpu_count(logical=False),
    'total_cpu_cores': psutil.cpu_count(logical=True),
}

def get_available_device():
    """Find the best available hardware to run on.

    Preference order: a GPU exposed through torch.cuda (NVIDIA CUDA or
    AMD ROCm), then Apple's MPS backend, then the CPU.

    Returns:
        tuple: ``(device, backend)`` where ``device`` is the torch.device to
        run experiments on and ``backend`` is one of "cuda", "rocm", "mps"
        or "cpu".
    """
    if torch.cuda.is_available():
        # ROCm builds of PyTorch reuse the "cuda" device type (there is no
        # "rocm" torch.device); torch.version.hip tells the two apart.
        if getattr(torch.version, "hip", None) is not None:
            return torch.device("cuda"), "rocm"
        return torch.device("cuda"), "cuda"

    # torch.backends.mps is absent from older PyTorch releases, hence getattr.
    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        # Return the MPS device itself so it matches the reported backend.
        return torch.device("mps"), "mps"

    # Fall back to CPU
    return torch.device("cpu"), "cpu"

# Check device info
device, backend = get_available_device()

# Collect GPU details when a torch.cuda device was selected.
if device.type == "cuda":
    cuda_device_count = torch.cuda.device_count()
    cuda_device_name = torch.cuda.get_device_name(0)
    # ROCm builds of PyTorch also use the "cuda" device type but leave
    # torch.version.cuda unset; report the HIP version there instead.
    cuda_version = torch.version.cuda or getattr(torch.version, "hip", None)
else:
    cuda_device_count = 0
    cuda_device_name = "N/A"
    cuda_version = "N/A"

# Record the selected device and its details alongside the CPU specs.
platform_info.update({
    'device': device.type,
    'backend': backend,
    'cuda_device_count': cuda_device_count,
    'cuda_device_name': cuda_device_name,
    'cuda_version': cuda_version,
})

# print(json.dumps(platform_info, indent=4))

In [6]:
# GET ALBUM DATA

file_id = "" # if need for a specific album, put the file name here

if file_id == "":
    # Only JSON files are candidates, so stray directory entries (checkpoint
    # folders, OS metadata files, ...) can never be picked. Sorting makes the
    # candidate order independent of the filesystem's listing order.
    album_files = sorted(
        entry for entry in os.listdir('input/albums')
        if entry.lower().endswith('.json')
    )
    if not album_files:
        raise FileNotFoundError("No album .json files found in 'input/albums'")
    random_album_file = random.choice(album_files)
else:
    random_album_file = f'{file_id}.json'

# Explicit UTF-8 so the result does not depend on the platform default encoding.
with open(f'input/albums/{random_album_file}', 'r', encoding='utf-8') as file:
    album_data = json.load(file)

In [8]:
# POSITIVE PROMPT TEMPLATES

# Jinja2 templates for the positive prompt, keyed by a short identifier.
# Each one is rendered with an `album` object exposing the fields it references.
pos_prompt_templates = {
    # Full album description: name, artist(s), release date, label and track list.
    '1-long': Template("""\
Album cover for this album:
Album name : {{ album.name }}
Artist{% if album.artists|length > 1 %}s{% endif %} : {{ album.artists | join(', ') }}
Release Date : {{ album.date }}
Label : {{ album.label }}
Tracks:
{% for track in album.tracks %}- {{ track.name }}\n{% endfor %}
"""),
    # Track list only.
    '2-only-tracks': Template("""\
Album cover for these tracks: 
{% for track in album.tracks %}- {{ track.name }}\n{% endfor %}
"""),
}

# Same as the long template, but when track names are similar it lists one
# representative per similarity group instead of every track.
# `are_tracks_similar` and `similarity_groups` must be supplied at render time.
# similarity_groups() yields lists of tracks, so the name is read from the
# first member of each group (reading `.name` on the list itself renders empty).
pos_prompt_templates['3-long-with-track-similarity'] =  Template("""\
Album cover for this album:
Album name : {{ album.name }}
Artist{% if album.artists|length > 1 %}s{% endif %} : {{ album.artists | join(', ') }}
Release Date : {{ album.date }}
Label : {{ album.label }}

{% if are_tracks_similar(album.tracks) %} Track format : {% for group in similarity_groups(album.tracks) %}- {{ group[0].name }}\n{% endfor %}
{% else %} Tracks:
{% for track in album.tracks %}- {{ track.name }}\n{% endfor %}{% endif %}
""")


In [11]:
# NEGATIVE PROMPT TEMPLATES

# Negative prompts keyed by a short identifier.
neg_prompts = {
    '1-no-text': "text",
}

In [12]:
# OTHER PARAMETERS

# Candidate values for each generation setting; every listed value is tried.
INFERENCE_STEPS = [20, 100] # fewer steps run faster but lower the image quality
GUIDANCE_SCALE = [5, 10] # higher values follow the prompt more closely BUT leave less room for creativity
BATCH_SIZE = [1, 2] # number of images to generate in parallel
# Model sweep is currently disabled:
#MODELS = ['sd-legacy/stable-diffusion-v1-5', 'stabilityai/stable-diffusion-2']

In [13]:
# CREATE RUN PARAMETERS

# One entry per combination of prompt template, negative prompt and settings.
runs = []

is_similar = are_tracks_similar(album_data['tracks'])

for pos_key in pos_prompt_templates:
    # Fields shared by every run built from this template.
    run_base = {
        'computer_specs': platform_info,
        'album_id': album_data['id'],
    }

    # Skip prompt if the tracks are not similar
    if pos_key == '3-long-with-track-similarity' and not is_similar:
        print(f"Skipping prompt {pos_key} as tracks are not similar.")
        continue

    # Cartesian product of the remaining settings; the innermost loop varies fastest.
    runs.extend(
        {
            **run_base,
            'positive-prompt': pos_key,
            'negative-prompt': neg_key,
            'inference_steps': step,
            'guidance_scale': scale,
            'batch_size': batch,
            # 'model_id': model,  (model sweep disabled)
        }
        for neg_key in neg_prompts
        for step in INFERENCE_STEPS
        for scale in GUIDANCE_SCALE
        for batch in BATCH_SIZE
    )

print(f"Total runs: {len(runs)}")

Skipping prompt 3-long-with-track-similarity as tracks are not similar.
Total runs: 16


In [None]:
# LOAD MODEL

# Run on CUDA when it is available and fall back to the CPU otherwise,
# instead of failing on a hard-coded "cuda".
pipe_device = "cuda" if torch.cuda.is_available() else "cpu"

# Fine-tuned VAE used to improve image quality. It is handed to
# from_pretrained as the pipeline's `vae` component rather than being
# swapped in after the pipeline has been built and moved.
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float32)

# Stable Diffusion model: https://huggingface.co/stabilityai/stable-diffusion-2
pipe = StableDiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2",
    vae=vae,
    torch_dtype=torch.float32,
)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)

# Move the whole pipeline (including the VAE) to the chosen device in one step.
pipe = pipe.to(pipe_device)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Fetching 13 files:  31%|███       | 4/13 [01:35<03:34, 23.85s/it]


In [None]:
# RUNS

# Every execution of this cell writes into its own timestamped output folder.
curr_time = datetime.now().strftime("%Y%m%d_%H%M%S")

output_dir = f'output/{curr_time}'
os.makedirs(f'{output_dir}/images', exist_ok=True)
os.makedirs(f'{output_dir}/runs', exist_ok=True)

for run_input in runs:
    # get ids for file naming
    img_id = uuid.uuid4().hex[:8]
    run_id = get_run_id(run_input)

    # The similarity helpers are passed in because one template calls them.
    positive_prompt = pos_prompt_templates[run_input['positive-prompt']].render(
        album=album_data,
        are_tracks_similar=are_tracks_similar,
        similarity_groups=similarity_groups,
    )
    negative_prompt = neg_prompts[run_input['negative-prompt']]

    #model_id = run_input['model_id']
    inference_steps = run_input['inference_steps']
    guidance_scale = run_input['guidance_scale']
    batch_size = run_input['batch_size']

    # perf_counter is a monotonic clock intended for measuring elapsed time.
    start_time = time.perf_counter()

    # `num_images_per_prompt` is the pipeline argument that sets how many
    # images one call produces; keep all of them, not just the first.
    imgs = pipe(
        prompt=positive_prompt,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=inference_steps,
        num_images_per_prompt=batch_size,
    ).images

    end_time = time.perf_counter()

    # TODO 2 : choose the measures to be returned
    # Work on a copy so the entries of `runs` are not mutated by this cell.
    run_info = dict(run_input)
    run_info['execution_time'] = end_time - start_time
    run_info['resolution'] = 'N/A'
    run_info['colour_quality'] = 'N/A'
    run_info['ssim'] = 'N/A'
    run_info['clip'] = 'N/A'
    run_info['image_id'] = img_id

    # Save the image(s)
    for i, img in enumerate(imgs):
        img.save(f"{output_dir}/images/{img_id}_{i}.png")

    # Save the run info
    with open(f"{output_dir}/runs/{run_id}.json", 'w') as f:
        json.dump(run_info, f, indent=4)
