<a href="https://colab.research.google.com/github/norflin321/ml/blob/main/txt2img_diffusers_example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%pip install transformers diffusers accelerate

import torch
from diffusers import StableDiffusionPipeline
from matplotlib import pyplot as plt
import datetime

# diffusers docs: https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/text2img#texttoimage-generation

# Identifier of the checkpoint handed to the pipeline loader.
# Alternatives that can be swapped in:
#   "dreamlike-art/dreamlike-diffusion-1.0"
#   "prompthero/openjourney"
model_id = "XpucT/Deliberate"

# --- hyperparameters ---
# Size of the generated image, in pixels.
width = 576
height = 768
# Number of denoising steps. More steps usually give a higher-quality image
# at the cost of slower inference.
steps = 50
# Guidance scale. Higher values tie the image more closely to the text prompt,
# usually at the cost of lower image quality.
guidance = 7.5
# How many images to generate for the prompt.
images_to_generate = 3
# Set this to the seed of a previously generated image to reproduce it;
# leave it as None to have a seed picked for each image.
seed = None

# --- prompts ---
# What the image should contain.
prompt = (
    "happy, smiling cute capybara, best quality, masterpiece, details, "
    "volumetric lighting, dynamical lighting, reflection, reflective textures, "
    "depth of field, dramatic shadows, chibi style, highly intricate, "
    "highly detailed, highly textured hair, highly textured skin, "
    "highly textured clothing, quality render"
)
# What the image should avoid.
negative_prompt = (
    "easynegative, (low quality, worst quality:1.4), bad anatomy, "
    "bad composition, out of frame, duplicate, watermark, signature, text, "
    "deformed, cripple, ugly, additional arms, additional legs, "
    "additional head, two heads, multiple people, group of people"
)

# Resolve the target device first, because the weight precision depends on it.
# Preference order: CUDA GPU, then Apple-silicon MPS, then plain CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Half precision halves memory use on an accelerator, but float16 inference on
# the CPU is unsupported or impractically slow, so fall back to float32 there.
torch_dtype = torch.float32 if device == "cpu" else torch.float16

# safety_checker=None loads the pipeline without its safety-checker component.
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch_dtype, safety_checker=None)
pipe = pipe.to(device)

# Generate `images_to_generate` images for the prompt; each one is saved to
# the working directory, displayed inline, and its seed is printed so it can
# be reproduced later by assigning that value to `seed`.
for _ in range(images_to_generate):
    # Compare against None explicitly: `seed or ...` would silently discard a
    # legitimate seed of 0 and pick a random one instead.
    if seed is None:
        current_seed = torch.randint(0, int(1e5), size=(1,)).item()
    else:
        # NOTE(review): with a fixed seed every iteration seeds the generator
        # identically, so the images are presumably all the same — confirm
        # whether that is intended when images_to_generate > 1.
        current_seed = seed
    generator = torch.Generator().manual_seed(int(current_seed))

    img = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        width=width,
        height=height,
        num_inference_steps=steps,
        guidance_scale=guidance,
        generator=generator,
    ).images[0]

    # Use "-" rather than ":" in the time part: colons are not allowed in
    # Windows filenames and are awkward on macOS and in downloads.
    time_now = datetime.datetime.now().strftime("%y.%m.%d_%H-%M-%S")
    img.save(f"./{time_now}_{current_seed}.jpg")

    # Scale the figure with the image size (80 pixels per figure inch).
    plt.figure(figsize=(width / 80, height / 80))
    plt.imshow(img)
    plt.axis("off")
    plt.show()
    print("seed:", current_seed)

# TODO: implement an img2img pipeline for upscaling, and add GFPGAN face restoration (https://huggingface.co/TencentARC/GFPGANv1)


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
