# Text to Image using Stable Diffusion by Sparsh Mehta

## Install packages

In [None]:
!pip install diffusers transformers accelerate


## Comparative Experiments for different Diffusion Models with different prompts and parameters

In [None]:
import torch
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler, LMSDiscreteScheduler, DPMSolverMultistepScheduler
from PIL import Image
import time
from tabulate import tabulate
import os
from google.colab import drive

# Make Google Drive available under /content/drive so that the generated
# images are written to the drive rather than the Colab VM's local disk.
drive.mount('/content/drive')

# Models under test, as (Hugging Face repo id, scheduler class) pairs.
# Written as a mapping for readability; dict order is insertion order, so
# the resulting list keeps the order shown here.
models = list({
    "stabilityai/stable-diffusion-2-1": EulerDiscreteScheduler,
    "stabilityai/stable-diffusion-2": EulerDiscreteScheduler,
    "runwayml/stable-diffusion-v1-5": LMSDiscreteScheduler,
}.items())

# Prompts to render with every model. Adjacent string literals are joined
# by the parser, so each parenthesised group is a single prompt string.
prompts = [
    (
        "An elderly man with a wrinkled face, kind eyes, and a warm smile, "
        "sitting on a wooden bench in a peaceful park, "
        "surrounded by autumn foliage, digital painting, 4k resolution"
    ),
    (
        "A majestic lion with a golden mane, sitting atop a rocky cliff, "
        "overlooking a vast savanna at sunset, photorealistic, 8k resolution"
    ),
    (
        "A futuristic cityscape with towering skyscrapers, flying cars, and neon lights, "
        "in the style of Blade Runner, cinematic, 4k resolution"
    ),
]

# Sampling settings; every (steps, guidance) pair is tried for each prompt.
num_inference_steps_list = [25, 50, 100, 150]
guidance_scale_list = [5.0, 7.5, 10.0, 12.0]

# One dict per generated image is appended here by the generation loop.
comparison_metrics = []

# Root folder on the mounted drive that receives all results.
base_folder = os.path.join('/content/drive', 'MyDrive', 'results')

# Generate images for each combination of model, prompt, and settings.
#
# For every model this writes, under <base_folder>/<model>/<prompt[:50]>/:
#   * one PNG per (inference steps, guidance scale) pair, and
#   * comparison.png, a grid of all of those images.
# A metrics dict for every generated image is appended to comparison_metrics.
for model_id, scheduler_cls in models:
    model_name = model_id.split('/')[-1]
    model_folder = f"{base_folder}/{model_name}"
    os.makedirs(model_folder, exist_ok=True)

    # Load the scheduler and pipeline once per model. They depend only on
    # model_id; the step count and guidance scale are passed per call below,
    # so reloading the weights for every setting combination is wasted work.
    scheduler = scheduler_cls.from_pretrained(model_id, subfolder="scheduler")
    pipe = StableDiffusionPipeline.from_pretrained(model_id, scheduler=scheduler, torch_dtype=torch.float16)
    pipe = pipe.to("cuda")

    for prompt in prompts:
        # The first 50 characters of the prompt double as the folder name.
        prompt_folder = f"{model_folder}/{prompt[:50]}"
        os.makedirs(prompt_folder, exist_ok=True)

        comparison_images = []

        for num_inference_steps in num_inference_steps_list:
            for guidance_scale in guidance_scale_list:
                # Time only the generation call. perf_counter() is a
                # monotonic clock intended for measuring intervals.
                start_time = time.perf_counter()
                image = pipe(prompt, num_inference_steps=num_inference_steps, guidance_scale=guidance_scale).images[0]
                generation_time = time.perf_counter() - start_time

                # Save the image under a name that encodes its settings.
                image_name = f"{num_inference_steps}steps_{guidance_scale}scale.png"
                image_path = f"{prompt_folder}/{image_name}"
                image.save(image_path)

                # Store the comparison metrics.
                comparison_metrics.append({
                    "Model": model_name,
                    "Prompt": prompt[:50],
                    "Inference Steps": num_inference_steps,
                    "Guidance Scale": guidance_scale,
                    "Generation Time (s)": round(generation_time, 2),
                    "Image Path": image_path
                })

                comparison_images.append(image)

        # Nothing was generated (an empty settings list): no grid to build.
        if not comparison_images:
            continue

        # Create a comparison image grid. Images were appended with guidance
        # scale as the fastest-varying index, so each grid row is one step
        # count and each column is one guidance scale. The column count must
        # therefore come from guidance_scale_list, which keeps the layout
        # correct even when the two lists differ in length.
        num_cols = len(guidance_scale_list)
        num_rows = len(num_inference_steps_list)
        # Cell size comes from the first collected image; this assumes all
        # images produced for one model share the same dimensions.
        cell_width, cell_height = comparison_images[0].size
        grid_image = Image.new('RGB', (num_cols * cell_width, num_rows * cell_height))

        for i, img in enumerate(comparison_images):
            row, col = divmod(i, num_cols)
            grid_image.paste(img, (col * cell_width, row * cell_height))

        comparison_image_path = f"{prompt_folder}/comparison.png"
        grid_image.save(comparison_image_path)

# Print every recorded metric as a grid-formatted text table, one table row
# per generated image.
headers = ["Model", "Prompt", "Inference Steps", "Guidance Scale", "Generation Time (s)", "Image Path"]

# Each header is also the key under which the value is stored in the
# metrics dicts, so the headers list drives both column order and lookup.
rows = [[metric[column] for column in headers] for metric in comparison_metrics]

table = tabulate(rows, headers=headers, tablefmt="grid")
print(table)

In [None]:
import pandas as pd

# Column order for the DataFrame; each name is also a key of the metrics dicts.
headers = ["Model", "Prompt", "Inference Steps", "Guidance Scale", "Generation Time (s)", "Image Path"]

# Flatten each metrics dict into a row that follows the header order.
rows = [[metric[column] for column in headers] for metric in comparison_metrics]

# Build the frame and order it from fastest to slowest generation.
df = pd.DataFrame(rows, columns=headers).sort_values(by="Generation Time (s)")

# Last expression of the cell: the notebook renders the sorted frame.
df

In [None]:
import pandas as pd

# Column order for the DataFrame; each name is also a key of the metrics dicts.
headers = ["Model", "Prompt", "Inference Steps", "Guidance Scale", "Generation Time (s)", "Image Path"]

# Flatten each metrics dict into a row that follows the header order.
rows = [[metric[column] for column in headers] for metric in comparison_metrics]

df = pd.DataFrame(rows, columns=headers)

# Smallest "Generation Time (s)" recorded for each model. The grouped result
# is indexed by model name; reset_index() turns that index back into a
# regular "Model" column.
fastest_per_model = df.groupby('Model')['Generation Time (s)'].min()
min_generation_time = fastest_per_model.reset_index()

print(min_generation_time)

                   Model  Generation Time (s)
0     stable-diffusion-2                 3.59
1   stable-diffusion-2-1                 3.47
2  stable-diffusion-v1-5                 1.72


In [None]:
# Import necessary modules
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler
import torch

from PIL import Image

# Stable Diffusion 2.1 with its Euler scheduler, loaded in float16 and moved
# to the CUDA device.
model_id = "stabilityai/stable-diffusion-2-1"
scheduler = EulerDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    scheduler=scheduler,
    torch_dtype=torch.float16,
).to("cuda")

# Prompt to render (hard-coded; edit the string to try something else).
prompt = (
    "a photorealistic image of a man sitting in a cafe "
    "working on his laptop and cup of coffee on a desk"
)

# Sampling settings for this run.
num_inference_steps = 100
guidance_scale = 7.5

# Run the pipeline, keep the first returned image, and show it.
image = pipe(
    prompt,
    num_inference_steps=num_inference_steps,
    guidance_scale=guidance_scale,
).images[0]
display(image)

# Resize to 2048x2048 with Lanczos resampling. This is plain interpolation
# of the generated image, not a model-based upscale.
upscaled_image = image.resize((2048, 2048), resample=Image.LANCZOS)
display(upscaled_image)

# Write the resized image to the current working directory.
upscaled_image.save("upscaled_image.png")

In [None]:
# Required libraries (diffusers, transformers, accelerate) are installed by the first cell

# Import necessary modules
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler
import torch

from PIL import Image

# Stable Diffusion 2.1 with its Euler scheduler, loaded in float16 and moved
# to the CUDA device.
model_id = "stabilityai/stable-diffusion-2-1"
scheduler = EulerDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    scheduler=scheduler,
    torch_dtype=torch.float16,
).to("cuda")

# Prompt to render (hard-coded; edit the string to try something else).
prompt = (
    "A magical forest with bioluminescent plants, enchanted creatures, "
    "and a mystical portal, fantasy art style, highly detailed, 8k resolution"
)

# Sampling settings for this run.
num_inference_steps = 100
guidance_scale = 7.5

# Run the pipeline, keep the first returned image, and show it.
image = pipe(
    prompt,
    num_inference_steps=num_inference_steps,
    guidance_scale=guidance_scale,
).images[0]
display(image)

# Resize to 2048x2048 with Lanczos resampling. This is plain interpolation
# of the generated image, not a model-based upscale.
upscaled_image = image.resize((2048, 2048), resample=Image.LANCZOS)
display(upscaled_image)

# Write the resized image to the current working directory.
upscaled_image.save("upscaled_image.png")

In [None]:
# Required libraries (diffusers, transformers, accelerate) are installed by the first cell

# Import necessary modules
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler
import torch

from PIL import Image

# Stable Diffusion 2.1 with its Euler scheduler, loaded in float16 and moved
# to the CUDA device.
model_id = "stabilityai/stable-diffusion-2-1"
scheduler = EulerDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    scheduler=scheduler,
    torch_dtype=torch.float16,
).to("cuda")

# Prompt to render (hard-coded; edit the string to try something else).
prompt = (
    "An elderly man with a wrinkled face, kind eyes, and a warm smile, "
    "sitting on a wooden bench in a peaceful park, "
    "surrounded by autumn foliage, digital painting, 4k resolution"
)

# Sampling settings for this run.
num_inference_steps = 100
guidance_scale = 7.5

# Run the pipeline, keep the first returned image, and show it.
image = pipe(
    prompt,
    num_inference_steps=num_inference_steps,
    guidance_scale=guidance_scale,
).images[0]
display(image)

# Resize to 2048x2048 with Lanczos resampling. This is plain interpolation
# of the generated image, not a model-based upscale.
upscaled_image = image.resize((2048, 2048), resample=Image.LANCZOS)
display(upscaled_image)

# Write the resized image to the current working directory.
upscaled_image.save("upscaled_image.png")

In [None]:
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler
import torch

# Stable Diffusion 2 with its Euler scheduler, loaded in float16 and moved
# to the CUDA device.
model_id = "stabilityai/stable-diffusion-2"
scheduler = EulerDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    scheduler=scheduler,
    torch_dtype=torch.float16,
).to("cuda")

# Sampling settings for this run.
num_inference_steps = 100
guidance_scale = 7.5

# Same "elderly man" prompt as the other single-model cells, so the outputs
# can be compared across models.
prompt = (
    "An elderly man with a wrinkled face, kind eyes, and a warm smile, "
    "sitting on a wooden bench in a peaceful park, "
    "surrounded by autumn foliage, digital painting, 4k resolution"
)

# Run the pipeline, keep the first returned image, and show it.
image = pipe(
    prompt,
    num_inference_steps=num_inference_steps,
    guidance_scale=guidance_scale,
).images[0]
display(image)

In [None]:
from diffusers import StableDiffusionPipeline, LMSDiscreteScheduler
import torch

# Stable Diffusion 1.5 with an LMS scheduler, loaded in float16 and moved
# to the CUDA device.
model_id = "runwayml/stable-diffusion-v1-5"
scheduler = LMSDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    scheduler=scheduler,
    torch_dtype=torch.float16,
).to("cuda")

# Sampling settings for this run.
num_inference_steps = 100
guidance_scale = 7.5

# Same "elderly man" prompt as the other single-model cells, so the outputs
# can be compared across models.
prompt = (
    "An elderly man with a wrinkled face, kind eyes, and a warm smile, "
    "sitting on a wooden bench in a peaceful park, "
    "surrounded by autumn foliage, digital painting, 4k resolution"
)

# Run the pipeline, keep the first returned image, and show it.
image = pipe(
    prompt,
    num_inference_steps=num_inference_steps,
    guidance_scale=guidance_scale,
).images[0]
display(image)

In [None]:
from diffusers import StableDiffusionUpscalePipeline
import torch

# Load the Stable Diffusion x4 upscaler in float16 and move it to the CUDA device.
model_id = "stabilityai/stable-diffusion-x4-upscaler"
pipe = StableDiffusionUpscalePipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
).to("cuda")

# Text prompt passed to the upscaler alongside the image.
prompt = (
    "An elderly man with a wrinkled face, kind eyes, and a warm smile, "
    "sitting on a wooden bench in a peaceful park, "
    "surrounded by autumn foliage, digital painting, 4k resolution"
)

# `image` is not created in this cell: it must already exist from a
# previously executed generation cell (e.g. the Stable Diffusion 1.5 or 2.0 one).
low_res_image = image

# Run the upscaler, keep the first returned image, and show it.
upscaled_image = pipe(prompt=prompt, image=low_res_image).images[0]
display(upscaled_image)

In [None]:
import torch
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler, LMSDiscreteScheduler, DPMSolverMultistepScheduler
from PIL import Image
import time

# Models under test, as (Hugging Face repo id, scheduler class) pairs.
# Written as a mapping for readability; dict order is insertion order, so
# the resulting list keeps the order shown here.
models = list({
    "stabilityai/stable-diffusion-2-1": EulerDiscreteScheduler,
    "stabilityai/stable-diffusion-2": EulerDiscreteScheduler,
    "runwayml/stable-diffusion-v1-5": LMSDiscreteScheduler,
}.items())

# Prompts to render with every model. Adjacent string literals are joined
# by the parser, so each parenthesised group is a single prompt string.
prompts = [
    (
        "An elderly man with a wrinkled face, kind eyes, and a warm smile, "
        "sitting on a wooden bench in a peaceful park, "
        "surrounded by autumn foliage, digital painting, 4k resolution"
    ),
    (
        "A majestic lion with a golden mane, sitting atop a rocky cliff, "
        "overlooking a vast savanna at sunset, photorealistic, 8k resolution"
    ),
    (
        "A futuristic cityscape with towering skyscrapers, flying cars, and neon lights, "
        "in the style of Blade Runner, cinematic, 4k resolution"
    ),
]

# Sampling settings; every (steps, guidance) pair is tried for each prompt.
num_inference_steps_list = [25, 50, 100, 150]
guidance_scale_list = [5.0, 7.5, 10.0, 12.0]

# Generate images for each combination of model, prompt, and settings.
# Each image is saved to the current working directory and its generation
# time is printed.
for model_id, scheduler_cls in models:
    # Load the scheduler and pipeline once per model. They depend only on
    # model_id; the step count and guidance scale are passed per call below,
    # so reloading the weights for every setting combination is wasted work.
    scheduler = scheduler_cls.from_pretrained(model_id, subfolder="scheduler")
    pipe = StableDiffusionPipeline.from_pretrained(model_id, scheduler=scheduler, torch_dtype=torch.float16)
    pipe = pipe.to("cuda")

    for prompt in prompts:
        for num_inference_steps in num_inference_steps_list:
            for guidance_scale in guidance_scale_list:
                # Time only the generation call. perf_counter() is a
                # monotonic clock intended for measuring intervals.
                start_time = time.perf_counter()
                image = pipe(prompt, num_inference_steps=num_inference_steps, guidance_scale=guidance_scale).images[0]
                generation_time = time.perf_counter() - start_time

                # File name encodes the model, the first 50 characters of
                # the prompt, and the settings used.
                image_name = f"{model_id.split('/')[-1]}_{prompt[:50]}_{num_inference_steps}steps_{guidance_scale}scale.png"
                image.save(image_name)

                # Print the generation time and image name.
                print(f"Generation time for {image_name}: {generation_time:.2f} seconds")

        print("\n")  # Add a line break between prompts