# Evaluate Stable Diffusion with `rige`

In [1]:
import relational_image_generation_evaluation as rige

### Load the evaluator

In [2]:
EVALUATOR_NAME = "GraphCLIP"
evaluator = rige.Evaluator(EVALUATOR_NAME)

Using device cuda:1 for evaluation.


### Load the evaluation dataset

In [3]:
# testonly => ~1000 samples subset for testing purposes, not full evaluation
dataloader = rige.get_one_edge_dataloader(shuffle=False, testonly=True)
print("len(dataloader):", len(dataloader))

Loading filtered test graphs...
Finished loading filtered test graphs
Generating one edge graphs...


100%|██████████| 100/100 [00:00<00:00, 1164.98it/s]

Finished generating one edge graphs
len(dataloader): 837





### Load Stable Diffusion models

In [4]:
from diffusers import StableDiffusionPipeline
import torch
from relational_image_generation_evaluation.vision_transformer.jt_training import get_free_gpu

# First model
model_id1 = "runwayml/stable-diffusion-v1-5"
name1 = "SD 1.5"
pipe1 = StableDiffusionPipeline.from_pretrained(model_id1, safety_checker=None, torch_dtype=torch.float16)
pipe1 = pipe1.to(get_free_gpu(min_mem=21_000))
def txt2img1(txt):
    return pipe1(txt).images[0]

# Second model
model_id = "stabilityai/stable-diffusion-2-1"
name2 = "SD 2.1"
pipe2 = StableDiffusionPipeline.from_pretrained(model_id, safety_checker=None, torch_dtype=torch.float16)
pipe2 = pipe2.to(get_free_gpu(min_mem=21_000))

def txt2img2(txt):
    return pipe2(txt).images[0]


    PyTorch 2.0.0+cu118 with CUDA 1108 (you have 1.13.0+cu117)
    Python  3.10.11 (you have 3.10.9)
  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)
  Memory-efficient attention, SwiGLU, sparse and more won't be available.
  Set XFORMERS_MORE_DETAILS=1 for more details
Cannot initialize model with low cpu memory usage because `accelerate` was not found in the environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install `accelerate` for faster and less memory-intense model loading. You can do so with: 
```
pip install accelerate
```
.
You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly

### Run interactive evaluation

#### Initialize lists that collect users votes and evaluator scores. Generate images and score them automatically.

In [35]:
from IPython.display import clear_output
from itertools import islice

imgs = {
    name1: [],
    name2: [],
}
votes = []
scores = {
    name1: [],
    name2: [],
}
captions = []
N_SAMPLES_TO_EVALUATE = 4
d_iter = iter(dataloader)
d_iter = islice(d_iter, N_SAMPLES_TO_EVALUATE)

labeled_functions = [(txt2img1, name1), (txt2img2, name2)]

def score_image(img, sample):
    scores = evaluator([img],[sample])
    return scores['overall_scores'][0]

for idx, sample in enumerate(d_iter):
    captions.append(sample[0].caption)
    assert len(sample) == 1
    clear_output(wait=False)
    for func, label in labeled_functions:
        print(f"{idx}/{N_SAMPLES_TO_EVALUATE}")
        img = func(sample[0].caption)  # Generate the image with the function
        score = score_image(img, sample[0])
        scores[label].append(score)
        imgs[label].append(img)

3/4


  0%|          | 0/50 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00, 145.93it/s]
100%|██████████| 1/1 [00:00<00:00, 16.50it/s]

3/4





  0%|          | 0/50 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00, 150.22it/s]
100%|██████████| 1/1 [00:00<00:00, 15.77it/s]


#### Display images and vote on them manually.

In [43]:
import IPython.display as display
from ipywidgets import widgets, HBox
from PIL import Image
import io


def on_button_clicked(b):
    pause = False
    if b.description == 'Pause':
        pause = True
    else:
        votes.append(b.description)
    if pause:
        print("Pausing")
    else:
        if len(votes) == len(captions):
            print("Done!")
        else:
            process()

def process():
    display.clear_output(wait=True)
    print(f"Sample {len(votes)}: {captions[len(votes)]}")
    # Display each function's image with corresponding label
    resized_images = []
    for _, label in labeled_functions:
        img = imgs[label][len(votes)]  # Generate the image with the function
        score = scores[label][len(votes)]
        display.display(f"Generator: {label}    Score: {score:.3f}")
        # Resize the image
        base_width = 300  # Define the width to resize to
        w_percent = (base_width / float(img.size[0]))
        h_size = int((float(img.size[1]) * float(w_percent)))
        img = img.resize((base_width, h_size), Image.ANTIALIAS)

        # Convert PIL Image to a format that IPython Image can render
        byte_arr = io.BytesIO()
        img.save(byte_arr, format='PNG')
        byte_arr.seek(0)
        
        resized_images.append(byte_arr)
    hbox = HBox([widgets.Image(value=resized_image.getvalue(), format='png') for resized_image in resized_images])
    display.display(hbox)

    # Display the buttons
    button_options = ["First better", "Second better", "Equally good", "Pause"]
    for option in button_options:
        button = widgets.Button(description=option)
        button.on_click(on_button_clicked)
        display.display(button)

# Start the process
process()

Sample 3: man wearing brown shoes.


'Generator: SD 1.5    Score: 4.437'

  img = img.resize((base_width, h_size), Image.ANTIALIAS)


'Generator: SD 2.1    Score: 5.617'

HBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x01,\x00\x00\x01,\x08\x02\x00\x00\x00…

Button(description='First better', style=ButtonStyle())

Button(description='Second better', style=ButtonStyle())

Button(description='Equally good', style=ButtonStyle())

Button(description='Pause', style=ButtonStyle())

Done!
