In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from PIL import Image

# Global matplotlib styling applied to every figure created after this cell.
plt.rcParams.update(
    {
        # Text rendering: plain matplotlib text (no LaTeX); PGF backend settings.
        "text.usetex": False,
        "pgf.texsystem": "pdflatex",
        "pgf.rcfonts": False,
        # Fonts.
        "font.family": "serif",
        "font.size": 15,  # base font size
        "axes.labelsize": 15,  # axis labels
        "xtick.labelsize": 12,
        "ytick.labelsize": 12,
        "legend.fontsize": 12,
        # Lines.
        "lines.linewidth": 2,
    }
)


def image_grid(imgs, rows, cols):
    """Tile image arrays into one ``rows`` x ``cols`` RGB PIL image.

    Args:
        imgs: Sequence of image arrays; must contain exactly ``rows * cols``
            items. The tile size is read from ``imgs[0].shape`` as
            ``(height, width, ...)`` and used for every cell, so all arrays
            are expected to share that size.
        rows: Number of grid rows.
        cols: Number of grid columns.

    Returns:
        A PIL image in "RGB" mode of size ``(cols * width, rows * height)``
        with the inputs laid out left to right, top to bottom.

    Raises:
        AssertionError: If ``len(imgs)`` differs from ``rows * cols``.
    """
    assert len(imgs) == rows * cols

    first_shape = imgs[0].shape
    tile_w = first_shape[1]
    tile_h = first_shape[0]
    canvas = Image.new("RGB", size=(cols * tile_w, rows * tile_h))
    for position, tile in enumerate(imgs):
        grid_row, grid_col = divmod(position, cols)
        # Cast to 8-bit before handing the array to PIL.
        tile_u8 = tile.astype(np.uint8)
        canvas.paste(Image.fromarray(tile_u8), box=(grid_col * tile_w, grid_row * tile_h))
    return canvas


In [2]:
# Per-sample metrics of the timestep-search run, one row per evaluated sample.
# NOTE: despite the `_sdxl` suffix, the path points at an SD3 results directory
# (`results_sd3`); the name is kept because later cells refer to it.
metrics_sdxl = pd.read_csv(
    filepath_or_buffer="/storage2/bartosz/code/t2i2/results_sd3/glyph_simple/timestep_search/20240917_210950_seed_42_n_samples_per_prompt_1_n_inference_steps_28_guidance_scale_7.0_max_timestep_10_attentions_to_patch_10/metrics.csv",
)

In [None]:
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 5))

# Runs are taken in the order they first appear in the metrics table. The first
# two are left out of every series below (presumably reference runs rather than
# patched timesteps -- TODO confirm).
block_col = metrics_sdxl["Block_patched"]
plotted_runs = block_col.unique()[2:]
rows_per_run = [metrics_sdxl.loc[block_col == run] for run in plotted_runs]

x = np.arange(len(plotted_runs))

# F1 from the per-run mean precision and mean recall; the small epsilon keeps
# the division defined when both means are zero.
ocr_f1s_b_sdxl = []
for run_rows in rows_per_run:
    precision = run_rows["OCR_B_Prec"].mean()
    recall = run_rows["OCR_B_Rec"].mean()
    ocr_f1s_b_sdxl.append((2 * precision * recall) / (precision + recall + 1e-6))

clip_b_sdxl = [run_rows["CLIPScore_prompt_B"].mean() for run_rows in rows_per_run]

# Levenshtein distance is divided by 10 so it shares the axis with the other
# curves; the legend label states the scaling.
leves_b_sdxl = [run_rows["Levenshtein_B"].mean() / 10 for run_rows in rows_per_run]

for series, label in (
    (ocr_f1s_b_sdxl, r"OCR F1 $\uparrow$"),
    (clip_b_sdxl, r"CLIP-T $\uparrow$"),
    (leves_b_sdxl, r"LD $\times 10^{-1}$ $\downarrow$"),
):
    plt.plot(series, label=label, marker="o")

# Ticks are labelled 0..n-1, i.e. by position in the trimmed run list.
plt.xticks(x, [str(i) for i in range(len(plotted_runs))])

plt.title("SD3 start patching timestep - text alignment")
plt.xlabel("Timestep")
plt.grid()

# Legend sits above the axes in a single row.
plt.legend(loc="upper center", bbox_to_anchor=(0.5, 1.25), ncol=3)
plt.tight_layout()
# plt.show()
plt.savefig("timestep_sd3_text.pdf", bbox_inches="tight")


In [None]:
import matplotlib.pyplot as plt
import numpy as np

plt.figure(figsize=(10, 5))

# Runs are taken in the order they first appear in the metrics table. The first
# two are left out of every series below (presumably reference runs rather than
# patched timesteps -- TODO confirm). The row selection for each run is computed
# once and shared by all metrics.
block_col = metrics_sdxl["Block_patched"]
plotted_runs = block_col.unique()[2:]
rows_per_run = [metrics_sdxl.loc[block_col == run] for run in plotted_runs]

x = np.arange(len(plotted_runs))

# MSE and PSNR are divided by 100 so they share the axis with SSIM; the legend
# labels state the scaling.
mses = [run_rows["MSE"].mean() / 100 for run_rows in rows_per_run]
ssims = [run_rows["SSIM"].mean() for run_rows in rows_per_run]
psnrs = [run_rows["PSNR"].mean() / 100 for run_rows in rows_per_run]

# CLIP-I is computed but currently not drawn (see the disabled plot call below).
clips_i = [run_rows["CLIPScore_image"].mean() for run_rows in rows_per_run]

plt.plot(mses, label=r"MSE $\times 10^{-2}$  $\downarrow$", marker="o")
plt.plot(ssims, label=r"SSIM $\uparrow$", marker="o")
# PSNR is a higher-is-better similarity measure, so its arrow points up
# (it was previously labelled with a down arrow).
plt.plot(psnrs, label=r"PSNR $\times 10^{-2}$ $\uparrow$", marker="o")
# plt.plot(clips_i, label=r"CLIP-I $\uparrow$", marker="o")

# Ticks are labelled 0..n-1, i.e. by position in the trimmed run list.
plt.xticks(x, [f"{i}" for i in range(len(plotted_runs))])

plt.title("SD3 start patching timestep - image alignment")
plt.xlabel("Timestep")
plt.grid()

# ncol leaves room for a fourth entry if the CLIP-I curve is re-enabled.
plt.legend(loc="upper center", bbox_to_anchor=(0.5, 1.25), ncol=4)
plt.tight_layout()
# plt.show()
plt.savefig("timestep_sd3_image.pdf", bbox_inches="tight")


In [None]:
# Inspect the column names available in the metrics table.
metrics_sdxl.columns

In [None]:
import matplotlib.pyplot as plt
import numpy as np

plt.figure(figsize=(20, 10))

# Runs are taken in the order they first appear in the metrics table. The first
# two are left out of every series below (presumably reference runs rather than
# patched timesteps -- TODO confirm). The row selection for each run is computed
# once and shared by all metrics.
block_col = metrics_sdxl["Block_patched"]
plotted_runs = block_col.unique()[2:]
rows_per_run = [metrics_sdxl.loc[block_col == run] for run in plotted_runs]

x = np.arange(len(plotted_runs))
width = 0.1  # Width of each bar

# MSE and PSNR are rescaled so all three metrics are visible on one axis; the
# legend labels state the scaling.
mses_sdxl = [run_rows["MSE"].mean() / 10000 for run_rows in rows_per_run]
ssims = [run_rows["SSIM"].mean() for run_rows in rows_per_run]
psnrs = [run_rows["PSNR"].mean() / 100 for run_rows in rows_per_run]

# Three bars per tick, centred on the tick (the old offsets assumed four bars).
# The metrics file lives under `results_sd3`, so the series are labelled SD3.
plt.bar(x - width, mses_sdxl, width, label=r"SD3 MSE $\times 10^{-4}$", edgecolor="black")
plt.bar(x, ssims, width, label="SD3 SSIM", edgecolor="black")
plt.bar(x + width, psnrs, width, label=r"SD3 PSNR $\times 10^{-2}$", edgecolor="black")

# One label per plotted run; a hard-coded count makes matplotlib raise whenever
# the number of runs differs from it.
plt.xticks(x, [f"timestep_{i}" for i in range(len(x))], rotation=45)

plt.title("SimpleBench timestep start patching")
plt.xlabel("Patched")
plt.grid(axis="y")
# The bars show MSE / SSIM / PSNR, not OCR F1.
plt.ylabel("Metric value (scaled)")

plt.legend(loc="upper center", bbox_to_anchor=(0.5, 1.2), ncol=8)
plt.tight_layout()
# plt.show()
plt.savefig("timestep.pdf", bbox_inches="tight")


In [None]:
# Rank the plotted timesteps by OCR F1, best first. `ocr_f1s_b_sdxl` was already
# trimmed with `[2:]` when it was built, so it is ranked whole here: slicing it a
# second time would drop two plotted timesteps and shift every returned index by
# 2 relative to the tick labels of the text-alignment plot.
np.argsort(ocr_f1s_b_sdxl)[::-1]

In [None]:
# Show the OCR F1 value of every plotted timestep. The list was already trimmed
# with `[2:]` when it was built, so a second slice would hide the first two
# plotted timesteps.
ocr_f1s_b_sdxl

In [None]:
# Rank the plotted timesteps by CLIP-T, best first. `clip_b_sdxl` was already
# trimmed with `[2:]` when it was built, so it is ranked whole here: slicing it a
# second time would drop two plotted timesteps and shift every returned index by
# 2 relative to the tick labels of the text-alignment plot.
np.argsort(clip_b_sdxl)[::-1]

In [12]:
# Array of generated images saved by a timestep-search run (file `None.npy`).
# NOTE(review): this run directory (guidance_scale_9.0, 20240908) differs from
# the one the metrics CSV is read from -- confirm that this is intended.
ims = np.load(
    file="/storage2/bartosz/code/t2i2/results_sd3/glyph_simple/timestep_search/model_stable-diffusion-3-medium-diffusers_seed_42_samples_1_batch_8_device_cuda_steps_28_attn_10_heads_None_guidance_scale_9.0_20240908_113407/None.npy",
)

In [None]:
# Grid labelled "A": the first four images of `ims` in a single row.
# NOTE(review): every image_grid cell in this notebook makes this exact call, so
# on a fresh top-to-bottom run they all render the same images -- confirm which
# array each label is meant to show.
image_grid(ims[:4], rows=1, cols=4)

In [None]:
# Grid labelled "0": the first four images of `ims` in a single row.
# NOTE(review): every image_grid cell in this notebook makes this exact call, so
# on a fresh top-to-bottom run they all render the same images -- confirm which
# array each label is meant to show.
image_grid(ims[:4], rows=1, cols=4)

In [None]:
# Grid labelled "1": the first four images of `ims` in a single row.
# NOTE(review): every image_grid cell in this notebook makes this exact call, so
# on a fresh top-to-bottom run they all render the same images -- confirm which
# array each label is meant to show.
image_grid(ims[:4], rows=1, cols=4)

In [None]:
# Grid labelled "2": the first four images of `ims` in a single row.
# NOTE(review): every image_grid cell in this notebook makes this exact call, so
# on a fresh top-to-bottom run they all render the same images -- confirm which
# array each label is meant to show.
image_grid(ims[:4], rows=1, cols=4)