In [None]:
import pandas as pd

# SD Cosyne Img Reward

In [None]:
# Load the Cosyne measurements and derive a `name` column from each image
# path: the last "/"-separated component, cut at its first ".".
sd_cosyne_img_reward = pd.read_csv(
    "results/evals/sd_cosyne_img_reward/measurements.csv"
).assign(
    name=lambda frame: frame["img_path"].apply(
        lambda path: path.rsplit("/", 1)[-1].split(".", 1)[0]
    )
)

In [None]:
# Build the numeric-name subset of the Cosyne measurements:
#   1. drop the rows named "max" / "baseline",
#   2. cast the remaining names to int,
#   3. keep names 0 through 16 (range(17)),
#   4. order the rows by name.
filtered_df = (
    sd_cosyne_img_reward.loc[
        lambda frame: ~frame["name"].isin(["max", "baseline"])
    ]
    .assign(name=lambda frame: frame["name"].astype(int))
    .loc[lambda frame: frame["name"].isin(range(17))]
    .sort_values(by="name")
)

In [None]:
# For every prompt, record how much each reward score differs between the
# image with the highest img_reward_score and the image named 0.
data = []
for prompt, group in filtered_df.groupby("prompt"):
    first_image = group.loc[group["name"] == 0]
    top_image = group.loc[group["img_reward_score"].idxmax()]
    record = {"prompt": prompt}
    for key, column in (
        ("delta_img_reward", "img_reward_score"),
        ("delta_clip_reward", "clip_reward_score"),
        ("delta_aesthetic_reward", "aesthetic_reward_score"),
        ("delta_hpsv_reward", "hpsv_reward_score"),
    ):
        # `first_image` is a frame selection, so the difference is a Series;
        # .item() unwraps it and requires exactly one element.
        record[key] = (top_image[column] - first_image[column]).item()
    data.append(record)

In [None]:
# Turn the list of per-prompt delta records into a DataFrame and display its
# summary statistics.
delta_data = pd.DataFrame(data)
delta_data.describe()

# SD SNES Img Reward

In [None]:
# Load the SNES measurements and derive a `name` column from each image
# path: the last "/"-separated component, cut at its first ".".
sd_snes_img_reward = pd.read_csv(
    "results/evals/sd_snes_img_reward/measurements.csv"
).assign(
    name=lambda frame: frame["img_path"].apply(
        lambda path: path.rsplit("/", 1)[-1].split(".", 1)[0]
    )
)

In [None]:
# Build the numeric-name subset of the SNES measurements.
# The frame being indexed and the frame the mask is computed from must be the
# same one (sd_snes_img_reward); mixing in the Cosyne frame would select
# Cosyne rows using a mask aligned to a different table.
filtered_df = sd_snes_img_reward[
    ~sd_snes_img_reward["name"].isin(["max", "baseline"])
].copy()
# The remaining names are cast to integers.
filtered_df["name"] = filtered_df["name"].astype(int)
filtered_df = filtered_df.loc[
    filtered_df["name"].isin(range(17))
]  # Keep names 0 through 16.
filtered_df = filtered_df.sort_values(by="name")

In [None]:
# For every prompt, record how much each reward score differs between the
# image with the highest img_reward_score and the image named 0.
#
# Iterate over `filtered_df` (the subset with integer names built in the
# preceding cell), not the raw CSV frame: in the raw frame `name` is a string,
# so `group["name"] == 0` would match no rows and `.item()` would raise on the
# empty result.
data = []
for prompt, group in filtered_df.groupby("prompt"):
    zero_row = group.loc[group["name"] == 0]
    max_row = group.loc[group["img_reward_score"].idxmax()]
    # `zero_row` is a frame selection, so each difference is a Series;
    # .item() unwraps it and requires exactly one element.
    delta_img_reward = (
        max_row["img_reward_score"] - zero_row["img_reward_score"]
    ).item()
    delta_hpsv_reward = (
        max_row["hpsv_reward_score"] - zero_row["hpsv_reward_score"]
    ).item()
    delta_aesthetic_reward = (
        max_row["aesthetic_reward_score"] - zero_row["aesthetic_reward_score"]
    ).item()
    delta_clip_reward = (
        max_row["clip_reward_score"] - zero_row["clip_reward_score"]
    ).item()
    data.append(
        {
            "prompt": prompt,
            "delta_img_reward": delta_img_reward,
            "delta_clip_reward": delta_clip_reward,
            "delta_aesthetic_reward": delta_aesthetic_reward,
            "delta_hpsv_reward": delta_hpsv_reward,
        }
    )

In [None]:
# Turn the list of per-prompt delta records into a DataFrame and display its
# summary statistics.
delta_data = pd.DataFrame(data)
delta_data.describe()

# WandB results

In [125]:
import wandb
import numpy as np
import torch
import pandas as pd

In [126]:
def flatten_log(log):
    """Flatten one log record by a single level.

    - A dict value contributes one entry per inner item under the key
      "<outer>.<inner>"; inner values are copied as-is.
    - A list value is replaced by its only element when it has exactly one;
      lists of any other length are left out of the result.
    - Every other value is copied unchanged.

    Returns a new dict; `log` is not modified.
    """
    flat = {}
    for key, value in log.items():
        if isinstance(value, dict):
            flat.update(
                {f"{key}.{inner_key}": inner for inner_key, inner in value.items()}
            )
            continue
        if not isinstance(value, list):
            flat[key] = value
            continue
        if len(value) == 1:
            flat[key] = value[0]
    return flat


def handle_v(v):
    """Reduce a logged value to a plainer form.

    - Lists are unwrapped by repeatedly taking their first element
      (an empty list raises IndexError).
    - An `np.float32` becomes a Python scalar via `.item()`.
    - A `torch.Tensor` with one element becomes a Python scalar; a tensor
      with more elements is returned as a NumPy array.
    - Anything else is returned unchanged.
    """
    # Peel nested lists down to the first non-list element.
    while isinstance(v, list):
        v = v[0]
    if isinstance(v, np.float32):
        return v.item()
    if isinstance(v, torch.Tensor):
        return v.item() if v.numel() == 1 else v.cpu().numpy()
    return v

In [127]:
def get_wandb_logs(api, run_path):
    """Return the scanned history of a run as a DataFrame.

    Looks up `run_path` through `api.run`, iterates over the run's
    `scan_history()`, flattens every record with `flatten_log`, and builds
    one DataFrame row per record.
    """
    history = api.run(run_path).scan_history()
    return pd.DataFrame([flatten_log(record) for record in history])

In [128]:
def compute_deltas(data, up_to_step=15):
    """Summarize, per prompt, the gain of `pop_best_eval` over its step-0 value.

    Parameters
    ----------
    data : DataFrame
        Must contain the columns "step", "pop_best_eval", "median_eval",
        "mean_eval", "prompt", "running_time" and "memory" (a missing column
        raises KeyError).
    up_to_step : int, default 15
        Largest step included when searching for the best value.

    Returns
    -------
    DataFrame
        One row per prompt that has step-0 rows, with columns:
        "prompt", "baseline" (mean of `pop_best_eval` at step 0),
        "pop_best" (max of `pop_best_eval` over non-zero steps that are
        <= `up_to_step`) and "improvement" (pop_best - baseline).
        Prompts without any qualifying later step get NaN.
    """
    logs = data[
        [
            "step",
            "pop_best_eval",
            "median_eval",
            "mean_eval",
            "prompt",
            "running_time",
            "memory",
        ]
    ]

    at_start = logs["step"] == 0
    in_window = (logs["step"] != 0) & (logs["step"] <= up_to_step)

    baseline = (
        logs.loc[at_start].groupby("prompt")["pop_best_eval"].mean().rename("baseline")
    )
    pop_best = (
        logs.loc[in_window].groupby("prompt")["pop_best_eval"].max().rename("pop_best")
    )

    # Left join: the set of prompts is defined by the step-0 rows.
    summary = baseline.to_frame().join(pop_best)
    summary["improvement"] = summary["pop_best"] - summary["baseline"]
    return summary.reset_index()


In [129]:
def compute_deltas_dnos(data, up_to_step=15):
    """Summarize, per prompt, the gain of "Reward" over its step-0 value.

    Same summary as `compute_deltas`, but for logs that use the columns
    "Step", "Prompt" and "Reward" (a missing column raises KeyError).

    Returns a DataFrame with one row per prompt that has step-0 rows and the
    columns "prompt", "baseline" (mean Reward at Step 0), "pop_best" (max
    Reward over non-zero steps that are <= `up_to_step`) and "improvement"
    (pop_best - baseline). Prompts without a qualifying later step get NaN.
    """
    logs = data[["Step", "Prompt", "Reward"]]

    at_start = logs["Step"] == 0
    in_window = (logs["Step"] != 0) & (logs["Step"] <= up_to_step)

    baseline = logs.loc[at_start].groupby("Prompt")["Reward"].mean().rename("baseline")
    pop_best = logs.loc[in_window].groupby("Prompt")["Reward"].max().rename("pop_best")

    # Left join: the set of prompts is defined by the step-0 rows.
    summary = baseline.to_frame().join(pop_best)
    summary["improvement"] = summary["pop_best"] - summary["baseline"]
    return summary.reset_index().rename(columns={"Prompt": "prompt"})

In [130]:
def add_fields(data, fields):
    """Assign every `fields` item onto `data` and return `data`.

    Each key of `fields` is set via `data[key] = value`, so `data` is
    modified in place; the returned object is the same `data` that was
    passed in.
    """
    for name, value in fields.items():
        data[name] = value
    return data

In [131]:
# Create the WandB API client that the run-loading cells below pass to
# get_wandb_logs. timeout=60 is presumably in seconds -- confirm against the
# wandb.Api documentation.
api = wandb.Api(timeout=60)

## SD Random

### Img Reward

In [132]:
# Load the logged history of run skl06y3a, then display summary statistics of
# the per-prompt baseline / pop_best / improvement (best taken up to step 15).
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/skl06y3a")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,improvement
count,200.0,200.0,200.0
mean,-0.031075,1.406665,1.43774
std,1.082906,0.519118,0.962182
min,-2.268306,-0.568638,-0.028912
25%,-0.714113,1.189102,0.725083
50%,0.043865,1.582806,1.286778
75%,0.769696,1.78302,1.9666
max,1.906958,1.988246,3.987686


### HPSv2

In [133]:
# Load the logged history of run bzquoitz, then display summary statistics of
# the per-prompt baseline / pop_best / improvement (best taken up to step 15).
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/bzquoitz")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,improvement
count,200.0,200.0,200.0
mean,0.275281,0.301366,0.026085
std,0.017328,0.016452,0.011435
min,0.227539,0.258057,0.004639
25%,0.263367,0.289673,0.018799
50%,0.275024,0.303711,0.025635
75%,0.288391,0.313965,0.031799
max,0.330566,0.34375,0.071045


### CLIP

In [134]:
# Load the logged history of run bfzpae40, then display summary statistics of
# the per-prompt baseline / pop_best / improvement (best taken up to step 15).
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/bfzpae40")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,improvement
count,200.0,200.0,200.0
mean,31.830313,37.139063,5.30875
std,4.121937,3.66497,2.885185
min,18.59375,29.09375,-0.28125
25%,29.410156,34.515625,3.367188
50%,31.976562,37.015625,4.71875
75%,34.03125,39.664062,6.894531
max,44.46875,49.5625,16.328125


## SD Zero-order

### Img Reward

In [135]:
# Load the logged history of run 4ghfz8nm, then display summary statistics of
# the per-prompt baseline / pop_best / improvement (best taken up to step 15).
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/4ghfz8nm")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,improvement
count,200.0,200.0,200.0
mean,0.035915,1.454864,1.418948
std,1.058178,0.52967,0.990894
min,-2.229451,-0.40778,0.078075
25%,-0.672078,1.237632,0.625569
50%,0.137491,1.673038,1.203078
75%,0.847085,1.851148,2.002062
max,1.87652,2.00197,3.984655


### HPSv2

In [136]:
# Load the logged history of run rwijixwm, then display summary statistics of
# the per-prompt baseline / pop_best / improvement (best taken up to step 15).
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/rwijixwm")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,improvement
count,200.0,200.0,200.0
mean,0.277531,0.303896,0.026366
std,0.018112,0.017057,0.012269
min,0.233154,0.26001,0.002441
25%,0.264771,0.292236,0.018066
50%,0.280029,0.305542,0.024536
75%,0.289551,0.316467,0.032471
max,0.33252,0.346924,0.072144


### CLIP

In [137]:
# Load the logged history of run ullx2qou, then display summary statistics of
# the per-prompt baseline / pop_best / improvement (best taken up to step 15).
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/ullx2qou")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,improvement
count,200.0,200.0,200.0
mean,31.684453,37.589609,5.905156
std,4.032816,3.864454,3.136509
min,18.5,28.0,0.578125
25%,29.03125,34.640625,3.6875
50%,31.539062,37.71875,5.054688
75%,34.75,40.34375,7.589844
max,42.59375,48.34375,17.09375


## DNO

### Img Reward

In [118]:
# Load the logged history of DNO run 9vis1gzw (Step / Prompt / Reward columns),
# then display summary statistics of the per-prompt baseline / pop_best /
# improvement (best taken up to step 15).
data = get_wandb_logs(api, "pjajal/inference-dno/9vis1gzw")
compute_deltas_dnos(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,improvement
count,200.0,200.0,200.0
mean,-0.082614,0.706908,0.789523
std,1.017341,0.925771,0.883648
min,-2.212891,-1.744141,-2.246582
25%,-0.765015,0.038269,0.183695
50%,-0.093689,0.910121,0.60083
75%,0.686523,1.512451,1.241638
max,1.845703,1.950374,3.990234


### HPSv2

In [119]:
# Load the logged history of DNO run h26tfx2i (Step / Prompt / Reward columns),
# then display summary statistics of the per-prompt baseline / pop_best /
# improvement (best taken up to step 15).
data = get_wandb_logs(api, "pjajal/inference-dno/h26tfx2i")
compute_deltas_dnos(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,improvement
count,200.0,200.0,200.0
mean,0.274603,0.286219,0.011616
std,0.016887,0.017411,0.012487
min,0.221313,0.240967,-0.038818
25%,0.262573,0.274109,0.004395
50%,0.275513,0.286255,0.010986
75%,0.288513,0.299805,0.018066
max,0.313477,0.32411,0.068604


## SD Cosyne

### Img Reward

In [120]:
# Load the logged history of run p5selqpd, then display summary statistics of
# the per-prompt baseline / pop_best / improvement (best taken up to step 15).
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/p5selqpd")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,improvement
count,200.0,200.0,200.0
mean,0.035915,1.614149,1.578233
std,1.058178,0.417252,1.024946
min,-2.229451,-0.092948,0.084586
25%,-0.672078,1.489509,0.713431
50%,0.137491,1.765213,1.396616
75%,0.847085,1.884118,2.244661
max,1.87652,2.001776,4.092068


### HPSv2

In [121]:
# Load the logged history of run 4p9ka0yv, then display summary statistics of
# the per-prompt baseline / pop_best / improvement (best taken up to step 15).
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/4p9ka0yv")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,improvement
count,200.0,200.0,200.0
mean,0.277531,0.310094,0.032563
std,0.018112,0.016525,0.013393
min,0.233154,0.270508,0.006592
25%,0.264771,0.299011,0.022705
50%,0.280029,0.311035,0.030518
75%,0.289551,0.321289,0.039673
max,0.33252,0.352051,0.072632


## SD SNES

### Img Reward

In [122]:
# Load the logged history of run yxq7l3oo, then display summary statistics of
# the per-prompt baseline / pop_best / improvement (best taken up to step 15).
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/yxq7l3oo")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,improvement
count,200.0,200.0,200.0
mean,0.035915,1.391709,1.355794
std,1.058178,0.523346,0.951857
min,-2.229451,-0.402447,0.087315
25%,-0.672078,1.108612,0.565178
50%,0.137491,1.53466,1.203921
75%,0.847085,1.812005,1.888701
max,1.87652,1.977036,4.049063


## SD PGPE

### Img Reward

In [123]:
# Load the logged history of run w5sjg6wc, then display summary statistics of
# the per-prompt baseline / pop_best / improvement (best taken up to step 15).
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/w5sjg6wc")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,improvement
count,200.0,200.0,200.0
mean,0.035915,1.261161,1.225246
std,1.058178,0.645635,0.884223
min,-2.229451,-1.166222,0.071018
25%,-0.672078,0.91431,0.52701
50%,0.137491,1.418985,1.046495
75%,0.847085,1.771437,1.666432
max,1.87652,1.97791,4.001263


### HPSv2

In [138]:
# Load the logged history of run 5c0hh9jn, then display summary statistics of
# the per-prompt baseline / pop_best / improvement (best taken up to step 15).
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/5c0hh9jn")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,improvement
count,200.0,200.0,200.0
mean,0.277531,0.298867,0.021337
std,0.018112,0.01713,0.010459
min,0.233154,0.256348,0.003418
25%,0.264771,0.286804,0.013672
50%,0.280029,0.300903,0.019653
75%,0.289551,0.31134,0.027161
max,0.33252,0.341553,0.070435
