# WandB results

In [5]:
import wandb
import numpy as np
import torch
import pandas as pd

In [6]:
def flatten_log(log):
    """Flatten one logged record into a single-level dict.

    Each entry of ``log`` is handled according to the type of its value:

    * ``dict`` -- every inner item is copied to the result under the key
      ``"<outer>.<inner>"``. Only one level is expanded; inner values are
      stored as they are.
    * ``list`` -- a one-element list is replaced by its sole element; a list
      of any other length (including an empty one) is left out of the result.
    * anything else -- stored unchanged under its original key.

    Returns a new dict; ``log`` itself is not modified.
    """
    flat = {}
    for key, value in log.items():
        if isinstance(value, dict):
            flat.update({f"{key}.{inner_key}": inner for inner_key, inner in value.items()})
            continue
        if not isinstance(value, list):
            flat[key] = value
            continue
        # Lists survive only when they wrap exactly one value.
        if len(value) == 1:
            flat[key] = value[0]
    return flat


def handle_v(v):
    """Convert a logged value into a plain Python / NumPy object.

    Rules, applied in order:

    * ``list`` -- only the first element is kept and converted recursively;
      an empty list yields ``None`` instead of raising ``IndexError``.
    * NumPy scalar (any floating, integer or bool type, not only
      ``np.float32``) -- converted with ``.item()`` to a Python scalar.
    * ``torch.Tensor`` -- a one-element tensor becomes a Python scalar via
      ``.item()``; any other tensor is detached, moved to the CPU and
      returned as a NumPy array.
    * anything else -- returned unchanged.
    """
    if isinstance(v, list):
        # An empty list has no first element to unwrap.
        return handle_v(v[0]) if v else None
    if isinstance(v, (np.floating, np.integer, np.bool_)):
        return v.item()
    if isinstance(v, torch.Tensor):
        if v.numel() == 1:
            return v.item()
        # detach() first: Tensor.numpy() raises for tensors that require grad.
        return v.detach().cpu().numpy()
    return v

In [7]:
def get_wandb_logs(api, run_path):
    """Download the logged history of one run as a DataFrame.

    ``api.run(run_path)`` looks the run up and ``scan_history()`` supplies its
    logged records. Each record is passed through ``flatten_log`` and the
    flattened records are assembled into a ``pandas.DataFrame`` with one row
    per record.
    """
    history = api.run(run_path).scan_history()
    rows = list(map(flatten_log, history))
    return pd.DataFrame(rows)

In [22]:
def compute_deltas(data, up_to_step=15, max_or_min="max"):
    """Compare each prompt's step-0 baseline with its later aggregated scores.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``step``, ``pop_best_eval``, ``median_eval``,
        ``mean_eval``, ``prompt``, ``running_time`` and ``memory`` (the last
        two are selected but not used in the computation).
    up_to_step : int, default 15
        Rows with ``step != 0`` and ``step <= up_to_step`` are aggregated.
    max_or_min : str, default "max"
        Name of the aggregation applied per prompt to ``pop_best_eval``,
        ``mean_eval`` and ``median_eval`` over those rows.

    Returns
    -------
    pandas.DataFrame
        One row per prompt that has step-0 rows, with the columns ``prompt``,
        ``baseline`` (mean ``pop_best_eval`` at step 0), ``pop_best``,
        ``mean``, ``median`` (the aggregated metrics) and
        ``improvement_max``, ``improvement_mean``, ``improvement_median``
        (aggregated metric minus baseline).
    """
    metric_cols = ["pop_best_eval", "mean_eval", "median_eval"]
    frame = data[
        ["step", "pop_best_eval", "median_eval", "mean_eval", "prompt", "running_time", "memory"]
    ]
    steps = frame["step"]

    # Baseline: average best score of the initial population, per prompt.
    baseline = frame[steps == 0].groupby("prompt")["pop_best_eval"].agg(["mean"])

    # Aggregate every later step inside the requested window, per prompt.
    in_window = (steps != 0) & (steps <= up_to_step)
    aggregated = frame[in_window].groupby("prompt")[metric_cols].agg([max_or_min])
    aggregated.columns = ["_".join(parts) for parts in aggregated.columns]

    # Left join: only prompts that have a baseline are kept.
    result = baseline.join(aggregated)
    for label, metric in (("max", "pop_best_eval"), ("mean", "mean_eval"), ("median", "median_eval")):
        result[f"delta_{label}"] = result[f"{metric}_{max_or_min}"] - result["mean"]

    return result.reset_index().rename(
        columns={
            "mean": "baseline",
            f"pop_best_eval_{max_or_min}": "pop_best",
            f"mean_eval_{max_or_min}": "mean",
            f"median_eval_{max_or_min}": "median",
            "delta_max": "improvement_max",
            "delta_mean": "improvement_mean",
            "delta_median": "improvement_median",
        }
    )

In [15]:
def compute_deltas_dnos(data, up_to_step=15, max_or_min="max"):
    """Compare each prompt's step-0 reward with its best later reward.

    Counterpart of ``compute_deltas`` for logs that use the column names
    ``Step``, ``Prompt`` and ``Reward``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``Step``, ``Prompt`` and ``Reward``.
    up_to_step : int, default 15
        Rows with ``Step != 0`` and ``Step <= up_to_step`` are aggregated.
    max_or_min : str, default "max"
        Name of the per-prompt aggregation applied to ``Reward`` over those
        rows (same switch as in ``compute_deltas``); the default reproduces
        the previous behaviour.

    Returns
    -------
    pandas.DataFrame
        One row per prompt that has step-0 rows, with the columns ``prompt``,
        ``baseline`` (mean ``Reward`` at step 0), ``pop_best`` (aggregated
        ``Reward``) and ``improvement`` (``pop_best - baseline``).
    """
    frame = data[["Step", "Prompt", "Reward"]]
    steps = frame["Step"]

    # Name the two series up front so no post-hoc column renames are needed
    # and the aggregation name can never clash with the baseline column.
    baseline = frame[steps == 0].groupby("Prompt")["Reward"].agg("mean").rename("baseline")
    best = (
        frame[(steps != 0) & (steps <= up_to_step)]
        .groupby("Prompt")["Reward"]
        .agg(max_or_min)
        .rename("pop_best")
    )

    # Left join: only prompts that have a baseline are kept.
    result = baseline.to_frame().join(best)
    result["improvement"] = result["pop_best"] - result["baseline"]
    return result.reset_index().rename(columns={"Prompt": "prompt"})

In [16]:
def add_fields(data, fields):
    """Assign every ``name -> value`` pair of ``fields`` onto ``data``.

    The assignment ``data[name] = value`` is performed for each pair, so
    ``data`` is modified in place (an existing entry with the same name is
    overwritten). The same ``data`` object is returned.
    """
    pairs = fields.items()
    for name, value in pairs:
        data[name] = value
    return data

In [17]:
# Create the WandB API client that every get_wandb_logs call in this
# notebook receives as `api`. timeout=60 is forwarded to wandb.Api
# (presumably seconds per request -- confirm against the wandb docs).
api = wandb.Api(timeout=60)

## SD Random

### Img Reward

In [18]:
# SD Random / Img Reward: fetch the logged history of run skl06y3a, then show
# summary statistics of the per-prompt deltas over non-zero steps <= 15.
# NOTE: `data` is reassigned by every results cell in this notebook, so it
# always holds the most recently fetched run.
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/skl06y3a")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,mean,median,improvement_max,improvement_mean,improvement_median
count,200.0,200.0,200.0,200.0,200.0,200.0,200.0
mean,-0.031075,1.406665,0.273785,0.382568,1.43774,0.30486,0.413643
std,1.082906,0.519118,0.752042,0.843772,0.962182,0.724381,0.734997
min,-2.268306,-0.568638,-1.496632,-1.717174,-0.028912,-2.11034,-1.954732
25%,-0.714113,1.189102,-0.307636,-0.188857,0.725083,-0.152275,-0.06923
50%,0.043865,1.582806,0.311675,0.437331,1.286778,0.172543,0.27964
75%,0.769696,1.78302,0.882153,0.995918,1.9666,0.775642,0.83182
max,1.906958,1.988246,1.784922,1.877474,3.987686,2.92075,3.081417


### HPSv2

In [19]:
# SD Random / HPSv2: fetch run bzquoitz and summarise the per-prompt deltas
# over non-zero steps <= 15.
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/bzquoitz")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,mean,median,improvement_max,improvement_mean,improvement_median
count,200.0,200.0,200.0,200.0,200.0,200.0,200.0
mean,0.275281,0.301366,0.280917,0.282038,0.026085,0.005636,0.006757
std,0.017328,0.016452,0.015446,0.015873,0.011435,0.009267,0.009438
min,0.227539,0.258057,0.238144,0.237793,0.004639,-0.018585,-0.012329
25%,0.263367,0.289673,0.269621,0.270081,0.018799,-0.001179,-0.000732
50%,0.275024,0.303711,0.282387,0.284485,0.025635,0.006172,0.007141
75%,0.288391,0.313965,0.292622,0.294067,0.031799,0.011074,0.012085
max,0.330566,0.34375,0.314606,0.318237,0.071045,0.039276,0.043335


### CLIP

In [20]:
# SD Random / CLIP: fetch run bfzpae40 and summarise the per-prompt deltas
# over non-zero steps <= 15.
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/bfzpae40")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,mean,median,improvement_max,improvement_mean,improvement_median
count,200.0,200.0,200.0,200.0,200.0,200.0,200.0
mean,31.830313,37.139063,32.623672,32.866641,5.30875,0.793359,1.036328
std,4.121937,3.66497,3.381918,3.502261,2.885185,2.193773,2.268433
min,18.59375,29.09375,21.0625,20.5625,-0.28125,-3.75,-3.953125
25%,29.410156,34.515625,30.917969,30.90625,3.367188,-0.664062,-0.296875
50%,31.976562,37.015625,32.59375,32.8125,4.71875,0.5625,0.8125
75%,34.03125,39.664062,35.03125,35.265625,6.894531,2.0625,2.140625
max,44.46875,49.5625,41.15625,41.75,16.328125,9.75,10.140625


### JPEG

In [25]:
# SD Random / JPEG: fetch run miy93gst and summarise the per-prompt deltas.
# max_or_min="min" aggregates with the minimum instead of the maximum --
# presumably because lower JPEG scores are better here; confirm. The
# "improvement_*" columns are therefore expected to be negative on success.
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/miy93gst")
compute_deltas(data, up_to_step=15, max_or_min="min").describe()

Unnamed: 0,baseline,pop_best,mean,median,improvement_max,improvement_mean,improvement_median
count,200.0,200.0,200.0,200.0,200.0,200.0,200.0
mean,116.78278,66.91517,108.156964,105.428983,-49.86761,-8.625817,-11.353798
std,29.854665,17.825191,20.080439,20.664202,26.241667,21.975544,21.877609
min,58.554001,26.548,64.807999,60.786499,-135.653999,-81.400375,-88.889999
25%,96.636997,55.0785,93.763767,91.465878,-65.636998,-22.977505,-24.807749
50%,114.785004,64.23,105.787186,103.53875,-46.273998,-6.747185,-9.158749
75%,132.409252,76.063498,118.883076,116.39525,-30.921998,5.859894,3.239996
max,212.755997,136.587006,203.392944,203.583008,4.314003,43.750999,41.611496


## SD Zero-order

### Img Reward

In [None]:
# SD Zero-order / Img Reward: fetch run 4ghfz8nm and summarise the per-prompt
# deltas over non-zero steps <= 15.
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/4ghfz8nm")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,mean,median,improvement_max,improvement_mean,improvement_median
count,200.0,200.0,200.0,200.0,200.0,200.0,200.0
mean,0.035915,1.534577,0.945013,1.082029,1.498662,0.909097,1.046114
std,1.058178,0.465393,0.678645,0.710983,1.007375,0.878178,0.961096
min,-2.229451,-0.129912,-1.331369,-1.479679,0.084965,-0.57546,-0.423726
25%,-0.672078,1.361493,0.469055,0.64084,0.658142,0.233622,0.300892
50%,0.137491,1.722724,1.051813,1.303073,1.265968,0.685107,0.747256
75%,0.847085,1.86637,1.502448,1.667772,2.111342,1.334105,1.521701
max,1.87652,2.006367,1.966297,1.989202,4.007376,3.532083,3.874983


### HPSv2

In [75]:
# SD Zero-order / HPSv2: fetch run rwijixwm and summarise the per-prompt
# deltas over non-zero steps <= 15.
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/rwijixwm")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,mean,median,improvement_max,improvement_mean,improvement_median
count,200.0,200.0,200.0,200.0,200.0,200.0,200.0
mean,0.277531,0.303896,0.289102,0.290278,0.026366,0.011572,0.012748
std,0.018112,0.017057,0.016742,0.017151,0.012269,0.010519,0.011107
min,0.233154,0.26001,0.245117,0.245117,0.002441,-0.009679,-0.010742
25%,0.264771,0.292236,0.276741,0.277771,0.018066,0.005418,0.006042
50%,0.280029,0.305542,0.291425,0.292114,0.024536,0.010046,0.010742
75%,0.289551,0.316467,0.302325,0.30249,0.032471,0.015758,0.017578
max,0.33252,0.346924,0.334659,0.335938,0.072144,0.058953,0.062744


### CLIP

In [76]:
# SD Zero-order / CLIP: fetch run ullx2qou and summarise the per-prompt
# deltas over non-zero steps <= 15.
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/ullx2qou")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,mean,median,improvement_max,improvement_mean,improvement_median
count,200.0,200.0,200.0,200.0,200.0,200.0,200.0
mean,31.684453,37.589609,34.235234,34.545156,5.905156,2.550781,2.860703
std,4.032816,3.864454,3.573508,3.697707,3.136509,2.463453,2.567563
min,18.5,28.0,24.8125,24.578125,0.578125,-2.21875,-3.53125
25%,29.03125,34.640625,31.949219,31.917969,3.6875,0.960938,1.246094
50%,31.539062,37.71875,34.265625,34.4375,5.054688,2.007812,2.242188
75%,34.75,40.34375,36.664062,37.21875,7.589844,3.714844,3.972656
max,42.59375,48.34375,44.59375,44.9375,17.09375,12.9375,13.21875


### JPEG

In [24]:
# SD Zero-order / JPEG: fetch run gz3b6q2k and summarise the per-prompt
# deltas. max_or_min="min" aggregates with the minimum -- presumably because
# lower JPEG scores are better here; confirm.
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/gz3b6q2k")
compute_deltas(data, up_to_step=15, max_or_min="min").describe()

Unnamed: 0,baseline,pop_best,mean,median,improvement_max,improvement_mean,improvement_median
count,200.0,200.0,200.0,200.0,200.0,200.0,200.0
mean,115.62953,53.345975,64.290861,62.75461,-62.283555,-51.33867,-52.87492
std,27.508617,18.421629,20.472914,20.645551,24.887105,24.283941,24.494955
min,63.051998,15.89,22.702116,18.319,-182.047005,-174.057594,-175.105003
25%,96.998999,39.2875,50.387574,48.34075,-77.560498,-66.759896,-69.540252
50%,113.143501,51.469999,62.311882,61.207001,-59.809002,-48.798563,-50.538498
75%,131.945995,66.555748,77.573486,74.699749,-42.916752,-32.814957,-35.337749
max,236.546005,116.901001,153.927933,158.854004,-19.631996,-8.570526,-9.719994


## DNO

### Img Reward

In [103]:
# DNO / Img Reward: this run lives in the separate "inference-dno" project and
# is summarised with compute_deltas_dnos, which reads the Step / Prompt /
# Reward columns instead of the step / prompt / *_eval columns.
data = get_wandb_logs(api, "pjajal/inference-dno/9vis1gzw")
compute_deltas_dnos(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,improvement
count,200.0,200.0,200.0
mean,-0.082614,0.706908,0.789523
std,1.017341,0.925771,0.883648
min,-2.212891,-1.744141,-2.246582
25%,-0.765015,0.038269,0.183695
50%,-0.093689,0.910121,0.60083
75%,0.686523,1.512451,1.241638
max,1.845703,1.950374,3.990234


### HPSv2

In [78]:
# DNO / HPSv2: fetch run h26tfx2i from the "inference-dno" project and
# summarise the per-prompt reward deltas over non-zero steps <= 15.
data = get_wandb_logs(api, "pjajal/inference-dno/h26tfx2i")
compute_deltas_dnos(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,improvement
count,200.0,200.0,200.0
mean,0.274603,0.286219,0.011616
std,0.016887,0.017411,0.012487
min,0.221313,0.240967,-0.038818
25%,0.262573,0.274109,0.004395
50%,0.275513,0.286255,0.010986
75%,0.288513,0.299805,0.018066
max,0.313477,0.32411,0.068604


## SD Cosyne

### Img Reward

In [79]:
# SD Cosyne / Img Reward: fetch run p5selqpd and summarise the per-prompt
# deltas over non-zero steps <= 15.
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/p5selqpd")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,mean,median,improvement_max,improvement_mean,improvement_median
count,200.0,200.0,200.0,200.0,200.0,200.0,200.0
mean,0.035915,1.614149,1.370119,1.378829,1.578233,1.334203,1.342913
std,1.058178,0.417252,0.566536,0.57337,1.024946,0.997066,1.010158
min,-2.229451,-0.092948,-0.61832,-0.608302,0.084586,-0.014937,-0.021542
25%,-0.672078,1.489509,1.110918,1.132617,0.713431,0.504703,0.490776
50%,0.137491,1.765213,1.543249,1.56469,1.396616,1.082124,1.092388
75%,0.847085,1.884118,1.8154,1.818406,2.244661,1.938503,1.925829
max,1.87652,2.001776,1.992454,1.993146,4.092068,4.070545,4.072615


### HPSv2

In [80]:
# SD Cosyne / HPSv2: fetch run 4p9ka0yv and summarise the per-prompt deltas
# over non-zero steps <= 15.
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/4p9ka0yv")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,mean,median,improvement_max,improvement_mean,improvement_median
count,200.0,200.0,200.0,200.0,200.0,200.0,200.0
mean,0.277531,0.310094,0.300125,0.299719,0.032563,0.022594,0.022189
std,0.018112,0.016525,0.016382,0.016421,0.013393,0.01219,0.012221
min,0.233154,0.270508,0.25946,0.259277,0.006592,-0.002472,-0.002686
25%,0.264771,0.299011,0.288067,0.28772,0.022705,0.012901,0.012634
50%,0.280029,0.311035,0.302002,0.301636,0.030518,0.021416,0.020752
75%,0.289551,0.321289,0.312466,0.312744,0.039673,0.027832,0.027588
max,0.33252,0.352051,0.340775,0.3396,0.072632,0.062363,0.062744


### CLIP

In [104]:
# SD Cosyne / CLIP: fetch run lxdmzzpo and summarise the per-prompt deltas
# over non-zero steps <= 15.
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/lxdmzzpo")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,mean,median,improvement_max,improvement_mean,improvement_median
count,200.0,200.0,200.0,200.0,200.0,200.0,200.0
mean,31.684453,38.791328,36.529292,36.454922,7.106875,4.844839,4.770469
std,4.032816,3.811949,3.578963,3.613618,3.387622,3.081431,3.077962
min,18.5,30.328125,28.550781,28.21875,1.21875,-0.107422,-0.15625
25%,29.03125,35.78125,34.312744,34.210938,4.585938,2.741699,2.652344
50%,31.539062,38.671875,36.09375,36.140625,6.109375,4.015625,3.84375
75%,34.75,41.484375,39.172363,38.859375,9.09375,6.568359,6.542969
max,42.59375,50.0625,46.882812,46.9375,20.3125,16.935547,16.6875


### JPEG

In [26]:
# SD Cosyne / JPEG: fetch run 5g3otp1e and summarise the per-prompt deltas.
# max_or_min="min" aggregates with the minimum -- presumably because lower
# JPEG scores are better here; confirm.
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/5g3otp1e")
compute_deltas(data, up_to_step=15, max_or_min="min").describe()

Unnamed: 0,baseline,pop_best,mean,median,improvement_max,improvement_mean,improvement_median
count,200.0,200.0,200.0,200.0,200.0,200.0,200.0
mean,115.62953,39.523165,45.02715,44.758985,-76.106365,-70.60238,-70.870545
std,27.508617,15.876781,16.276318,16.400938,27.784734,27.636138,27.599824
min,63.051998,13.779,15.429188,15.423,-199.767006,-197.204567,-196.811005
25%,96.998999,25.397,30.748124,30.243249,-93.75325,-87.917467,-87.939251
50%,113.143501,36.788,42.387501,42.414501,-75.110498,-71.139155,-71.369499
75%,131.945995,52.7305,58.911922,58.719252,-52.823501,-47.461876,-47.241499
max,236.546005,79.336998,85.922562,87.250999,-25.723999,-20.501316,-20.816002


### Rot

#### Img Reward

In [93]:
# Rot / Img Reward: fetch run sspqm9uy and summarise the per-prompt deltas
# over non-zero steps <= 15.
# NOTE(review): what "Rot" denotes is not stated in this notebook -- confirm.
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/sspqm9uy")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,mean,median,improvement_max,improvement_mean,improvement_median
count,200.0,200.0,200.0,200.0,200.0,200.0,200.0
mean,0.035915,1.531776,1.225925,1.22525,1.495861,1.190009,1.189335
std,1.058178,0.436858,0.617,0.633499,1.008532,0.950301,0.960091
min,-2.229451,-0.258774,-0.803441,-0.811372,0.054399,-0.32927,-0.335338
25%,-0.672078,1.367232,0.818715,0.845888,0.667455,0.413847,0.392734
50%,0.137491,1.658797,1.405625,1.419823,1.321334,0.936332,0.914392
75%,0.847085,1.857815,1.742325,1.738876,2.142321,1.828577,1.812024
max,1.87652,2.000602,1.975128,1.982139,4.030253,3.995003,3.98763


## SD SNES

### Img Reward

In [105]:
# SD SNES / Img Reward: fetch run yxq7l3oo and summarise the per-prompt
# deltas over non-zero steps <= 15.
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/yxq7l3oo")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,mean,median,improvement_max,improvement_mean,improvement_median
count,200.0,200.0,200.0,200.0,200.0,200.0,200.0
mean,0.035915,1.391709,0.704724,0.787659,1.355794,0.668808,0.751743
std,1.058178,0.523346,0.766267,0.800523,0.951857,0.812782,0.852293
min,-2.229451,-0.402447,-1.327335,-1.386546,0.087315,-0.730822,-0.668614
25%,-0.672078,1.108612,0.166353,0.192583,0.565178,0.110294,0.161316
50%,0.137491,1.53466,0.850373,0.933905,1.203921,0.505387,0.542231
75%,0.847085,1.812005,1.24809,1.469237,1.888701,1.011792,1.072933
max,1.87652,1.977036,1.927174,1.944097,4.049063,3.579776,3.885332


### HPSv2

In [106]:
# SD SNES / HPSv2: fetch run l1461i7v and summarise the per-prompt deltas
# over non-zero steps <= 15.
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/l1461i7v")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,mean,median,improvement_max,improvement_mean,improvement_median
count,200.0,200.0,200.0,200.0,200.0,200.0,200.0
mean,0.277531,0.300248,0.285404,0.286013,0.022717,0.007874,0.008482
std,0.018112,0.016065,0.015913,0.016141,0.01146,0.010033,0.010144
min,0.233154,0.259521,0.247888,0.247314,-0.000244,-0.011822,-0.010986
25%,0.264771,0.289307,0.27397,0.273865,0.014343,0.000351,0.00116
50%,0.280029,0.300781,0.287775,0.287964,0.020752,0.007025,0.007568
75%,0.289551,0.312012,0.297034,0.298096,0.028687,0.012567,0.013245
max,0.33252,0.345947,0.323072,0.324463,0.061279,0.041497,0.042969


### CLIP

In [107]:
# SD SNES / CLIP: fetch run j1yao8sr and summarise the per-prompt deltas
# over non-zero steps <= 15.
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/j1yao8sr")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,mean,median,improvement_max,improvement_mean,improvement_median
count,200.0,200.0,200.0,200.0,200.0,200.0,200.0
mean,31.684453,38.061328,35.372437,35.623438,6.376875,3.687984,3.938984
std,4.032816,3.750809,3.500092,3.563337,3.128541,2.660726,2.750112
min,18.5,29.09375,24.60322,24.546875,1.84375,-2.321022,-2.25
25%,29.03125,35.398438,33.160393,33.21875,4.148438,2.007577,2.171875
50%,31.539062,37.875,35.132103,35.328125,5.492188,3.115059,3.210938
75%,34.75,40.695312,37.414183,37.960938,8.097656,4.732482,4.972656
max,42.59375,49.5625,44.728222,44.96875,19.21875,16.803032,17.1875


## SD PGPE

### Img Reward

In [89]:
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/w5sjg6wc")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,mean,median,improvement_max,improvement_mean,improvement_median
count,200.0,200.0,200.0,200.0,200.0,200.0,200.0
mean,0.035915,1.261161,0.896539,0.921442,1.225246,0.860623,0.885526
std,1.058178,0.645635,0.796289,0.80134,0.884223,0.834261,0.846888
min,-2.229451,-1.166222,-1.640992,-1.681239,0.071018,-0.225553,-0.171246
25%,-0.672078,0.91431,0.350781,0.371503,0.52701,0.283817,0.291152
50%,0.137491,1.418985,1.069489,1.112125,1.046495,0.595365,0.621568
75%,0.847085,1.771437,1.541434,1.554925,1.666432,1.153547,1.20279
max,1.87652,1.97791,1.947841,1.951301,4.001263,3.936658,3.941486


### HPSv2

In [83]:
# SD PGPE / HPSv2: fetch run 5c0hh9jn and summarise the per-prompt deltas
# over non-zero steps <= 15.
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/5c0hh9jn")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,mean,median,improvement_max,improvement_mean,improvement_median
count,200.0,200.0,200.0,200.0,200.0,200.0,200.0
mean,0.277531,0.298867,0.289447,0.289587,0.021337,0.011916,0.012057
std,0.018112,0.01713,0.016872,0.016993,0.010459,0.009091,0.009167
min,0.233154,0.256348,0.252434,0.252197,0.003418,-0.004715,-0.004395
25%,0.264771,0.286804,0.277004,0.277039,0.013672,0.005665,0.005554
50%,0.280029,0.300903,0.290482,0.290527,0.019653,0.010849,0.010742
75%,0.289551,0.31134,0.301945,0.302124,0.027161,0.016739,0.017151
max,0.33252,0.341553,0.334412,0.335449,0.070435,0.04863,0.049683


### CLIP

In [90]:
# SD PGPE / CLIP: fetch run jd8c8l67 and summarise the per-prompt deltas
# over non-zero steps <= 15.
data = get_wandb_logs(api, "pjajal/inference-diffusion-noise-optim/jd8c8l67")
compute_deltas(data, up_to_step=15).describe()

Unnamed: 0,baseline,pop_best,mean,median,improvement_max,improvement_mean,improvement_median
count,200.0,200.0,200.0,200.0,200.0,200.0,200.0
mean,31.684453,37.051875,34.317905,34.396719,5.367422,2.633452,2.712266
std,4.032816,3.607407,3.417293,3.455033,2.951372,2.428392,2.49755
min,18.5,29.25,26.083984,25.859375,0.40625,-3.134766,-3.765625
25%,29.03125,34.398438,32.047119,31.976562,3.386719,1.135742,1.160156
50%,31.539062,36.65625,34.075195,34.03125,4.570312,2.083984,2.179688
75%,34.75,39.8125,36.391113,36.507812,6.683594,3.719238,3.777344
max,42.59375,47.84375,43.585938,43.46875,16.328125,14.525391,14.34375
