# Spoof Attack Analysis


In [2]:
import re
from typing import Dict, Any

import wandb
import polars as pl
import altair as alt

In [3]:
def parse_sweep_url(url: str):
    """Split a W&B sweep URL into its ``(entity, project, sweep_id)`` parts.

    The URL only needs to *contain* a ``wandb.ai/ENTITY/PROJECT/sweeps/ID``
    segment; anything before or after that segment is ignored.

    Raises:
        ValueError: if no such segment is found in ``url``.
    """
    sweep_pattern = re.compile(r'wandb\.ai/([^/]+)/([^/]+)/sweeps/([a-z0-9]+)')
    found = sweep_pattern.search(url)
    if found is None:
        raise ValueError('Unrecognized sweep URL; expected .../ENTITY/PROJECT/sweeps/ID')
    entity, project, sweep_id = found.groups()
    return entity, project, sweep_id

def fetch_sweep_runs(entity: str, project: str, sweep_id: str) -> pl.DataFrame:
    """
    Fetch every run of a W&B sweep and return them as a DataFrame, one row per run.

    Each row starts with the run metadata columns (`run_id`, `run_name`, `state`,
    `created_at`, `entity`, `project`, `sweep_id`). The run's config and summary
    entries are then merged in as flat columns under their original key names
    (no `config.` / `summary.` prefix). On a name clash the summary value replaces
    the config value, and either one replaces a metadata column of the same name.

    Args:
        entity: W&B entity that owns the project.
        project: W&B project that contains the sweep.
        sweep_id: ID of the sweep whose runs are fetched.

    Returns:
        A polars DataFrame with one row per run of the sweep.
    """
    # Generous timeout: the sweep is fetched over the network in one go.
    api = wandb.Api(timeout=300)
    sweep = api.sweep(f'{entity}/{project}/{sweep_id}')

    records = []
    for run in sweep.runs:
        # `or {}` guards against a run whose config/summary attribute is None/empty.
        cfg: Dict[str, Any] = dict(getattr(run, 'config', {}) or {})
        summ: Dict[str, Any] = dict(getattr(run, 'summary', {}) or {})

        rec: Dict[str, Any] = {
            'run_id': getattr(run, 'id', None),
            'run_name': getattr(run, 'name', None),
            'state': getattr(run, 'state', None),
            'created_at': str(getattr(run, 'created_at', '')),
            'entity': entity,
            'project': project,
            'sweep_id': sweep_id,
        }
        # Order matters: config first, then summary, so summary wins on clashes.
        rec.update(cfg)
        rec.update(summ)
        records.append(rec)

    # Scan *all* records when inferring the schema. The polars default only looks
    # at the first 100 rows, so keys or wider dtypes that first appear in later
    # runs would otherwise not be reflected in the resulting columns.
    return pl.DataFrame(records, infer_schema_length=None)


In [4]:
# Coordinates of the W&B sweep analysed in this notebook.
WANDB_ENTITY = 'juanbelieni-lab'
WANDB_PROJECT = 'watermark-spoof'
SWEEP_ID = 'phkoqagh'

# Pull every run of the sweep into a single DataFrame (one row per run).
runs_df = fetch_sweep_runs(
    entity=WANDB_ENTITY,
    project=WANDB_PROJECT,
    sweep_id=SWEEP_ID,
)
print(f'Loaded {len(runs_df)} runs from sweep.')
runs_df.head()


[34m[1mwandb[0m: Currently logged in as: [33mjuanbelieni[0m ([33mjuanbelieni-lab[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Loaded 220 runs from sweep.


run_id,run_name,state,created_at,entity,project,sweep_id,seed,clip_c,model_id,delta_att,num_prompts,window_size,limit_samples,max_new_tokens,_runtime,_step,_timestamp,_wandb,success_rate,z_avg
str,str,str,str,str,str,str,i64,i64,str,f64,i64,i64,i64,i64,i64,i64,f64,object,f64,f64
"""56x7mkfo""","""unique-sweep-1""","""finished""","""2025-08-29T06:59:18Z""","""juanbelieni-lab""","""watermark-spoof""","""phkoqagh""",0,2,"""Qwen/Qwen2.5-3B-Instruct""",2.5,100,2,5,500,61,0,1756500000.0,{'runtime': 61},0.06,0.092551
"""2oovukmv""","""proud-sweep-2""","""finished""","""2025-08-29T07:00:31Z""","""juanbelieni-lab""","""watermark-spoof""","""phkoqagh""",1,2,"""Qwen/Qwen2.5-3B-Instruct""",2.5,100,2,5,500,60,0,1756500000.0,{'runtime': 60},0.05,0.044097
"""ecioxny3""","""magic-sweep-3""","""finished""","""2025-08-29T07:01:38Z""","""juanbelieni-lab""","""watermark-spoof""","""phkoqagh""",2,2,"""Qwen/Qwen2.5-3B-Instruct""",2.5,100,2,5,500,58,0,1756500000.0,{'runtime': 58},0.03,0.082924
"""6z4eonph""","""sparkling-sweep-4""","""finished""","""2025-08-29T07:02:46Z""","""juanbelieni-lab""","""watermark-spoof""","""phkoqagh""",3,2,"""Qwen/Qwen2.5-3B-Instruct""",2.5,100,2,5,500,60,0,1756500000.0,{'runtime': 60},0.03,-0.166147
"""czyoodei""","""atomic-sweep-5""","""finished""","""2025-08-29T07:03:53Z""","""juanbelieni-lab""","""watermark-spoof""","""phkoqagh""",4,2,"""Qwen/Qwen2.5-3B-Instruct""",2.5,100,2,5,500,59,0,1756500000.0,{'runtime': 59},0.04,-0.051357


In [5]:
# Mean `success_rate` and `z_avg` for every (limit_samples, delta_att) pair.
# NOTE(review): the factor of 3 applied to `limit_samples` is not explained in
# this notebook — confirm what it converts the value into.
scaled_limit = (pl.col("limit_samples") * 3).alias("limit_samples")
mean_metrics = [
    pl.col(metric).mean().alias(metric)
    for metric in ("success_rate", "z_avg")
]
agg_df = (
    runs_df
    .with_columns(scaled_limit)
    .group_by(["limit_samples", "delta_att"])
    .agg(mean_metrics)
    .sort(["delta_att", "limit_samples"])
)

# One line per `delta_att` value: success rate against limit_samples (log x-axis).
x_enc = alt.X("limit_samples", type="quantitative").scale(type="log")
y_enc = alt.Y("success_rate", type="quantitative")
color_enc = alt.Color("delta_att", type="nominal", title="delta_att")

chart = (
    alt.Chart(agg_df.to_pandas())
    .mark_line(interpolate="monotone")
    .encode(x=x_enc, y=y_enc, color=color_enc)
)
chart

In [6]:
# Export the aggregated frame as CSV. The target path is relative, so it is
# resolved against the working directory the notebook is run from.
agg_df.write_csv("../web/graph_data_b0.csv")