# [project page](https://wandb.ai/mines-paristech-cmm/fcdd-mvtec-dev00-checkpoint02)

I will add the test average-precision metric to some old runs and compute a stability metric for AUC / average precision.

In [None]:
# Make each cell display the value of every top-level expression statement,
# instead of just the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# get runs

In [None]:
from collections import defaultdict

import pandas as pd
import wandb

# Location of the runs on Weights & Biases.
WANDB_ENTITY = "mines-paristech-cmm"
WANDB_PROJECT = "fcdd-mvtec-dev00-checkpoint02"
WANDB_ENTITY_PROJECT = f"{WANDB_ENTITY}/{WANDB_PROJECT}"

# When a sweep id is set, only that sweep's runs are listed; otherwise the whole project.
WANDB_SWEEP_ID = None
WANDB_SWEEP_PATH = f"{WANDB_ENTITY_PROJECT}/{WANDB_SWEEP_ID}" if WANDB_SWEEP_ID else None

api = wandb.Api()

sweep = api.sweep(WANDB_SWEEP_PATH) if WANDB_SWEEP_PATH else None
runs = api.runs(WANDB_ENTITY_PROJECT) if sweep is None else sweep.runs

# Column name -> list of per-run values, filled one run at a time.
lists = defaultdict(list)


def append_to_list(key, value):
    """Append ``value`` to the accumulator list stored under ``key``."""
    lists[key].append(value)


for run in runs:
    fields = (
        ("summary", run.summary._json_dict),
        # config entries whose name starts with an underscore are left out
        ("config", {name: setting for name, setting in run.config.items() if not name.startswith('_')}),
        ("name", run.name),
        ("tags", run.tags),
        ("id", run.id),
        ("state", run.state),
    )
    for column, value in fields:
        append_to_list(column, value)

# One row per run, one column per accumulated field.
runs_df = pd.DataFrame.from_dict(data=lists)

runs_df.shape
runs_df.columns

In [None]:
def _sorted_union(collections_):
    """Return the sorted union of the elements of every collection in ``collections_``.

    Iterating a dict yields its keys, so for a column of dicts this is the
    sorted set of all keys seen in any row.
    """
    return sorted(set.union(*(set(items) for items in collections_)))


# All keys that appear in at least one run's summary.
summary_keys = _sorted_union(runs_df["summary"])
"summary"
"   ".join(summary_keys)

# All keys that appear in at least one run's config.
config_keys = _sorted_union(runs_df["config"])
"config"
"   ".join(config_keys)

# All tags used by at least one run.
tags = _sorted_union(runs_df["tags"])
"tags"
"   ".join(tags)

# filter runs

In [None]:
# Keep only the runs tagged "report-bmvc-02".
is_report_bmvc_02 = runs_df['tags'].apply(lambda x: "report-bmvc-02" in x)

# Take an explicit copy: columns are assigned into this frame afterwards, and
# assigning into a plain boolean-mask selection triggers pandas'
# SettingWithCopyWarning.
runs_df_filtered = runs_df[is_report_bmvc_02].copy()

runs_df_filtered.shape

# make new columns

In [None]:
import numpy as np


def extract(df, from_column, key):
    """Return a Series holding, for each row, ``df[from_column][key]``.

    Each cell of ``from_column`` is expected to offer ``.get`` (e.g. a dict);
    rows where ``key`` is absent yield ``None``.
    """
    def pick(mapping):
        return mapping.get(key)

    return df[from_column].apply(pick)


# Entries promoted from the "config" dict to their own columns.
config_keys = [
    "loss_mode",
    "noise_mode",
    "normal_class",
    "normal_class_label",
    "logdir",
    "datadir",
    "preproc",
    "batch_size",
]
# Entries promoted from the "summary" dict to their own columns.
summary_keys = [
    "test_rocauc",
]

df = runs_df_filtered
for source_column, wanted in (("config", config_keys), ("summary", summary_keys)):
    for key in wanted:
        df[key] = extract(df, source_column, key)
del df

runs_df_filtered.columns
runs_df_filtered[config_keys].head(5)

# categorical vals 

In [None]:
# Columns whose distinct values are listed below.
categorical_columns = ("loss_mode", "noise_mode", "normal_class_label")

for key in categorical_columns:
    print(f"{key}: {runs_df_filtered[key].unique()}")

# get metric histories

In [None]:
import progressbar

df = runs_df_filtered
# Names of the logged metrics whose full history should be fetched (none selected here).
keys = [
]

# get history of signals: one entry per run, each entry holding one array per key
hists_per_run = []

# With no keys selected there is nothing to fetch, so the per-run API
# round-trips are skipped entirely.
if keys:
    for run_id in progressbar.progressbar(df["id"], max_value=df.shape[0]):
        run = api.run(f"{WANDB_ENTITY_PROJECT}/{run_id}")
        # Materialize the scan once so it is iterated a single time per run,
        # however many keys are requested.
        records = list(run.scan_history(keys=keys))
        hists_per_run.append([np.array([record[key] for record in records]) for key in keys])

# transpose the list of lists: from per-run/per-key to per-key/per-run
hists_per_run = list(map(list, zip(*hists_per_run)))

# One new column per key; each cell holds that run's history array.
for key, list_ in zip(keys, hists_per_run):
    df[key] = list_

del df

# find model params and load

In [None]:
# First row of the filtered runs; the single-run cells below read their
# settings (datadir, logdir, id, ...) from it.
arow = runs_df_filtered.iloc[0]
arow

In [None]:
%pwd

In [None]:
from pathlib import Path
# Absolute form of ../python/dev; later cells join the runs' datadir / logdir
# values onto this directory.
dev_dir = Path("../python/dev").resolve()
dev_dir

In [None]:
# This cell and the following ones symlink the dev modules into the current
# directory; later cells import several of them by bare module name.
# NOTE(review): `ln -s` presumably errors on re-run once the link exists — confirm.
!ln -s ../python/dev/mvtec_dataset_dev01_bis.py 

In [None]:
!ln -s ../python/dev/common_dev01_bis.py 

In [None]:
!ln -s ../python/dev/data_dev01_bis.py 

In [None]:
!ln -s ../python/dev/model_dev01_bis.py 

In [None]:
!ln -s ../python/dev/callbacks_dev01_bis.py 

In [None]:
!ln -s ../python/dev/hacked_dev01.py 

In [None]:
ls -lh *_dev01_bis.py

## datamodule

In [None]:
# loader = DataLoader(ds, batch_size=16, num_workers=0)
from mvtec_dataset_dev01_bis import (
    DATAMODULE_PREPROCESS_MOMENT_BEFORE_BATCH_TRANSFER,
    MVTecAnomalyDetectionDataModule,
    SUPERVISE_MODE_REAL_ANOMALY,
)

# Data location, class, preprocessing and batch size come from the reference
# run `arow`; the remaining arguments are fixed values.
datamodule_kwargs = dict(
    root=(dev_dir / arow["datadir"]).resolve(),
    normal_class=arow["normal_class"],
    preprocessing=arow["preproc"],
    preprocess_moment=DATAMODULE_PREPROCESS_MOMENT_BEFORE_BATCH_TRANSFER,
    supervise_mode=SUPERVISE_MODE_REAL_ANOMALY,
    batch_size=int(arow["batch_size"]),
    nworkers=0,
    pin_memory=False,
    seed=0,
    raw_shape=(240, 240),
    net_shape=(224, 224),
    real_anomaly_limit=1,
)
datamodule = MVTecAnomalyDetectionDataModule(**datamodule_kwargs)
datamodule.prepare_data()
datamodule.setup()

## model

In [None]:
import torch
# from fcdd.models.fcdd_cnn_224 import FCDD_CNN224_VGG_F
from model_dev01_bis import FCDD

logdir = arow["logdir"]
snapshot_fpath = (dev_dir / logdir / "snapshot.pt").resolve()

# The original author notes that these values don't matter here.
placeholder_hparams = dict(
    optimizer_name="sgd",
    lr=1e-3,
    weight_decay=1e-5,
    scheduler_name="lambda",
    scheduler_parameters=[.999],
    loss_name="old-fcdd",
    dropout_mode=None,
    dropout_parameters=[],
)
net = FCDD(
    in_shape=(224, 224),
    model_name="FCDD_CNN224_VGG_F",
    **placeholder_hparams,
)

# Without CUDA the snapshot's tensors are remapped to the CPU; with CUDA,
# map_location=None is torch.load's default behaviour.
map_location = None if torch.cuda.is_available() else torch.device('cpu')
snapshot = torch.load(snapshot_fpath, map_location=map_location)

# The network weights are stored under the 'net' entry of the snapshot.
net_state = snapshot.pop('net', None)
assert net_state is not None
net.load_state_dict(net_state)
# The trailing semicolon keeps the notebook from echoing the module repr.
net.eval();

In [None]:
# steps_outputs = []
# for idx, batch in enumerate(datamodule.test_dataloader(embed_preprocessing=True)):
#     steps_outputs.append(net.test_step(batch, idx));
# net.test_epoch_end(steps_outputs);
# net.last_epoch_outputs.keys()

## logger

In [None]:
from pytorch_lightning.loggers import WandbLogger
import wandb

# Logger created with the reference run's id, in the same entity/project.
wandb_logger = WandbLogger(id=arow["id"], project=WANDB_PROJECT, entity=WANDB_ENTITY)

## trainer

In [None]:
from pytorch_lightning import Trainer
from callbacks_dev01_bis import LogPrcurveCallback
from common_dev01_bis import create_python_random_generator

accelerator = "cuda" if torch.cuda.is_available() else "cpu"

# Callback set up for the "test" stage, reading "score_maps" and "gtmaps".
prcurve_callback = LogPrcurveCallback(
    scores_key="score_maps",
    gt_key="gtmaps",
    log_curve=False,
    limit_points=None,
    # doesnt matter because limit_points is None
    python_generator=create_python_random_generator(0),
    stage="test",
)

# NOTE(review): gpus=1 is passed even when the accelerator falls back to
# "cpu" — confirm how the installed Lightning version treats that combination.
trainer = Trainer(
    accelerator=accelerator,
    gpus=1,
    logger=wandb_logger,
    callbacks=[prcurve_callback],
)
    

In [None]:
# Run the test stage for the loaded network on the datamodule built above,
# using the trainer (and its attached W&B logger) from the previous cell.
trainer.test(model=net, datamodule=datamodule) 

In [None]:
# Close the logger and end the active W&B run before another logger is created.
wandb_logger.close()
wandb.finish()

# test all runs

In [None]:
from mvtec_dataset_dev01_bis import MVTecAnomalyDetectionDataModule, DATAMODULE_PREPROCESS_MOMENT_BEFORE_BATCH_TRANSFER, SUPERVISE_MODE_REAL_ANOMALY
import torch
from pytorch_lightning.loggers import WandbLogger
import wandb
from model_dev01_bis import FCDD
from pytorch_lightning import Trainer
from callbacks_dev01_bis import LogPrcurveCallback
from common_dev01_bis import create_python_random_generator

# Re-run the test stage for the filtered runs and log the results under each
# run's own W&B id. Runs whose summary already contains "test/avg-precision"
# are skipped.
for idx, (rowidx, row) in enumerate(runs_df_filtered.iterrows()):

    # Only the first 6 rows are examined (skipped runs count towards this limit).
    if idx >= 6:
        break

    runid = row["id"]
    run = api.run(f"{WANDB_ENTITY_PROJECT}/{runid}")

    if "test/avg-precision" in run.summary.keys():
        print(f"skipping {runid}")
        continue

    # Per-run settings must be read from `row` (the run being processed).
    # Reading them from `arow` would evaluate the first run's snapshot and
    # dataset settings for every run while logging under each run's id.
    logdir = row["logdir"]
    batch_size = int(row["batch_size"])
    normal_class = row["normal_class"]
    preproc = row["preproc"]
    datadir = row["datadir"]

    net = FCDD(
        in_shape=(224, 224),
        model_name="FCDD_CNN224_VGG_F",
        # these values dont matter
        optimizer_name="sgd",
        lr=1e-3,
        weight_decay=1e-5,
        scheduler_name="lambda",
        scheduler_parameters=[.999],
        loss_name="old-fcdd",
        dropout_mode=None,
        dropout_parameters=[],
    )

    snapshot_fpath = (dev_dir / logdir / "snapshot.pt").resolve()

    # Without CUDA the snapshot's tensors are remapped to the CPU; with CUDA,
    # map_location=None is torch.load's default behaviour.
    map_location = None if torch.cuda.is_available() else torch.device('cpu')
    snapshot = torch.load(snapshot_fpath, map_location=map_location)

    # Fail with a clear message instead of handing None to load_state_dict.
    net_state = snapshot.pop('net', None)
    if net_state is None:
        raise KeyError(f"no 'net' entry in snapshot {snapshot_fpath}")
    net.load_state_dict(net_state)
    net.eval()

    wandb_logger = WandbLogger(id=runid, project=WANDB_PROJECT, entity=WANDB_ENTITY,)
    try:
        trainer = Trainer(
            accelerator="cuda" if torch.cuda.is_available() else "cpu",
            gpus=1,
            logger=wandb_logger,
            callbacks=[
                LogPrcurveCallback(
                    scores_key="score_maps", gt_key="gtmaps", log_curve=False, limit_points=None,
                    # doesnt matter because limit_points is None
                    python_generator=create_python_random_generator(0), stage="test",
                )
            ],
        )

        trainer.test(
            model=net,
            datamodule=MVTecAnomalyDetectionDataModule(
                root=(dev_dir / datadir).resolve(),
                normal_class=normal_class,
                preprocessing=preproc,
                preprocess_moment=DATAMODULE_PREPROCESS_MOMENT_BEFORE_BATCH_TRANSFER,
                supervise_mode=SUPERVISE_MODE_REAL_ANOMALY,
                batch_size=batch_size,
                nworkers=0,
                pin_memory=False,
                seed=0,
                raw_shape=(240, 240),
                net_shape=(224, 224),
                real_anomaly_limit=1,
            )
        )
    finally:
        # Always end the W&B run, even if testing failed, so that an open run
        # is not left behind for the next logger to pick up.
        wandb_logger.close()
        wandb.finish()