In [48]:
import glob
from os import path
import pandas as pd
import json
import numpy as np
from scipy.stats import sem

# ablation

In [100]:
CMD = (
    "CUDA_VISIBLE_DEVICES={} python -m torch.distributed.launch --nproc_per_node=1 "
    "--master_addr='127.0.0.{}' --master_port=2950{} ")
CMD_main = (
    "train_dist.py "
    "--model {arch} --batch_size 128 --lr 1e-04 --test_batch_size 128 "
    "--no_use_several_test_samples --epochs 200 "
    "--save_dir experiments/paper_ablation/{model_name}/v{i} "
    "--tile_size 1200 --data_version v2 --seed {seed}")

# arch, no_augmentation, no_use_pretrained
hyperparams = [
    ["resnet18", False, False],
    ["resnet50", False, True],
    ["resnet50", False, False],
    ["resnet50", True, True],
    ["resnet50", True, False],
    ["wide_resnet50_2", True, True],
    ["wide_resnet50_2", True, False],
    ["wide_resnet50_2", False, True],
    ["wide_resnet50_2", False, False]
]
CHECK_SH = '[ ! -d "{}" ] && '

all_cmds = []
for hyperparam in hyperparams:
    arch, no_augmentation, no_use_pretrained = hyperparam
    for i, seed in enumerate([10, 42, 100]):
        model_name = arch
        if no_augmentation:
            model_name += "_no-augment"
        else:
            model_name += "_augment"
        if no_use_pretrained:
            model_name += "_no-pretrained"
        else:
            model_name += "_pretrained"
        cmd = CMD_main.format(arch=arch, model_name=model_name, i=i, seed=seed)
        if no_augmentation:
            cmd += " --no_augmentation"
        if no_use_pretrained:
            cmd += " --no_use_pretrained"
        save_dir = "experiments/paper_ablation/{model_name}/v{i}".format(
            model_name=model_name, i=i)
        all_cmds.append((cmd, save_dir))
print(len(all_cmds))
print()
gpus = [2]
no_gpu = len(gpus)
no_cmds = len(all_cmds) // no_gpu
for count, i in enumerate(gpus):
    print("; ".join([
        CHECK_SH.format(c[1]) + CMD.format(i, i + 5, i + 5) + c[0] 
        for c in all_cmds[count * no_cmds:(count + 1) * no_cmds]]))

27

[ ! -d "experiments/paper_ablation/resnet18_augment_pretrained/v0" ] && CUDA_VISIBLE_DEVICES=2 python -m torch.distributed.launch --nproc_per_node=1 --master_addr='127.0.0.7' --master_port=29507 train_dist.py --model resnet18 --batch_size 128 --lr 1e-04 --test_batch_size 128 --no_use_several_test_samples --epochs 200 --save_dir experiments/paper_ablation/resnet18_augment_pretrained/v0 --tile_size 1200 --data_version v2 --seed 10; [ ! -d "experiments/paper_ablation/resnet18_augment_pretrained/v1" ] && CUDA_VISIBLE_DEVICES=2 python -m torch.distributed.launch --nproc_per_node=1 --master_addr='127.0.0.7' --master_port=29507 train_dist.py --model resnet18 --batch_size 128 --lr 1e-04 --test_batch_size 128 --no_use_several_test_samples --epochs 200 --save_dir experiments/paper_ablation/resnet18_augment_pretrained/v1 --tile_size 1200 --data_version v2 --seed 42; [ ! -d "experiments/paper_ablation/resnet18_augment_pretrained/v2" ] && CUDA_VISIBLE_DEVICES=2 python -m torch.distributed.launc

# tile size ablation

In [42]:
CMD = (
    "CUDA_VISIBLE_DEVICES={} python -m torch.distributed.launch --nproc_per_node=1 "
    "--master_addr='127.0.0.{}' --master_port=290{} ")
CMD_main = (
    "train_dist.py "
    "--model resnet50 --batch_size 128 --lr 1e-04 --test_batch_size 128 "
    "--no_use_several_test_samples --epochs 200 "
    "--save_dir experiments/tilesize_ablation/resnet50_{tile_size}/v{i} "
    "--tile_size {tile_size} --data_version v2 --seed {seed}")
CHECK_SH = '[ ! -d "{}" ] && '

all_cmds = []
for tile_size in [300, 600, 1200]:
    for i, seed in enumerate([10, 42, 100]):
        cmd = CMD_main.format(tile_size=tile_size, seed=seed, i=i)
        save_dir = "experiments/tilesize_ablation/resnet50_{tile_size}/v{i}".format(
            tile_size=tile_size, i=i)
        all_cmds.append((cmd, save_dir))
gpus = [0, 1, 2, 3]
no_gpu = len(gpus)
no_cmds = len(all_cmds) // no_gpu
for count, i in enumerate(gpus):
    print("; ".join([
        CHECK_SH.format(c[1]) + CMD.format(i, i + 7, i + 7) + c[0] 
        for c in all_cmds[count * no_cmds:(count + 1) * no_cmds]]))

[ ! -d "experiments/tilesize_ablation/resnet50_300/v0" ] && CUDA_VISIBLE_DEVICES=0 python -m torch.distributed.launch --nproc_per_node=1 --master_addr='127.0.0.7' --master_port=2907 train_dist.py --model resnet50 --batch_size 128 --lr 1e-04 --test_batch_size 128 --no_use_several_test_samples --epochs 200 --save_dir experiments/tilesize_ablation/resnet50_300/v0 --tile_size 300 --data_version v2 --seed 10; [ ! -d "experiments/tilesize_ablation/resnet50_300/v1" ] && CUDA_VISIBLE_DEVICES=0 python -m torch.distributed.launch --nproc_per_node=1 --master_addr='127.0.0.7' --master_port=2907 train_dist.py --model resnet50 --batch_size 128 --lr 1e-04 --test_batch_size 128 --no_use_several_test_samples --epochs 200 --save_dir experiments/tilesize_ablation/resnet50_300/v1 --tile_size 300 --data_version v2 --seed 42
[ ! -d "experiments/tilesize_ablation/resnet50_300/v2" ] && CUDA_VISIBLE_DEVICES=1 python -m torch.distributed.launch --nproc_per_node=1 --master_addr='127.0.0.8' --master_port=2908 tra

# evaluation commands

In [120]:
EVAL_CMD = "python train_dist.py --evaluate --save_dir {save_dir}"
# FP = "/data/b2p-siteident/experiments/paper_ablation/*/*"
FP = "/data/b2p-siteident/experiments/tilesize_ablation/*/*"

In [121]:
folders = glob.glob(FP)
all_cmds = []
for folder in folders:
    for no_use_several_samples in [True, False]:       
        
        cmd = EVAL_CMD.format(save_dir=folder)
        if no_use_several_samples:
            cmd += " --no_use_several_test_samples"
        all_cmds.append(cmd)

CMD_GPU = "CUDA_VISIBLE_DEVICES={} "
gpu_jobs = {i: "" for i in range(1)}
for i, cmd in enumerate(all_cmds):
    gpucmd = CMD_GPU.format(3) + cmd + "; "
    gpu_jobs[i % 1] += gpucmd
print(len(all_cmds))

16


In [122]:
gpu_jobs

{0: 'CUDA_VISIBLE_DEVICES=3 python train_dist.py --evaluate --save_dir /data/b2p-siteident/experiments/tilesize_ablation/resnet50_600/v0 --no_use_several_test_samples; CUDA_VISIBLE_DEVICES=3 python train_dist.py --evaluate --save_dir /data/b2p-siteident/experiments/tilesize_ablation/resnet50_600/v0; CUDA_VISIBLE_DEVICES=3 python train_dist.py --evaluate --save_dir /data/b2p-siteident/experiments/tilesize_ablation/resnet50_600/v2 --no_use_several_test_samples; CUDA_VISIBLE_DEVICES=3 python train_dist.py --evaluate --save_dir /data/b2p-siteident/experiments/tilesize_ablation/resnet50_600/v2; CUDA_VISIBLE_DEVICES=3 python train_dist.py --evaluate --save_dir /data/b2p-siteident/experiments/tilesize_ablation/resnet50_600/v1 --no_use_several_test_samples; CUDA_VISIBLE_DEVICES=3 python train_dist.py --evaluate --save_dir /data/b2p-siteident/experiments/tilesize_ablation/resnet50_600/v1; CUDA_VISIBLE_DEVICES=3 python train_dist.py --evaluate --save_dir /data/b2p-siteident/experiments/tilesize_

['/data/b2p-siteident/experiments/paper_ablation/wide_resnet50_2_no-augment_pretrained/v0',
 '/data/b2p-siteident/experiments/paper_ablation/wide_resnet50_2_no-augment_pretrained/v2',
 '/data/b2p-siteident/experiments/paper_ablation/wide_resnet50_2_no-augment_pretrained/v1',
 '/data/b2p-siteident/experiments/paper_ablation/resnet50_no-augment_no-pretrained/v0',
 '/data/b2p-siteident/experiments/paper_ablation/resnet50_no-augment_no-pretrained/v2',
 '/data/b2p-siteident/experiments/paper_ablation/resnet50_no-augment_no-pretrained/v1',
 '/data/b2p-siteident/experiments/paper_ablation/resnet18_augment_pretrained/v0',
 '/data/b2p-siteident/experiments/paper_ablation/resnet18_augment_pretrained/v2',
 '/data/b2p-siteident/experiments/paper_ablation/resnet18_augment_pretrained/v1',
 '/data/b2p-siteident/experiments/paper_ablation/resnet50_augment_no-pretrained/v0',
 '/data/b2p-siteident/experiments/paper_ablation/resnet50_augment_no-pretrained/v2',
 '/data/b2p-siteident/experiments/paper_abla

# evaluation

In [117]:
abl_fp = "/data/b2p-siteident/experiments/paper_ablation/*"

In [118]:
header = [
    "data_mod", 
    # "version", 
    "model",
    "freeze", "use_several_samples", 
    # "tile_size", 
    "pretrained", "augment"
]
metrics = [
    # "val_acc", "val_weighted_f1",
    "test_acc",
    # "test_weighted_f1",
    "test_rw_acc",
    # "test_rw_weighted_f1",
    "test_ug_acc", 
    # "test_ug_weighted_f1",
    "ecount"
]

entries_dict = {}
for exp_fp in sorted(glob.glob(path.join(abl_fp, "*"))):
    if not (path.isfile(path.join(exp_fp, "stats_True_32.json")) and
            path.isfile(path.join(exp_fp, "stats_False_32.json"))):
            print("{} not finished.".format(exp_fp))
            continue
    with open(path.join(exp_fp, "opts.json")) as f:
        opts = json.load(f)
    
    with open(path.join(exp_fp, "stats_True_32.json")) as f:
        stats_no_several = json.load(f)
    
    with open(path.join(exp_fp, "stats_False_32.json")) as f:
        stats_several = json.load(f)
    
    data_mod = "small" if len(opts["data_modalities"]) < 7 else "large"
    data_version = opts["data_version"]
    freeze = opts["use_last_n_layers"]
    tile_size = opts["tile_size"]
    pretrained = not opts["no_use_pretrained"]
    augment = not opts["no_augmentation"]
    model = opts["model"]
    for use_several in [True, False]:
        if use_several:
            stats = stats_several
        else:
            stats = stats_no_several
        key = (data_mod, 
               # data_version, 
               model, freeze, use_several, 
               # tile_size, 
               pretrained, augment)
        if key not in entries_dict:
            entries_dict[key] = {m: [] for m in metrics}
            entries_dict[key]["ecount"] = 0
            entries_dict[key]["path"] = exp_fp
        for m in metrics:
            if m == "ecount":
                continue
            entries_dict[key][m].append(stats[m])
        entries_dict[key]["ecount"] += 1

entries = []
i = 0
for k, entry in entries_dict.items():
    avg_entry = list(k)
    for m in metrics:
        if m == "ecount":
            avg_entry.append(entry[m])
        else:
            val = np.array(entry[m])
            if "acc" in m:
                val *= 100
            avg_entry.append(round(np.mean(val), 2))
            avg_entry.append(round(sem(val), 2))
    entries.append(avg_entry)
    i += 1

metrics_header = []
for m in metrics:
    if m == "ecount":
        metrics_header.append(m)
    else:
        metrics_header.append(m + "_m")
        metrics_header.append(m + "_ste")
df = pd.DataFrame(entries, columns=header + metrics_header)

/data/b2p-siteident/experiments/paper_ablation/wide_resnet50_2_augment_no-pretrained/v0 not finished.
/data/b2p-siteident/experiments/paper_ablation/wide_resnet50_2_augment_no-pretrained/v1 not finished.
/data/b2p-siteident/experiments/paper_ablation/wide_resnet50_2_augment_no-pretrained/v2 not finished.
/data/b2p-siteident/experiments/paper_ablation/wide_resnet50_2_augment_pretrained/v0 not finished.


In [119]:
df.sort_values(by=["model", "use_several_samples", "augment", "pretrained"])

Unnamed: 0,data_mod,model,freeze,use_several_samples,pretrained,augment,test_acc_m,test_acc_ste,test_rw_acc_m,test_rw_acc_ste,test_ug_acc_m,test_ug_acc_ste,ecount
1,large,resnet18,-1,False,True,True,81.26,0.85,76.85,1.42,80.21,1.88,3
0,large,resnet18,-1,True,True,True,82.95,0.37,78.06,0.6,83.07,1.88,3
7,large,resnet50,-1,False,False,False,79.92,0.83,69.82,0.84,81.25,0.9,3
9,large,resnet50,-1,False,True,False,79.19,1.1,72.79,1.33,77.86,1.82,3
3,large,resnet50,-1,False,False,True,78.88,0.97,68.42,0.63,83.07,1.45,3
5,large,resnet50,-1,False,True,True,81.18,0.37,72.39,0.5,81.77,0.69,3
6,large,resnet50,-1,True,False,False,81.26,0.41,73.47,1.05,81.25,1.35,3
8,large,resnet50,-1,True,True,False,79.65,1.57,74.14,1.77,79.95,0.69,3
2,large,resnet50,-1,True,False,True,80.91,0.73,71.4,1.8,80.47,3.12,3
4,large,resnet50,-1,True,True,True,81.57,0.35,74.32,0.47,82.03,1.19,3
