In [20]:
import json
import shutil
import zipfile
from pathlib import Path
from pprint import pprint

import neptune
import yaml
from neptune.sessions import Session
from tqdm import tqdm

# Qualified name of the Neptune project; passed to get_project() below.
PROJECT = "kjang0517/msmarco"

# NOTE(review): Session() is created without arguments, so credentials presumably
# come from the environment / local Neptune configuration — confirm.
session = Session()
project = session.get_project(project_qualified_name=PROJECT)
# Alternative module-level API, left here for reference only (not executed):
# neptune.init(project_qualified_name="kjang0517/msmarco")
# neptune.get_experiments()



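# Download experiment records and artifacts (실험 기록 및 결과물 다운로드)
* Only experiments with `state == 'succeeded'` are fetched.
* For each experiment, the parameters, the numeric channel values and the artifacts (`download_artifacts()`) are saved under `experiments/<experiment id>/`.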

In [2]:
# Local root directory; every experiment is mirrored into base_dir/<experiment id>.
base_dir = Path("experiments")
# exist_ok makes the cell safe to re-run without a separate existence check.
base_dir.mkdir(parents=True, exist_ok=True)

# Only experiments that finished successfully are downloaded.
target_exps = [exp for exp in project.get_experiments() if exp.state == "succeeded"]
for exp in tqdm(target_exps):
    destination_dir = base_dir / exp.id
    # An existing directory is the marker for "already downloaded".
    if destination_dir.exists():
        print("Skipping", exp.id)
        continue
    destination_dir.mkdir(parents=True)

    # start fetching
    print("Fetching", exp.id)
    try:
        # parameters
        params = exp.get_parameters()
        with (destination_dir / "parameters.json").open("w", encoding="utf-8") as f:
            json.dump(params, f, indent=4)

        # channels
        channel_names = list(exp.get_channels().keys())
        channel_df = exp.get_numeric_channels_values(*channel_names)
        channel_df.to_csv(destination_dir / "channels.csv")

        # artifacts
        exp.download_artifacts(destination_dir=str(destination_dir))
    except BaseException:
        # Because directory existence is the skip marker, a half-filled directory
        # would make every later run skip this experiment. Remove it (also on
        # KeyboardInterrupt) so the next run fetches the experiment again, then
        # propagate the original error.
        shutil.rmtree(destination_dir, ignore_errors=True)
        raise

  0%|                                                                                           | 0/30 [00:00<?, ?it/s]

Skipping MSMAR-103
Skipping MSMAR-106
Skipping MSMAR-113
Skipping MSMAR-116
Skipping MSMAR-137
Skipping MSMAR-138
Skipping MSMAR-139
Skipping MSMAR-141
Skipping MSMAR-142
Skipping MSMAR-166
Skipping MSMAR-167
Skipping MSMAR-170
Skipping MSMAR-176
Skipping MSMAR-177
Skipping MSMAR-178
Skipping MSMAR-179
Skipping MSMAR-180
Skipping MSMAR-181
Skipping MSMAR-182
Skipping MSMAR-183
Skipping MSMAR-184
Skipping MSMAR-186
Skipping MSMAR-187
Skipping MSMAR-188
Fetching MSMAR-189


 83%|████████████████████████████████████████████████████████████████████▎             | 25/30 [00:18<00:03,  1.37it/s]

Skipping MSMAR-191
Fetching MSMAR-192


 90%|█████████████████████████████████████████████████████████████████████████▊        | 27/30 [01:03<00:22,  7.37s/it]

Fetching MSMAR-193


 93%|████████████████████████████████████████████████████████████████████████████▌     | 28/30 [01:46<00:35, 17.93s/it]

Fetching MSMAR-194


 97%|███████████████████████████████████████████████████████████████████████████████▎  | 29/30 [02:01<00:16, 16.92s/it]

Fetching MSMAR-195


100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [02:20<00:00,  4.69s/it]


# Unzip Artifacts

In [4]:
# Collect every archive up front: the archives are deleted inside the loop,
# so the directory tree must not be walked lazily while it is being modified.
zip_files = [*base_dir.glob("**/*.zip")]
for archive_path in zip_files:
    print("Processing", archive_path)
    # Extract next to the archive itself, then drop the archive.
    extract_dir = archive_path.parent
    with zipfile.ZipFile(archive_path) as archive:
        archive.extractall(path=extract_dir)
    archive_path.unlink()

# Inspect Model Properties

In [36]:
def get_model_properties(hparams_path):
    """Summarise a hyper-parameter YAML file as one underscore-joined string.

    The fields appear in this order:
    base embedding (second "/"-separated component of ``dataset.emb_path``),
    ``model.input_size``, ``model.name``, the ``model.n`` values followed by
    the ``model.k`` values, the enabled losses joined with "-" ("recover"
    and/or "task"), ``train.batch_size`` and ``train.learning_rate``.

    Args:
        hparams_path: Path to the YAML file holding the hyper-parameters.

    Returns:
        str: All fields converted with ``str`` and joined with "_".

    Raises:
        KeyError: If one of the expected configuration keys is missing.
        IndexError: If ``dataset.emb_path`` contains no "/".
    """
    with open(hparams_path, encoding="utf-8") as stream:
        config = yaml.load(stream, Loader=yaml.SafeLoader)

    # Base embedding name and its dimensionality.
    base_emb = config["dataset"]["emb_path"].split("/")[1]
    model_cfg = config["model"]
    base_emb_dim = model_cfg["input_size"]
    model_name = model_cfg["name"]

    # Architecture tag: all n values first, then all k values.
    n_part = "_".join(map(str, model_cfg["n"]))
    k_part = "_".join(map(str, model_cfg["k"]))
    model_arch = f"{n_part}_{k_part}"

    # Enabled losses, always in the order recover -> task.
    loss_cfg = config["loss"]
    loss_flags = (("use_recovery_loss", "recover"), ("use_task_loss", "task"))
    enabled_losses = [tag for flag, tag in loss_flags if loss_cfg[flag]]

    # Batch size and learning rate close the string.
    train_cfg = config["train"]
    fields = (
        base_emb,
        base_emb_dim,
        model_name,
        model_arch,
        "-".join(enabled_losses),
        train_cfg["batch_size"],
        train_cfg["learning_rate"],
    )
    return "_".join(map(str, fields))


def filter_properties(hparams, conditions):
    """Keep the entries whose property string contains every condition.

    Matching is plain substring containment (``cond in value``).

    Args:
        hparams: Mapping of experiment id -> property string.
        conditions: A single substring, or a list/tuple/set/frozenset of
            substrings that must all occur in the property string. Any other
            value is treated as one single condition.

    Returns:
        dict: The matching entries, in the iteration order of ``hparams``.
        An empty collection of conditions matches every entry.
    """
    # Previously only ``list`` was recognised, so a tuple of conditions was
    # wrapped as one condition and ``tuple in str`` raised TypeError.
    if not isinstance(conditions, (list, tuple, set, frozenset)):
        conditions = [conditions]
    # Generator form lets ``all`` stop at the first condition that fails.
    return {k: v for k, v in hparams.items() if all(cond in v for cond in conditions)}

In [46]:
# Maps experiment id -> property string built by get_model_properties().
hparams = {}
# glob() yields paths in a filesystem-dependent order and only the first YAML
# seen per experiment is kept, so sort the paths to make that choice
# reproducible between runs and machines.
hparam_files = sorted(base_dir.glob("**/*.yaml"))
for hparams_file in hparam_files:
    # The experiment id is the first path component containing "MSMAR-".
    exp_id = [p for p in hparams_file.parts if "MSMAR-" in p][0]
    if exp_id in hparams:
        # A YAML file for this experiment was already recorded; skip parsing.
        continue
    hparams[exp_id] = get_model_properties(hparams_file)
# Order by property string so experiments with similar settings are adjacent.
hparams = dict(sorted(hparams.items(), key=lambda item: item[1]))

## bert

In [57]:
# Experiments whose property string contains both 'bert' and 'recover-task'.
filter_properties(hparams, ['bert', 'recover-task'])

{'MSMAR-103': 'bert_768_wta_2048_0.02_recover-task_8192_0.0005',
 'MSMAR-106': 'bert_768_wta_4096_0.02_recover-task_8192_0.0005',
 'MSMAR-116': 'bert_768_wta_8192_0.02_recover-task_8192_0.0005'}

## WTA

In [55]:
# Experiments whose property string contains both 'wta' and 'recover-task'.
filter_properties(hparams, ['wta', 'recover-task'])

{'MSMAR-103': 'bert_768_wta_2048_0.02_recover-task_8192_0.0005',
 'MSMAR-106': 'bert_768_wta_4096_0.02_recover-task_8192_0.0005',
 'MSMAR-116': 'bert_768_wta_8192_0.02_recover-task_8192_0.0005',
 'MSMAR-137': 'fse_300_wta_2048_0.02_recover-task_8192_0.0005',
 'MSMAR-142': 'fse_300_wta_2048_0.05_recover-task_8192_0.0001',
 'MSMAR-138': 'fse_300_wta_4096_0.02_recover-task_8192_0.0005',
 'MSMAR-141': 'fse_300_wta_4096_0.05_recover-task_8192_0.0001',
 'MSMAR-139': 'fse_300_wta_8192_0.02_recover-task_8192_0.0005',
 'MSMAR-180': 'fse_300_wta_8192_0.02_recover-task_8192_0.0005',
 'MSMAR-170': 'fse_300_wta_8192_0.05_recover-task_8192_0.0001',
 'MSMAR-177': 'fse_300_wta_8192_0.05_recover-task_8192_0.0001'}

## SparseNet

In [51]:
# Experiments whose property string contains 'sparsenet'.
filter_properties(hparams, 'sparsenet')

{'MSMAR-189': 'fse_300_sparsenet_2048_40_recover-task_8192_0.0005'}

## recover

In [48]:
# The trailing underscore is deliberate: fields are joined with "_" and losses
# with "-", so 'recover_' matches recovery-only runs but not 'recover-task_'.
filter_properties(hparams, 'recover_')

{'MSMAR-167': 'bert_768_wta_2048_0.02_recover_8192_0.0005',
 'MSMAR-183': 'bert_768_wta_2048_0.05_recover_8192_0.0005',
 'MSMAR-188': 'bert_768_wta_4096_0.02_recover_8192_0.0005',
 'MSMAR-195': 'bert_768_wta_4096_0.05_recover_8192_0.0005',
 'MSMAR-166': 'fse_300_wta_2048_0.02_recover_8192_0.0005',
 'MSMAR-181': 'fse_300_wta_2048_0.05_recover_8192_0.0005',
 'MSMAR-184': 'fse_300_wta_4096_0.02_recover_8192_0.0005',
 'MSMAR-191': 'fse_300_wta_4096_0.05_recover_8192_0.0005',
 'MSMAR-194': 'fse_300_wta_8192_0.02_recover_8192_0.0005'}

## recover-task

In [34]:
# Experiments whose property string contains 'recover-task' (both losses enabled).
filter_properties(hparams, 'recover-task')

{'MSMAR-103': 'bert_768_wta_2048_0.02_recover-task_8192_0.0005',
 'MSMAR-106': 'bert_768_wta_4096_0.02_recover-task_8192_0.0005',
 'MSMAR-113': 'bert_768_wta_8192_0.02_recover-task_8192_0.0005',
 'MSMAR-116': 'bert_768_wta_8192_0.02_recover-task_8192_0.0005',
 'MSMAR-189': 'fse_300_sparsenet_2048_40_recover-task_8192_0.0005',
 'MSMAR-137': 'fse_300_wta_2048_0.02_recover-task_8192_0.0005',
 'MSMAR-142': 'fse_300_wta_2048_0.05_recover-task_8192_0.0001',
 'MSMAR-138': 'fse_300_wta_4096_0.02_recover-task_8192_0.0005',
 'MSMAR-141': 'fse_300_wta_4096_0.05_recover-task_8192_0.0001',
 'MSMAR-139': 'fse_300_wta_8192_0.02_recover-task_8192_0.0005',
 'MSMAR-180': 'fse_300_wta_8192_0.02_recover-task_8192_0.0005',
 'MSMAR-170': 'fse_300_wta_8192_0.05_recover-task_8192_0.0001',
 'MSMAR-177': 'fse_300_wta_8192_0.05_recover-task_8192_0.0001'}