In [1]:
# Enable IPython's autoreload extension in mode 2, so that imported modules
# are re-imported before each cell runs and edits to project code take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Name under which the W&B sweep is registered (used as sweep_configuration["name"]).
# A labelled prompt makes the otherwise blank input box self-explanatory, and
# stripping avoids accidental leading/trailing whitespace in the sweep name.
run_name = input("W&B sweep name: ").strip()

 architectures


In [3]:
# System imports
import os
import sys
import yaml

# External imports
import matplotlib.pyplot as plt
import scipy as sp
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.metrics import auc
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from pytorch_lightning.loggers import TensorBoardLogger, WandbLogger
from pytorch_lightning import Trainer
import frnn
import wandb
import math
# Add the directory two levels up to the import path; presumably this is the
# repository root that contains the `LightningModules` package imported below.
sys.path.append('../..')

from LightningModules.DualEmbedding.Models.vanilla_dual_embedding import VanillaDualEmbedding

# Use the GPU when torch reports one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
from pytorch_lightning.callbacks import ModelCheckpoint

In [4]:
def kaiming_init(model):
    """Re-initialise the parameters of ``model`` in place with Kaiming-style normal draws.

    Rules, applied per entry of ``model.named_parameters()``:

    * names ending in ``.bias`` are set to zero;
    * parameters with fewer than two dimensions that are not biases (for
      example the scale vector of a normalisation layer) are left untouched,
      because they have no fan-in to scale by;
    * every other parameter is drawn from a zero-mean normal distribution with
      standard deviation ``gain / sqrt(fan_in)``, where ``fan_in`` is
      ``param.shape[1]``. The gain is 1 for the first layer (its input has not
      passed through an activation) and ``sqrt(2)`` elsewhere.

    A parameter counts as "first layer" when its name starts with
    ``layers.0`` (the original convention) or with ``input_layer``, which is
    how the dual-embedding model names its input projections
    (``input_layer1`` / ``input_layer2``).

    Args:
        model: a ``torch.nn.Module`` whose parameters are overwritten.

    Returns:
        None. The model is modified in place.
    """
    first_layer_prefixes = ("layers.0", "input_layer")

    # Initialisation must not be recorded by autograd.
    with torch.no_grad():
        for name, param in model.named_parameters():
            if name.endswith(".bias"):
                param.zero_()
                continue
            if param.dim() < 2:
                # 1-D weights have no shape[1]; keep the module's own default.
                continue

            fan_in = param.shape[1]
            gain = 1.0 if name.startswith(first_layer_prefixes) else math.sqrt(2)
            param.normal_(0, gain / math.sqrt(fan_in))

## Sweep

In [5]:
# Hyperparameter grid explored by the sweep (becomes the sweep's "parameters").
with open("dual_embedding_sweep.yaml") as sweep_file:
    sweep_hparams = yaml.load(sweep_file, Loader=yaml.FullLoader)

# Baseline hyperparameters; each sweep run overlays its own values on these.
with open("dual_embedding_default.yaml") as defaults_file:
    default_hparams = yaml.load(defaults_file, Loader=yaml.FullLoader)

In [6]:
# W&B sweep definition: an exhaustive grid over `sweep_hparams`, ranked by the
# logged "pur" metric (higher is better).
sweep_configuration = {
    "name": run_name,
    # Spelling fixed ("barrell" -> "barrel") so this agrees with the project
    # name passed to wandb.sweep() when the sweep is created.
    "project": "ITk_barrel_dual_embedding",
    "metric": {"name": "pur", "goal": "maximize"},
    "method": "grid",
    "parameters": sweep_hparams,
}

In [7]:
def load_dict(model, checkpoint_path="/global/cfs/cdirs/m3443/usr/ryanliu/ITk_embedding/ITk_dual_embedding/3ijb4qnw/checkpoints/last.ckpt"):
    """Warm-start ``model`` from a checkpoint, mirroring encoder 1 into encoder 2.

    The checkpoint's ``"state_dict"`` is read, and for every entry a copy is
    stored under the same key with ``input_layer1`` / ``layers1`` /
    ``output_layer1`` renamed to their ``...2`` counterparts. Entries whose key
    contains none of those names are kept as they are. The resulting state dict
    is then loaded into ``model`` with ``load_state_dict`` (strict matching, so
    missing or unexpected keys raise).

    Args:
        model: module exposing the parameter names found in the checkpoint.
        checkpoint_path: file to read. Defaults to the previously hard-coded
            checkpoint, so existing ``load_dict(model)`` calls are unchanged.

    Returns:
        The same ``model`` object, with the weights loaded.
    """
    # Deserialise onto the CPU so a checkpoint saved from a GPU run can be read
    # on any host; load_state_dict copies the values into the model's own
    # parameters regardless of where those live.
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    state_dict = checkpoint["state_dict"]

    renames = (
        ("input_layer1", "input_layer2"),
        ("layers1", "layers2"),
        ("output_layer1", "output_layer2"),
    )
    # Snapshot the keys first: new entries are inserted while looping.
    for key in list(state_dict):
        mirrored_key = key
        for old, new in renames:
            mirrored_key = mirrored_key.replace(old, new)
        state_dict[mirrored_key] = state_dict[key]

    model.load_state_dict(state_dict)
    return model

In [8]:
def training():
    """Run one sweep trial: build the model, initialise it, and fit it.

    Called by the W&B agent once per trial. The trial's hyperparameters are the
    YAML defaults overlaid with the values W&B selected for this run
    (``wandb.config``). When ``use_dual_encoder`` is set the model is
    warm-started through ``load_dict``; otherwise it is re-initialised with
    ``kaiming_init``. Checkpointing tracks the logged ``"pur"`` metric.

    Returns:
        None.
    """
    wandb.init()

    # Sweep-selected values take precedence over the defaults.
    hparams = {**default_hparams, **wandb.config}
    model = VanillaDualEmbedding(hparams)

    if model.hparams["use_dual_encoder"]:
        model = load_dict(model)
    else:
        kaiming_init(model)

    # Keep the two checkpoints with the highest "pur", plus the latest one.
    checkpoint_callback = ModelCheckpoint(
        monitor='pur',
        mode="max",
        save_top_k=2,
        save_last=True)

    logger = WandbLogger()
    trainer = Trainer(
        gpus=1,
        # Read from the merged hparams so a sweep that varies max_epochs is
        # honoured; identical to the default when the sweep does not set it.
        max_epochs=hparams["max_epochs"],
        log_every_n_steps=50,
        logger=logger,
        callbacks=[checkpoint_callback],
        default_root_dir="/global/cfs/cdirs/m3443/usr/ryanliu/ITk_dual_embedding/",
    )
    trainer.fit(model)

In [None]:
# Register the sweep with W&B and keep the id it hands back.
sweep_id = wandb.sweep(sweep_configuration, project="ITk_barrel_dual_embedding")

# Start an agent in this process; it calls `training` once per trial.
wandb.agent(sweep_id, function=training)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: 6y0m08n8
Sweep URL: https://wandb.ai/exatrkx/ITk_barrel_dual_embedding/sweeps/6y0m08n8


[34m[1mwandb[0m: Agent Starting Run: hv4fwgw9 with config:
[34m[1mwandb[0m: 	use_bidir_truth: False
[34m[1mwandb[0m: 	use_dual_encoder: True
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mexatrkx[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.11 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(

  | Name          | Type             | Params
---------------------------------------------------
0 | cos           | CosineSimilarity | 0     
1 | input_layer1  | Linear           | 12.3 K
2 | layers1       | ModuleList       | 5.2 M 
3 | output_layer1 | Linear           | 16.4 K
4 | input_layer2  | Linear           | 12.3 K
5 | layers2       | ModuleList       | 5.2 M 
6 | output_layer2 | Linear           | 16.4 K
7 | act           | GELU             | 0     
---------------------------------------------------
10.6 M    Trainable params
0         Non-trainable params
10.6 M    Total params
42.214    Total estimated model params size (MB)


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


Validation sanity check:  50%|█████     | 1/2 [00:01<00:01,  1.60s/it]



                                                                      

  rank_zero_warn(


Epoch 0:  99%|█████████▉| 1000/1010 [03:56<00:02,  4.22it/s, loss=0.121, v_num=wgw9]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/10 [00:00<?, ?it/s][A
Epoch 0:  99%|█████████▉| 1002/1010 [03:58<00:01,  4.20it/s, loss=0.121, v_num=wgw9]
Validating:  20%|██        | 2/10 [00:02<00:08,  1.11s/it][A
Epoch 0:  99%|█████████▉| 1004/1010 [04:00<00:01,  4.18it/s, loss=0.121, v_num=wgw9]
Validating:  40%|████      | 4/10 [00:04<00:05,  1.03it/s][A
Epoch 0: 100%|█████████▉| 1006/1010 [04:02<00:00,  4.16it/s, loss=0.121, v_num=wgw9]
Validating:  60%|██████    | 6/10 [00:05<00:03,  1.23it/s][A
Epoch 0: 100%|█████████▉| 1008/1010 [04:03<00:00,  4.14it/s, loss=0.121, v_num=wgw9]
Validating:  80%|████████  | 8/10 [00:06<00:01,  1.44it/s][A
Epoch 0: 100%|██████████| 1010/1010 [04:04<00:00,  4.13it/s, loss=0.121, v_num=wgw9]
Epoch 0: 100%|██████████| 1010/1010 [04:05<00:00,  4.11it/s, loss=0.121, v_num=wgw9]
Epoch 1:  99%|█████████▉| 1000/1010 [04:03<00:02,  4.10it/s, loss=0.1

## Construct PyTorch Lightning model

In [4]:
# Read the default hyperparameters for a single (non-sweep) run.
# NOTE(review): yaml.FullLoader is used here; only load config files you trust.
with open("dual_embedding_default.yaml") as f:
    hparams = yaml.load(f, Loader=yaml.FullLoader)

In [5]:
# Build the dual-embedding Lightning module from the default hyperparameters.
model = VanillaDualEmbedding(hparams)

## Metric Learning

In [6]:
from pytorch_lightning.callbacks import ModelCheckpoint

# Retain the two checkpoints with the highest logged "pur" value and always
# keep the most recent one as well.
checkpoint_callback = ModelCheckpoint(
    monitor="pur",
    mode="max",
    save_top_k=2,
    save_last=True,
)

In [None]:
# Training from scratch: re-initialise the weights before fitting.
kaiming_init(model)

logger = WandbLogger(project="ITk_dual_embedding")
trainer = Trainer(
    gpus=1,
    max_epochs=hparams["max_epochs"],
    logger=logger,
    num_sanity_val_steps=2,
    callbacks=[checkpoint_callback],
    log_every_n_steps=50,
    default_root_dir="/global/cfs/cdirs/m3443/usr/ryanliu/ITk_embedding/",
)
trainer.fit(model)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mexatrkx[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.11 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade



  | Name          | Type             | Params
---------------------------------------------------
0 | cos           | CosineSimilarity | 0     
1 | input_layer1  | Linear           | 12.3 K
2 | layers1       | ModuleList       | 5.2 M 
3 | output_layer1 | Linear           | 16.4 K
4 | input_layer2  | Linear           | 12.3 K
5 | layers2       | ModuleList       | 5.2 M 
6 | output_layer2 | Linear           | 16.4 K
7 | act           | GELU             | 0     
---------------------------------------------------
10.6 M    Trainable params
0         Non-trainable params
10.6 M    Total params
42.214    Total estimated model params size (MB)


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


Validation sanity check:  50%|█████     | 1/2 [00:02<00:02,  2.63s/it]



                                                                      

  rank_zero_warn(


Epoch 0:  99%|█████████▉| 1000/1010 [04:17<00:02,  3.88it/s, loss=0.0983, v_num=q6d8]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/10 [00:00<?, ?it/s][A
Epoch 0:  99%|█████████▉| 1002/1010 [04:20<00:02,  3.84it/s, loss=0.0983, v_num=q6d8]
Validating:  20%|██        | 2/10 [00:05<00:20,  2.52s/it][A
Epoch 0:  99%|█████████▉| 1004/1010 [04:25<00:01,  3.78it/s, loss=0.0983, v_num=q6d8]
Validating:  40%|████      | 4/10 [00:10<00:14,  2.49s/it][A
Epoch 0: 100%|█████████▉| 1006/1010 [04:30<00:01,  3.72it/s, loss=0.0983, v_num=q6d8]
Validating:  60%|██████    | 6/10 [00:14<00:09,  2.32s/it][A
Epoch 0: 100%|█████████▉| 1008/1010 [04:34<00:00,  3.67it/s, loss=0.0983, v_num=q6d8]
Validating:  80%|████████  | 8/10 [00:18<00:04,  2.08s/it][A
Epoch 0: 100%|██████████| 1010/1010 [04:38<00:00,  3.63it/s, loss=0.0983, v_num=q6d8]
Epoch 0: 100%|██████████| 1010/1010 [04:40<00:00,  3.60it/s, loss=0.0983, v_num=q6d8]
Epoch 1:  99%|█████████▉| 1000/1010 [03:50<00:02,  4.34it/s, l

## Initialize from trained model

In [4]:
import wandb
# Close any W&B run still open from an earlier cell before starting a new one.
wandb.finish()

with open("dual_embedding_default.yaml") as f:
    hparams = yaml.load(f, Loader=yaml.FullLoader)

# This section always trains the dual-encoder variant.
hparams["use_dual_encoder"] = True

model = VanillaDualEmbedding(hparams)

# Deserialise onto the CPU so the checkpoint can be read regardless of the
# device it was saved from; load_state_dict copies values into the model.
checkpoint = torch.load(
    "/global/cfs/cdirs/m3443/usr/ryanliu/ITk_embedding/ITk_dual_embedding/3ijb4qnw/checkpoints/last.ckpt",
    map_location="cpu",
)
state_dict = checkpoint["state_dict"]

# Mirror every encoder-1 entry under the corresponding encoder-2 key, so both
# encoders start from the same trained weights. The keys are snapshotted first
# because entries are inserted while looping.
for key in list(state_dict):
    mirrored_key = (
        key.replace("input_layer1", "input_layer2")
        .replace("layers1", "layers2")
        .replace("output_layer1", "output_layer2")
    )
    state_dict[mirrored_key] = state_dict[key]

model.load_state_dict(state_dict)
# `checkpoint` also references the state dict, so both names must be dropped
# for the loaded tensors to become collectable.
del state_dict, checkpoint

In [5]:
from pytorch_lightning.callbacks import ModelCheckpoint

# Retain the two checkpoints with the highest logged "pur" value and always
# keep the most recent one as well.
checkpoint_callback = ModelCheckpoint(
    monitor="pur",
    mode="max",
    save_top_k=2,
    save_last=True,
)

In [None]:
# Continue training the warm-started model, logging to the W&B project below.
logger = WandbLogger(project="ITk_dual_embedding")
trainer = Trainer(
    gpus=1,
    max_epochs=hparams["max_epochs"],
    logger=logger,
    num_sanity_val_steps=2,
    callbacks=[checkpoint_callback],
    log_every_n_steps=50,
    default_root_dir="/global/cfs/cdirs/m3443/usr/ryanliu/ITk_embedding/",
)
trainer.fit(model)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mexatrkx[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.11 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade



  | Name          | Type             | Params
---------------------------------------------------
0 | cos           | CosineSimilarity | 0     
1 | input_layer1  | Linear           | 12.3 K
2 | layers1       | ModuleList       | 5.2 M 
3 | output_layer1 | Linear           | 16.4 K
4 | input_layer2  | Linear           | 12.3 K
5 | layers2       | ModuleList       | 5.2 M 
6 | output_layer2 | Linear           | 16.4 K
7 | act           | GELU             | 0     
---------------------------------------------------
10.6 M    Trainable params
0         Non-trainable params
10.6 M    Total params
42.214    Total estimated model params size (MB)


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


Validation sanity check:  50%|█████     | 1/2 [00:01<00:01,  1.46s/it]



                                                                      

  rank_zero_warn(


Epoch 0:  99%|█████████▉| 1000/1010 [03:52<00:02,  4.29it/s, loss=0.0123, v_num=uh1o]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/10 [00:00<?, ?it/s][A
Epoch 0:  99%|█████████▉| 1002/1010 [03:56<00:01,  4.24it/s, loss=0.0123, v_num=uh1o]
Validating:  20%|██        | 2/10 [00:05<00:19,  2.42s/it][A
Epoch 0:  99%|█████████▉| 1004/1010 [04:00<00:01,  4.18it/s, loss=0.0123, v_num=uh1o]
Validating:  40%|████      | 4/10 [00:09<00:13,  2.19s/it][A
Epoch 0: 100%|█████████▉| 1006/1010 [04:04<00:00,  4.11it/s, loss=0.0123, v_num=uh1o]
Validating:  60%|██████    | 6/10 [00:13<00:08,  2.17s/it][A
Epoch 0: 100%|█████████▉| 1008/1010 [04:08<00:00,  4.06it/s, loss=0.0123, v_num=uh1o]
Validating:  80%|████████  | 8/10 [00:16<00:03,  1.80s/it][A
Epoch 0: 100%|██████████| 1010/1010 [04:11<00:00,  4.02it/s, loss=0.0123, v_num=uh1o]
Epoch 0: 100%|██████████| 1010/1010 [04:13<00:00,  3.98it/s, loss=0.0123, v_num=uh1o]
Epoch 1:  99%|█████████▉| 1000/1010 [04:04<00:02,  4.09it/s, l