In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Read the sweep name interactively from the user; the value is used as the
# "name" entry of the W&B sweep configuration defined further down.
run_name = input()

 pretrained_weight_and_margin


In [3]:
# System imports
import os
import sys
import yaml

# External imports
import matplotlib.pyplot as plt
import scipy as sp
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.metrics import auc
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from pytorch_lightning.loggers import TensorBoardLogger, WandbLogger
from pytorch_lightning import Trainer
import frnn
import wandb
import math
sys.path.append('../..')

from LightningModules.DualEmbedding.Models.vanilla_dual_embedding import VanillaDualEmbedding

device = "cuda" if torch.cuda.is_available() else "cpu"
from pytorch_lightning.callbacks import ModelCheckpoint

In [4]:
def kaiming_init(model):
    """Re-initialise the parameters of ``model`` in place with Kaiming-style normal draws.

    Every ``(name, param)`` pair yielded by ``model.named_parameters()`` is
    handled as follows:

    * names ending in ``.bias`` are filled with zeros;
    * other parameters with fewer than two dimensions (for example the scale
      vector of a normalisation layer) are left untouched, because they have
      no ``shape[1]`` to derive a standard deviation from;
    * names starting with ``layers.0`` are drawn from a normal distribution
      with mean 0 and std ``1 / sqrt(shape[1])``;
    * all remaining parameters are drawn from a normal distribution with
      mean 0 and std ``sqrt(2) / sqrt(shape[1])``.

    Args:
        model: Object exposing ``named_parameters()`` that yields
            ``(name, tensor)`` pairs, e.g. a ``torch.nn.Module``.

    Returns:
        None. The parameters are modified in place through ``param.data``.
    """
    for name, param in model.named_parameters():
        if name.endswith(".bias"):
            param.data.fill_(0)
            continue

        # 1-D (or scalar) non-bias parameters have no second dimension;
        # indexing shape[1] on them would raise IndexError, so skip them.
        if param.dim() < 2:
            continue

        # The first layer does not have ReLU applied on its input, so it gets
        # a gain of 1 instead of the sqrt(2) used for the other layers.
        gain = 1 if name.startswith("layers.0") else math.sqrt(2)
        param.data.normal_(0, gain / math.sqrt(param.shape[1]))

## Sweep

In [5]:
# Search space of the sweep; used as the "parameters" entry of the sweep configuration.
with open("dual_embedding_sweep.yaml") as sweep_file:
    sweep_hparams = yaml.load(sweep_file, Loader=yaml.FullLoader)

# Baseline hyperparameters; each trial overlays its sweep values on top of these.
with open("dual_embedding_default.yaml") as default_file:
    default_hparams = yaml.load(default_file, Loader=yaml.FullLoader)

In [6]:
# W&B sweep definition: a grid search over the values in `sweep_hparams`
# that maximises the logged "pur" metric. The sweep is named after the
# interactively entered `run_name`.
sweep_configuration = {
    "name": run_name,
    # Spelling fixed ("barrell" -> "barrel") so this agrees with the project
    # name passed to wandb.sweep() when the sweep is created.
    "project": "ITk_barrel_dual_embedding",
    "metric": {"name": "pur", "goal": "maximize"},
    "method": "grid",
    "parameters": sweep_hparams,
}

In [7]:
def load_dict(model, checkpoint_path="/global/cfs/cdirs/m3443/usr/ryanliu/ITk_embedding/ITk_dual_embedding/3ijb4qnw/checkpoints/last.ckpt"):
    """Load pretrained weights into ``model``, copying tower-1 weights onto tower 2.

    The checkpoint's ``"state_dict"`` is read, and every entry whose key
    contains ``input_layer1``, ``layers1`` or ``output_layer1`` is duplicated
    under the corresponding ``input_layer2`` / ``layers2`` / ``output_layer2``
    key. Any existing entry under that second key is overwritten, so after
    loading both sets of layers hold the first set's checkpointed weights.

    Args:
        model: Module exposing ``load_state_dict``; its parameter names must
            match the keys of the remapped state dict, since the load is strict.
        checkpoint_path: Path of the checkpoint file to read. Defaults to the
            pretrained run this notebook was originally written against.

    Returns:
        The same ``model`` object, with the weights loaded.
    """
    # Deserialise onto the CPU: this works on hosts without a GPU even if the
    # checkpoint was saved from one, and load_state_dict copies the values
    # into the model's existing parameters wherever those live.
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    state_dict = checkpoint["state_dict"]

    # Iterate over a snapshot of the keys because entries are added in the loop.
    for key in list(state_dict):
        twin_key = (
            key.replace("input_layer1", "input_layer2")
            .replace("layers1", "layers2")
            .replace("output_layer1", "output_layer2")
        )
        if twin_key != key:
            state_dict[twin_key] = state_dict[key]

    model.load_state_dict(state_dict)
    return model

In [8]:
def training():
    """Run a single sweep trial.

    Starts a W&B run, builds a ``VanillaDualEmbedding`` from the default
    hyperparameters overlaid with the values in ``wandb.config``, loads the
    pretrained weights via ``load_dict`` and fits the model with a Lightning
    ``Trainer`` that logs to W&B and checkpoints on the ``pur`` metric.

    Takes no arguments and returns nothing.
    """
    wandb.init()

    # On key collisions the values from wandb.config override the defaults.
    hparams = {**default_hparams, **wandb.config}
    # Weights come from the pretrained checkpoint; kaiming_init(model) is not applied.
    model = load_dict(VanillaDualEmbedding(hparams))

    # Keep the two checkpoints with the highest "pur" plus the most recent one.
    checkpoint_callback = ModelCheckpoint(
        monitor="pur",
        mode="max",
        save_top_k=2,
        save_last=True,
    )
    wandb_logger = WandbLogger()

    trainer = Trainer(
        gpus=1,
        max_epochs=default_hparams["max_epochs"],
        log_every_n_steps=50,
        logger=wandb_logger,
        callbacks=[checkpoint_callback],
        default_root_dir="/global/cfs/cdirs/m3443/usr/ryanliu/ITk_dual_embedding/",
    )
    trainer.fit(model)

In [None]:
# Register the sweep with W&B under the target project and keep its ID.
sweep_id = wandb.sweep(sweep=sweep_configuration, project="ITk_barrel_dual_embedding")

# Run the sweep from this kernel; the agent invokes training() for the runs it starts.
wandb.agent(sweep_id=sweep_id, function=training)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: 9ycd43iv
Sweep URL: https://wandb.ai/exatrkx/ITk_barrel_dual_embedding/sweeps/9ycd43iv


[34m[1mwandb[0m: Agent Starting Run: vfydayb3 with config:
[34m[1mwandb[0m: 	margin: 0.5
[34m[1mwandb[0m: 	use_bidir_truth: True
[34m[1mwandb[0m: 	weight_ratio: 0.5
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mexatrkx[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.11 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(

  | Name          | Type             | Params
---------------------------------------------------
0 | cos           | CosineSimilarity | 0     
1 | input_layer1  | Linear           | 12.3 K
2 | layers1       | ModuleList       | 5.2 M 
3 | output_layer1 | Linear           | 16.4 K
4 | input_layer2  | Linear           | 12.3 K
5 | layers2       | ModuleList       | 5.2 M 
6 | output_layer2 | Linear           | 16.4 K
7 | act           | GELU             | 0     
---------------------------------------------------
10.6 M    Trainable params
0         Non-trainable params
10.6 M    Total params
42.214    Total estimated model params size (MB)


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


Validation sanity check:  50%|█████     | 1/2 [00:02<00:02,  2.58s/it]



                                                                      

  rank_zero_warn(


Epoch 0: 100%|█████████▉| 1000/1005 [04:36<00:01,  3.62it/s, loss=0.217, v_num=ayb3]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 0: 100%|█████████▉| 1002/1005 [04:45<00:00,  3.51it/s, loss=0.217, v_num=ayb3]
Epoch 0: 100%|█████████▉| 1003/1005 [04:56<00:00,  3.39it/s, loss=0.217, v_num=ayb3]
Epoch 0: 100%|█████████▉| 1004/1005 [05:01<00:00,  3.33it/s, loss=0.217, v_num=ayb3]
Epoch 0: 100%|██████████| 1005/1005 [05:09<00:00,  3.25it/s, loss=0.217, v_num=ayb3]
Epoch 0: 100%|██████████| 1005/1005 [05:19<00:00,  3.15it/s, loss=0.217, v_num=ayb3]
Epoch 1: 100%|█████████▉| 1000/1005 [04:38<00:01,  3.59it/s, loss=0.215, v_num=ayb3]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 1: 100%|█████████▉| 1002/1005 [04:48<00:00,  3.47it/s, loss=0.215, v_num=ayb3]
Epoch 1: 100%|█████████▉| 1003/1005 [05:05<00:00,  3.28it/s, loss=0.215, v_num=ayb3]
Epoch 1: 100%|█████████▉| 1004/1005 [05:05<00:00,  3.28it/s, 

0,1
current_lr,████▃▃▃▃▃▁
cut_eff,▄▃▆▁▁▆▇▇██
cut_pur,▁▂▃▃▄▅▆▇▇█
dist@0.98,▁▂▃▅█▇▇█▇█
eff,█▇▆▅▄▃▂▂▂▁
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇████
pur,▁▂▃▃▄▅▆▇▇█
train_loss,█▆▄▄▄▃▃▃▃▃▃▃▃▂▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▂▁▂▂▂▁▂▂▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
current_lr,0.0
cut_eff,0.97667
cut_pur,0.00019
dist@0.98,0.42055
eff,0.87519
epoch,9.0
pur,0.00109
train_loss,0.21014
trainer/global_step,9999.0


[34m[1mwandb[0m: Agent Starting Run: xe4j70hs with config:
[34m[1mwandb[0m: 	margin: 0.5
[34m[1mwandb[0m: 	use_bidir_truth: True
[34m[1mwandb[0m: 	weight_ratio: 0.75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.12.11 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(

  | Name          | Type             | Params
---------------------------------------------------
0 | cos           | CosineSimilarity | 0     
1 | input_layer1  | Linear           | 12.3 K
2 | layers1       | ModuleList       | 5.2 M 
3 | output_layer1 | Linear           | 16.4 K
4 | input_layer2  | Linear           | 12.3 K
5 | layers2       | ModuleList       | 5.2 M 
6 | output_layer2 | Linear           | 16.4 K
7 | act           | GELU             | 0     
---------------------------------------------------
10.6 M    Trainable params
0         Non-trainable params
10.6 M    Total params
42.214    Total estimated model params size (MB)


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                      

  rank_zero_warn(


Epoch 0: 100%|█████████▉| 1000/1005 [04:36<00:01,  3.61it/s, loss=0.238, v_num=70hs]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 0: 100%|█████████▉| 1002/1005 [04:46<00:00,  3.49it/s, loss=0.238, v_num=70hs]
Epoch 0: 100%|█████████▉| 1003/1005 [05:03<00:00,  3.31it/s, loss=0.238, v_num=70hs]
Epoch 0: 100%|█████████▉| 1004/1005 [05:03<00:00,  3.31it/s, loss=0.238, v_num=70hs]
Epoch 0: 100%|██████████| 1005/1005 [05:11<00:00,  3.22it/s, loss=0.238, v_num=70hs]
Epoch 0: 100%|██████████| 1005/1005 [05:21<00:00,  3.13it/s, loss=0.238, v_num=70hs]
Epoch 1: 100%|█████████▉| 1000/1005 [04:37<00:01,  3.61it/s, loss=0.236, v_num=70hs]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 1: 100%|█████████▉| 1002/1005 [04:48<00:00,  3.47it/s, loss=0.236, v_num=70hs]
Epoch 1: 100%|█████████▉| 1003/1005 [04:56<00:00,  3.38it/s, loss=0.236, v_num=70hs]
Epoch 1: 100%|█████████▉| 1004/1005 [05:06<00:00,  3.28it/s, 

0,1
current_lr,████▃▃▃▃▃▁
cut_eff,▇▄▂▁▂▅▅▆▆█
cut_pur,▁▂▂▃▃▅▆▇▇█
dist@0.98,▁▄▅█▆▆▆▇▇▇
eff,█▆▆▃▄▃▃▂▁▁
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇████
pur,▁▂▂▃▃▅▆▇▇█
train_loss,█▇▅▅▄▄▄▃▄▃▃▃▃▃▂▃▃▃▃▃▂▂▂▁▂▂▂▂▂▂▂▂▂▁▂▂▁▂▁▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
current_lr,0.0
cut_eff,0.97657
cut_pur,0.00019
dist@0.98,0.38552
eff,0.87581
epoch,9.0
pur,0.00108
train_loss,0.22926
trainer/global_step,9999.0


[34m[1mwandb[0m: Agent Starting Run: 5e9czlgy with config:
[34m[1mwandb[0m: 	margin: 0.5
[34m[1mwandb[0m: 	use_bidir_truth: True
[34m[1mwandb[0m: 	weight_ratio: 1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.12.11 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(

  | Name          | Type             | Params
---------------------------------------------------
0 | cos           | CosineSimilarity | 0     
1 | input_layer1  | Linear           | 12.3 K
2 | layers1       | ModuleList       | 5.2 M 
3 | output_layer1 | Linear           | 16.4 K
4 | input_layer2  | Linear           | 12.3 K
5 | layers2       | ModuleList       | 5.2 M 
6 | output_layer2 | Linear           | 16.4 K
7 | act           | GELU             | 0     
---------------------------------------------------
10.6 M    Trainable params
0         Non-trainable params
10.6 M    Total params
42.214    Total estimated model params size (MB)


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                      

  rank_zero_warn(


Epoch 0: 100%|█████████▉| 1000/1005 [04:37<00:01,  3.61it/s, loss=0.242, v_num=zlgy]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 0: 100%|█████████▉| 1002/1005 [04:47<00:00,  3.49it/s, loss=0.242, v_num=zlgy]
Validating:  40%|████      | 2/5 [00:17<00:25,  8.48s/it][A
Epoch 0: 100%|█████████▉| 1004/1005 [05:03<00:00,  3.31it/s, loss=0.242, v_num=zlgy]
Epoch 0: 100%|██████████| 1005/1005 [05:11<00:00,  3.23it/s, loss=0.242, v_num=zlgy]
Epoch 0: 100%|██████████| 1005/1005 [05:20<00:00,  3.14it/s, loss=0.242, v_num=zlgy]
Epoch 1: 100%|█████████▉| 1000/1005 [04:38<00:01,  3.60it/s, loss=0.24, v_num=zlgy] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 1: 100%|█████████▉| 1002/1005 [04:48<00:00,  3.47it/s, loss=0.24, v_num=zlgy]
Epoch 1: 100%|█████████▉| 1003/1005 [04:57<00:00,  3.37it/s, loss=0.24, v_num=zlgy]
Epoch 1: 100%|█████████▉| 1004/1005 [05:07<00:00,  3.27it/s, loss=0.24, v_num=zlgy]
Epo

[34m[1mwandb[0m: [32m[41mERROR[0m Error while calling W&B API: context deadline exceeded (<Response [500]>)


Epoch 5: 100%|█████████▉| 1000/1005 [04:39<00:01,  3.58it/s, loss=0.236, v_num=zlgy]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 5: 100%|█████████▉| 1002/1005 [04:48<00:00,  3.47it/s, loss=0.236, v_num=zlgy]
Validating:  40%|████      | 2/5 [00:17<00:25,  8.60s/it][A
Epoch 5: 100%|█████████▉| 1004/1005 [05:05<00:00,  3.28it/s, loss=0.236, v_num=zlgy]
Epoch 5: 100%|██████████| 1005/1005 [05:13<00:00,  3.21it/s, loss=0.236, v_num=zlgy]
Epoch 5: 100%|██████████| 1005/1005 [05:22<00:00,  3.12it/s, loss=0.236, v_num=zlgy]
Epoch 6: 100%|█████████▉| 1000/1005 [04:39<00:01,  3.58it/s, loss=0.236, v_num=zlgy]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 6: 100%|█████████▉| 1002/1005 [04:48<00:00,  3.47it/s, loss=0.236, v_num=zlgy]
Epoch 6: 100%|█████████▉| 1003/1005 [05:04<00:00,  3.29it/s, loss=0.236, v_num=zlgy]
Epoch 6: 100%|█████████▉| 1004/1005 [05:05<00:00,  3.29it/s, loss=0.236, v_num=zlgy]


wandb: Network error (ReadTimeout), entering retry loop.


Epoch 7: 100%|█████████▉| 1000/1005 [04:39<00:01,  3.58it/s, loss=0.235, v_num=zlgy]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 7: 100%|█████████▉| 1002/1005 [04:48<00:00,  3.48it/s, loss=0.235, v_num=zlgy]
Epoch 7: 100%|█████████▉| 1003/1005 [05:02<00:00,  3.31it/s, loss=0.235, v_num=zlgy]
Epoch 7: 100%|█████████▉| 1004/1005 [05:03<00:00,  3.31it/s, loss=0.235, v_num=zlgy]
Epoch 7: 100%|██████████| 1005/1005 [05:10<00:00,  3.23it/s, loss=0.235, v_num=zlgy]
Epoch 7: 100%|██████████| 1005/1005 [05:19<00:00,  3.15it/s, loss=0.235, v_num=zlgy]
Epoch 8: 100%|█████████▉| 1000/1005 [04:40<00:01,  3.57it/s, loss=0.235, v_num=zlgy]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 8: 100%|█████████▉| 1002/1005 [04:47<00:00,  3.48it/s, loss=0.235, v_num=zlgy]
Validating:  40%|████      | 2/5 [00:14<00:21,  7.28s/it][A
Epoch 8: 100%|█████████▉| 1004/1005 [05:02<00:00,  3.32it/s, loss=0.235, v_num=zlgy]


0,1
current_lr,████▃▃▃▃▃▁
cut_eff,▆▂▁▂▃▄▅▇▇█
cut_pur,▁▁▂▃▃▅▆▇▇█
dist@0.98,▁▅▆▆▇▇▇▇██
eff,█▇▆▅▅▃▂▂▁▁
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇████
pur,▁▁▂▃▃▅▆▇▇█
train_loss,█▆▅▄▄▄▄▃▃▃▃▄▃▃▃▃▃▃▃▃▂▃▂▂▂▂▂▂▂▂▂▂▁▂▁▂▂▁▂▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
current_lr,0.0
cut_eff,0.97652
cut_pur,0.00019
dist@0.98,0.36243
eff,0.87676
epoch,9.0
pur,0.00106
train_loss,0.23369
trainer/global_step,9999.0


[34m[1mwandb[0m: Agent Starting Run: o18o93dm with config:
[34m[1mwandb[0m: 	margin: 0.5
[34m[1mwandb[0m: 	use_bidir_truth: True
[34m[1mwandb[0m: 	weight_ratio: 1.5
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.12.11 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(

  | Name          | Type             | Params
---------------------------------------------------
0 | cos           | CosineSimilarity | 0     
1 | input_layer1  | Linear           | 12.3 K
2 | layers1       | ModuleList       | 5.2 M 
3 | output_layer1 | Linear           | 16.4 K
4 | input_layer2  | Linear           | 12.3 K
5 | layers2       | ModuleList       | 5.2 M 
6 | output_layer2 | Linear           | 16.4 K
7 | act           | GELU             | 0     
---------------------------------------------------
10.6 M    Trainable params
0         Non-trainable params
10.6 M    Total params
42.214    Total estimated model params size (MB)


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                      

  rank_zero_warn(


Epoch 0: 100%|█████████▉| 1000/1005 [04:36<00:01,  3.62it/s, loss=0.232, v_num=93dm]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 0: 100%|█████████▉| 1002/1005 [04:45<00:00,  3.51it/s, loss=0.232, v_num=93dm]
Epoch 0: 100%|█████████▉| 1003/1005 [04:56<00:00,  3.39it/s, loss=0.232, v_num=93dm]
Epoch 0: 100%|█████████▉| 1004/1005 [05:02<00:00,  3.31it/s, loss=0.232, v_num=93dm]
Epoch 0: 100%|██████████| 1005/1005 [05:11<00:00,  3.23it/s, loss=0.232, v_num=93dm]
Epoch 0: 100%|██████████| 1005/1005 [05:20<00:00,  3.13it/s, loss=0.232, v_num=93dm]
Epoch 1: 100%|█████████▉| 1000/1005 [04:37<00:01,  3.60it/s, loss=0.231, v_num=93dm]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 1: 100%|█████████▉| 1002/1005 [04:48<00:00,  3.47it/s, loss=0.231, v_num=93dm]
Epoch 1: 100%|█████████▉| 1003/1005 [04:57<00:00,  3.37it/s, loss=0.231, v_num=93dm]
Epoch 1: 100%|█████████▉| 1004/1005 [05:07<00:00,  3.26it/s, 

0,1
current_lr,████▃▃▃▃▃▁
cut_eff,▆▄▁▄▂▆▆▇██
cut_pur,▁▁▂▃▄▅▆▆██
dist@0.98,▁▄▃▅▅▆▆▇██
eff,█▇▆▅▃▃▃▂▁▁
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇████
pur,▁▁▂▃▄▅▆▆██
train_loss,█▆▅▄▄▄▄▄▄▃▃▃▃▃▃▂▃▂▃▂▂▂▂▂▂▃▁▁▂▂▂▁▂▂▂▁▁▁▂▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
current_lr,0.0
cut_eff,0.97642
cut_pur,0.00018
dist@0.98,0.31782
eff,0.87971
epoch,9.0
pur,0.00103
train_loss,0.22485
trainer/global_step,9999.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: m1sl7avx with config:
[34m[1mwandb[0m: 	margin: 0.5
[34m[1mwandb[0m: 	use_bidir_truth: True
[34m[1mwandb[0m: 	weight_ratio: 2
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.12.11 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(

  | Name          | Type             | Params
---------------------------------------------------
0 | cos           | CosineSimilarity | 0     
1 | input_layer1  | Linear           | 12.3 K
2 | layers1       | ModuleList       | 5.2 M 
3 | output_layer1 | Linear           | 16.4 K
4 | input_layer2  | Linear           | 12.3 K
5 | layers2       | ModuleList       | 5.2 M 
6 | output_layer2 | Linear           | 16.4 K
7 | act           | GELU             | 0     
---------------------------------------------------
10.6 M    Trainable params
0         Non-trainable params
10.6 M    Total params
42.214    Total estimated model params size (MB)


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                      

  rank_zero_warn(


Epoch 0: 100%|█████████▉| 1000/1005 [04:38<00:01,  3.59it/s, loss=0.216, v_num=7avx]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 0: 100%|█████████▉| 1002/1005 [04:49<00:00,  3.47it/s, loss=0.216, v_num=7avx]
Epoch 0: 100%|█████████▉| 1003/1005 [04:57<00:00,  3.37it/s, loss=0.216, v_num=7avx]
Epoch 0: 100%|█████████▉| 1004/1005 [05:05<00:00,  3.28it/s, loss=0.216, v_num=7avx]
Epoch 0: 100%|██████████| 1005/1005 [05:13<00:00,  3.20it/s, loss=0.216, v_num=7avx]
Epoch 0: 100%|██████████| 1005/1005 [05:22<00:00,  3.11it/s, loss=0.216, v_num=7avx]
Epoch 1: 100%|█████████▉| 1000/1005 [04:38<00:01,  3.59it/s, loss=0.214, v_num=7avx]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 1: 100%|█████████▉| 1002/1005 [04:49<00:00,  3.46it/s, loss=0.214, v_num=7avx]
Epoch 1: 100%|█████████▉| 1003/1005 [04:57<00:00,  3.37it/s, loss=0.214, v_num=7avx]
Epoch 1: 100%|█████████▉| 1004/1005 [05:07<00:00,  3.26it/s, 

0,1
current_lr,████▃▃▃▃▃▁
cut_eff,▆▆▁▃▁▅▇▇██
cut_pur,▁▁▂▂▃▅▆▆██
dist@0.98,▁▄▆▅█▇██▇█
eff,▇█▆▅▄▃▃▂▁▁
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇████
pur,▁▁▂▂▃▅▆▆██
train_loss,█▇▄▅▄▄▃▃▄▃▃▃▃▃▃▃▃▃▃▃▂▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▂
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
current_lr,0.0
cut_eff,0.97626
cut_pur,0.00018
dist@0.98,0.27898
eff,0.88107
epoch,9.0
pur,0.00101
train_loss,0.2101
trainer/global_step,9999.0


[34m[1mwandb[0m: Agent Starting Run: lc0p2s9p with config:
[34m[1mwandb[0m: 	margin: 0.5
[34m[1mwandb[0m: 	use_bidir_truth: True
[34m[1mwandb[0m: 	weight_ratio: 3
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.12.11 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(

  | Name          | Type             | Params
---------------------------------------------------
0 | cos           | CosineSimilarity | 0     
1 | input_layer1  | Linear           | 12.3 K
2 | layers1       | ModuleList       | 5.2 M 
3 | output_layer1 | Linear           | 16.4 K
4 | input_layer2  | Linear           | 12.3 K
5 | layers2       | ModuleList       | 5.2 M 
6 | output_layer2 | Linear           | 16.4 K
7 | act           | GELU             | 0     
---------------------------------------------------
10.6 M    Trainable params
0         Non-trainable params
10.6 M    Total params
42.214    Total estimated model params size (MB)


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                      

  rank_zero_warn(


Epoch 0: 100%|█████████▉| 1000/1005 [04:37<00:01,  3.60it/s, loss=0.184, v_num=2s9p]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 0: 100%|█████████▉| 1002/1005 [04:46<00:00,  3.50it/s, loss=0.184, v_num=2s9p]
Validating:  40%|████      | 2/5 [00:16<00:23,  7.91s/it][A
Epoch 0: 100%|█████████▉| 1004/1005 [05:01<00:00,  3.33it/s, loss=0.184, v_num=2s9p]
Epoch 0: 100%|██████████| 1005/1005 [05:09<00:00,  3.25it/s, loss=0.184, v_num=2s9p]
Epoch 0: 100%|██████████| 1005/1005 [05:17<00:00,  3.16it/s, loss=0.184, v_num=2s9p]
Epoch 1: 100%|█████████▉| 1000/1005 [04:38<00:01,  3.59it/s, loss=0.181, v_num=2s9p]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 1: 100%|█████████▉| 1002/1005 [04:48<00:00,  3.47it/s, loss=0.181, v_num=2s9p]
Epoch 1: 100%|█████████▉| 1003/1005 [04:56<00:00,  3.38it/s, loss=0.181, v_num=2s9p]
Epoch 1: 100%|█████████▉| 1004/1005 [05:07<00:00,  3.27it/s, loss=0.181, v_num=2s9p]


0,1
current_lr,████▃▃▃▃▃▁
cut_eff,▃▅▁▃▂▆▆▆▇█
cut_pur,▁▁▂▃▂▅▆▆██
dist@0.98,▁▅▄▆▇▇██▇█
eff,▇█▄▂▅▃▃▂▂▁
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇████
pur,▁▁▂▂▂▅▆▆██
train_loss,█▆▄▅▄▃▃▃▃▃▃▃▃▂▃▂▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂▂▁▁▂▂▂▂▁▂
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
current_lr,0.0
cut_eff,0.97634
cut_pur,0.00017
dist@0.98,0.22484
eff,0.88324
epoch,9.0
pur,0.00097
train_loss,0.17962
trainer/global_step,9999.0


[34m[1mwandb[0m: Agent Starting Run: pqeldq4t with config:
[34m[1mwandb[0m: 	margin: 0.5
[34m[1mwandb[0m: 	use_bidir_truth: True
[34m[1mwandb[0m: 	weight_ratio: 4
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.12.11 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(

  | Name          | Type             | Params
---------------------------------------------------
0 | cos           | CosineSimilarity | 0     
1 | input_layer1  | Linear           | 12.3 K
2 | layers1       | ModuleList       | 5.2 M 
3 | output_layer1 | Linear           | 16.4 K
4 | input_layer2  | Linear           | 12.3 K
5 | layers2       | ModuleList       | 5.2 M 
6 | output_layer2 | Linear           | 16.4 K
7 | act           | GELU             | 0     
---------------------------------------------------
10.6 M    Trainable params
0         Non-trainable params
10.6 M    Total params
42.214    Total estimated model params size (MB)


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                      

  rank_zero_warn(


Epoch 0: 100%|█████████▉| 1000/1005 [04:38<00:01,  3.59it/s, loss=0.156, v_num=dq4t]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 0: 100%|█████████▉| 1002/1005 [04:47<00:00,  3.49it/s, loss=0.156, v_num=dq4t]
Epoch 0: 100%|█████████▉| 1003/1005 [05:02<00:00,  3.32it/s, loss=0.156, v_num=dq4t]
Epoch 0: 100%|█████████▉| 1004/1005 [05:02<00:00,  3.31it/s, loss=0.156, v_num=dq4t]
Epoch 0: 100%|██████████| 1005/1005 [05:09<00:00,  3.24it/s, loss=0.156, v_num=dq4t]
Epoch 0: 100%|██████████| 1005/1005 [05:18<00:00,  3.16it/s, loss=0.156, v_num=dq4t]
Epoch 1: 100%|█████████▉| 1000/1005 [04:38<00:01,  3.58it/s, loss=0.155, v_num=dq4t]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 1: 100%|█████████▉| 1002/1005 [04:49<00:00,  3.47it/s, loss=0.155, v_num=dq4t]
Epoch 1: 100%|█████████▉| 1003/1005 [04:57<00:00,  3.37it/s, loss=0.155, v_num=dq4t]
Epoch 1: 100%|█████████▉| 1004/1005 [05:07<00:00,  3.26it/s, 

0,1
current_lr,████▃▃▃▃▃▁
cut_eff,▅▆▃▁▃▆▇▇█▇
cut_pur,▂▁▁▁▃▅▆▆▇█
dist@0.98,▁▄▆█▇▇▇██▇
eff,▆█▅▄▁▄▂▂▁▂
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇████
pur,▂▁▁▁▃▅▆▆▇█
train_loss,█▅▅▄▃▃▃▄▂▃▃▂▃▂▂▃▂▂▃▂▂▂▂▂▂▂▁▂▂▂▂▂▂▂▁▂▂▂▂▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
current_lr,0.0
cut_eff,0.97622
cut_pur,0.00016
dist@0.98,0.18876
eff,0.88591
epoch,9.0
pur,0.00094
train_loss,0.15269
trainer/global_step,9999.0


[34m[1mwandb[0m: Agent Starting Run: dpw2rrhi with config:
[34m[1mwandb[0m: 	margin: 0.5
[34m[1mwandb[0m: 	use_bidir_truth: True
[34m[1mwandb[0m: 	weight_ratio: 6
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.12.11 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(

  | Name          | Type             | Params
---------------------------------------------------
0 | cos           | CosineSimilarity | 0     
1 | input_layer1  | Linear           | 12.3 K
2 | layers1       | ModuleList       | 5.2 M 
3 | output_layer1 | Linear           | 16.4 K
4 | input_layer2  | Linear           | 12.3 K
5 | layers2       | ModuleList       | 5.2 M 
6 | output_layer2 | Linear           | 16.4 K
7 | act           | GELU             | 0     
---------------------------------------------------
10.6 M    Trainable params
0         Non-trainable params
10.6 M    Total params
42.214    Total estimated model params size (MB)


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                      

  rank_zero_warn(


Epoch 0: 100%|█████████▉| 1000/1005 [04:38<00:01,  3.60it/s, loss=0.121, v_num=rrhi]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 0: 100%|█████████▉| 1002/1005 [04:46<00:00,  3.50it/s, loss=0.121, v_num=rrhi]
Validating:  40%|████      | 2/5 [00:15<00:22,  7.52s/it][A
Epoch 0: 100%|█████████▉| 1004/1005 [05:01<00:00,  3.33it/s, loss=0.121, v_num=rrhi]
Epoch 0: 100%|██████████| 1005/1005 [05:08<00:00,  3.26it/s, loss=0.121, v_num=rrhi]
Epoch 0: 100%|██████████| 1005/1005 [05:17<00:00,  3.17it/s, loss=0.121, v_num=rrhi]
Epoch 1: 100%|█████████▉| 1000/1005 [04:37<00:01,  3.61it/s, loss=0.12, v_num=rrhi] 
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 1: 100%|█████████▉| 1002/1005 [04:47<00:00,  3.49it/s, loss=0.12, v_num=rrhi]
Epoch 1: 100%|█████████▉| 1003/1005 [04:56<00:00,  3.39it/s, loss=0.12, v_num=rrhi]
Epoch 1: 100%|█████████▉| 1004/1005 [05:05<00:00,  3.28it/s, loss=0.12, v_num=rrhi]
Epo

0,1
current_lr,████▃▃▃▃▃▁
cut_eff,▆▆▆▁▄▆█▇▇█
cut_pur,▄▁▂▂▃▄▅▇▇█
dist@0.98,▁▄▅▆▇██▇██
eff,▆█▆▅▃▂▃▂▁▁
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇████
pur,▄▁▂▂▃▄▅▇▇█
train_loss,█▃▃▃▃▂▂▂▂▂▂▂▂▂▁▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
current_lr,0.0
cut_eff,0.97631
cut_pur,0.00016
dist@0.98,0.14289
eff,0.88812
epoch,9.0
pur,0.00091
train_loss,0.11874
trainer/global_step,9999.0


[34m[1mwandb[0m: Agent Starting Run: e8cckta3 with config:
[34m[1mwandb[0m: 	margin: 0.5
[34m[1mwandb[0m: 	use_bidir_truth: True
[34m[1mwandb[0m: 	weight_ratio: 8
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.12.11 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(

  | Name          | Type             | Params
---------------------------------------------------
0 | cos           | CosineSimilarity | 0     
1 | input_layer1  | Linear           | 12.3 K
2 | layers1       | ModuleList       | 5.2 M 
3 | output_layer1 | Linear           | 16.4 K
4 | input_layer2  | Linear           | 12.3 K
5 | layers2       | ModuleList       | 5.2 M 
6 | output_layer2 | Linear           | 16.4 K
7 | act           | GELU             | 0     
---------------------------------------------------
10.6 M    Trainable params
0         Non-trainable params
10.6 M    Total params
42.214    Total estimated model params size (MB)


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                      

  rank_zero_warn(


Epoch 0: 100%|█████████▉| 1000/1005 [04:38<00:01,  3.60it/s, loss=0.0976, v_num=kta3]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 0: 100%|█████████▉| 1002/1005 [04:46<00:00,  3.50it/s, loss=0.0976, v_num=kta3]
Validating:  40%|████      | 2/5 [00:15<00:23,  7.82s/it][A
Epoch 0: 100%|█████████▉| 1004/1005 [05:02<00:00,  3.32it/s, loss=0.0976, v_num=kta3]
Epoch 0: 100%|██████████| 1005/1005 [05:09<00:00,  3.25it/s, loss=0.0976, v_num=kta3]
Epoch 0: 100%|██████████| 1005/1005 [05:17<00:00,  3.16it/s, loss=0.0976, v_num=kta3]
Epoch 1: 100%|█████████▉| 1000/1005 [04:39<00:01,  3.58it/s, loss=0.0971, v_num=kta3]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 1: 100%|█████████▉| 1002/1005 [04:49<00:00,  3.46it/s, loss=0.0971, v_num=kta3]
Epoch 1: 100%|█████████▉| 1003/1005 [04:57<00:00,  3.37it/s, loss=0.0971, v_num=kta3]
Epoch 1: 100%|█████████▉| 1004/1005 [05:07<00:00,  3.27it/s, loss=0.0971, v_n

0,1
current_lr,████▃▃▃▃▃▁
cut_eff,▁▄▇▅▆▇█▇██
cut_pur,▃▁▄▃▄▅▇▇▇█
dist@0.98,▁▄▄▇▇█▇▇██
eff,█▆▃▂▂▃▂▂▃▁
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇████
pur,▄▁▄▂▄▅▇▇▇█
train_loss,█▃█▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▂▁▁▂▁▁▁▂▁▁▁▂▁▁▁▁▁▂▂▁▁▁
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
current_lr,0.0
cut_eff,0.97635
cut_pur,0.00015
dist@0.98,0.11462
eff,0.88869
epoch,9.0
pur,0.00089
train_loss,0.09634
trainer/global_step,9999.0


[34m[1mwandb[0m: Agent Starting Run: sd2wuajj with config:
[34m[1mwandb[0m: 	margin: 0.5
[34m[1mwandb[0m: 	use_bidir_truth: False
[34m[1mwandb[0m: 	weight_ratio: 0.5
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.12.11 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(

  | Name          | Type             | Params
---------------------------------------------------
0 | cos           | CosineSimilarity | 0     
1 | input_layer1  | Linear           | 12.3 K
2 | layers1       | ModuleList       | 5.2 M 
3 | output_layer1 | Linear           | 16.4 K
4 | input_layer2  | Linear           | 12.3 K
5 | layers2       | ModuleList       | 5.2 M 
6 | output_layer2 | Linear           | 16.4 K
7 | act           | GELU             | 0     
---------------------------------------------------
10.6 M    Trainable params
0         Non-trainable params
10.6 M    Total params
42.214    Total estimated model params size (MB)


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                      

  rank_zero_warn(


Epoch 0: 100%|█████████▉| 1000/1005 [04:28<00:01,  3.73it/s, loss=0.204, v_num=uajj]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 0: 100%|█████████▉| 1002/1005 [04:31<00:00,  3.69it/s, loss=0.204, v_num=uajj]
Validating:  40%|████      | 2/5 [00:05<00:07,  2.53s/it][A
Epoch 0: 100%|█████████▉| 1004/1005 [04:36<00:00,  3.64it/s, loss=0.204, v_num=uajj]
Validating:  80%|████████  | 4/5 [00:09<00:02,  2.27s/it][A
Epoch 0: 100%|██████████| 1005/1005 [04:40<00:00,  3.58it/s, loss=0.204, v_num=uajj]
Epoch 1: 100%|█████████▉| 1000/1005 [04:27<00:01,  3.75it/s, loss=0.198, v_num=uajj]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 1: 100%|█████████▉| 1002/1005 [04:30<00:00,  3.71it/s, loss=0.198, v_num=uajj]
Validating:  40%|████      | 2/5 [00:06<00:09,  3.02s/it][A
Epoch 1: 100%|█████████▉| 1004/1005 [04:35<00:00,  3.64it/s, loss=0.198, v_num=uajj]
Validating:  80%|████████  | 4/5 [00:11<00:02,  

0,1
current_lr,████▃▃▃▃▃▁
cut_eff,▆▁▇▇▆▆█▇▇▆
cut_pur,▂▁▂▂▁▄▆▇██
dist@0.98,▃█▁▆▁▆▆▆▆▇
eff,██▇▆▇▄▃▂▁▁
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇████
pur,▂▁▂▂▁▄▆▇██
train_loss,▇▆▅▆▇▆▅▅▅█▄▅▄▄▄▅▄▅▄▃▃▃▃▂▃▂▂▃▃▁▂▃▃▂▃▂▂▂▃▂
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
current_lr,0.0
cut_eff,0.97764
cut_pur,0.00048
dist@0.98,0.4529
eff,0.86513
epoch,9.0
pur,0.00269
train_loss,0.17601
trainer/global_step,9999.0


[34m[1mwandb[0m: Agent Starting Run: 8il3bnpo with config:
[34m[1mwandb[0m: 	margin: 0.5
[34m[1mwandb[0m: 	use_bidir_truth: False
[34m[1mwandb[0m: 	weight_ratio: 0.75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.12.11 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(

  | Name          | Type             | Params
---------------------------------------------------
0 | cos           | CosineSimilarity | 0     
1 | input_layer1  | Linear           | 12.3 K
2 | layers1       | ModuleList       | 5.2 M 
3 | output_layer1 | Linear           | 16.4 K
4 | input_layer2  | Linear           | 12.3 K
5 | layers2       | ModuleList       | 5.2 M 
6 | output_layer2 | Linear           | 16.4 K
7 | act           | GELU             | 0     
---------------------------------------------------
10.6 M    Trainable params
0         Non-trainable params
10.6 M    Total params
42.214    Total estimated model params size (MB)


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                      

  rank_zero_warn(


Epoch 0: 100%|█████████▉| 1000/1005 [04:29<00:01,  3.71it/s, loss=0.233, v_num=bnpo]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 0: 100%|█████████▉| 1002/1005 [04:37<00:00,  3.61it/s, loss=0.233, v_num=bnpo]
Epoch 0: 100%|█████████▉| 1003/1005 [04:50<00:00,  3.45it/s, loss=0.233, v_num=bnpo]
Epoch 0: 100%|█████████▉| 1004/1005 [04:52<00:00,  3.44it/s, loss=0.233, v_num=bnpo]
Epoch 0: 100%|██████████| 1005/1005 [04:58<00:00,  3.37it/s, loss=0.233, v_num=bnpo]
Epoch 0: 100%|██████████| 1005/1005 [05:06<00:00,  3.28it/s, loss=0.233, v_num=bnpo]
Epoch 1: 100%|█████████▉| 1000/1005 [04:28<00:01,  3.73it/s, loss=0.212, v_num=bnpo]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 1: 100%|█████████▉| 1002/1005 [04:30<00:00,  3.70it/s, loss=0.212, v_num=bnpo]
Validating:  40%|████      | 2/5 [00:05<00:07,  2.54s/it][A
Epoch 1: 100%|█████████▉| 1004/1005 [04:35<00:00,  3.65it/s, loss=0.212, v_num=bnpo]


0,1
current_lr,████▃▃▃▃▃▁
cut_eff,▁█████████
cut_pur,▁▃▂▃▃▅▆▇▇█
dist@0.98,▁▇▆▆▇▇▇███
eff,█▅▅▄▄▃▂▂▂▁
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇████
pur,▁▃▂▃▃▅▆▇▇█
train_loss,▇▅▆▆▆▆▄▄▄▅▆▇▄▃█▇▄▃▃▃▃▃▂▂▃▂▁▂▃▁▁▂▂▂▂▂▁▁▂▂
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
current_lr,0.0
cut_eff,0.97775
cut_pur,0.00055
dist@0.98,0.41512
eff,0.86346
epoch,9.0
pur,0.00306
train_loss,0.19862
trainer/global_step,9999.0


[34m[1mwandb[0m: Agent Starting Run: af8y5vvg with config:
[34m[1mwandb[0m: 	margin: 0.5
[34m[1mwandb[0m: 	use_bidir_truth: False
[34m[1mwandb[0m: 	weight_ratio: 1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.12.11 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(

  | Name          | Type             | Params
---------------------------------------------------
0 | cos           | CosineSimilarity | 0     
1 | input_layer1  | Linear           | 12.3 K
2 | layers1       | ModuleList       | 5.2 M 
3 | output_layer1 | Linear           | 16.4 K
4 | input_layer2  | Linear           | 12.3 K
5 | layers2       | ModuleList       | 5.2 M 
6 | output_layer2 | Linear           | 16.4 K
7 | act           | GELU             | 0     
---------------------------------------------------
10.6 M    Trainable params
0         Non-trainable params
10.6 M    Total params
42.214    Total estimated model params size (MB)


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                      

  rank_zero_warn(


Epoch 0: 100%|█████████▉| 1000/1005 [04:28<00:01,  3.73it/s, loss=0.227, v_num=5vvg]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 0: 100%|█████████▉| 1002/1005 [04:31<00:00,  3.69it/s, loss=0.227, v_num=5vvg]
Validating:  40%|████      | 2/5 [00:04<00:07,  2.38s/it][A
Epoch 0: 100%|█████████▉| 1004/1005 [04:35<00:00,  3.64it/s, loss=0.227, v_num=5vvg]
Validating:  80%|████████  | 4/5 [00:09<00:02,  2.12s/it][A
Epoch 0: 100%|██████████| 1005/1005 [04:40<00:00,  3.59it/s, loss=0.227, v_num=5vvg]
Epoch 1: 100%|█████████▉| 1000/1005 [04:27<00:01,  3.74it/s, loss=0.218, v_num=5vvg]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 1: 100%|█████████▉| 1002/1005 [04:31<00:00,  3.69it/s, loss=0.218, v_num=5vvg]
Validating:  40%|████      | 2/5 [00:06<00:09,  3.04s/it][A
Epoch 1: 100%|█████████▉| 1004/1005 [04:36<00:00,  3.63it/s, loss=0.218, v_num=5vvg]
Validating:  80%|████████  | 4/5 [00:11<00:02,  

0,1
current_lr,████▃▃▃▃▃▁
cut_eff,▇▁▆▅▂▇█▇██
cut_pur,▂▁▂▂▂▄▆▇██
dist@0.98,▂▁▇▆▄▇███▇
eff,▇█▇▆▅▃▃▂▁▁
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇████
pur,▂▁▂▂▂▄▆▇██
train_loss,▅▄▄▄▄▃▄▄▅▄▄█▅▃▃▂▃▃▃▃▃▂▂▃▃▃▂▃▂▃▂▂▂▂▂▁▂▂▂▂
trainer/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
current_lr,0.0
cut_eff,0.97767
cut_pur,0.00055
dist@0.98,0.38314
eff,0.8621
epoch,9.0
pur,0.00307
train_loss,0.19628
trainer/global_step,9999.0


[34m[1mwandb[0m: Agent Starting Run: dinqaylg with config:
[34m[1mwandb[0m: 	margin: 0.5
[34m[1mwandb[0m: 	use_bidir_truth: False
[34m[1mwandb[0m: 	weight_ratio: 1.5
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: wandb version 0.12.11 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(

  | Name          | Type             | Params
---------------------------------------------------
0 | cos           | CosineSimilarity | 0     
1 | input_layer1  | Linear           | 12.3 K
2 | layers1       | ModuleList       | 5.2 M 
3 | output_layer1 | Linear           | 16.4 K
4 | input_layer2  | Linear           | 12.3 K
5 | layers2       | ModuleList       | 5.2 M 
6 | output_layer2 | Linear           | 16.4 K
7 | act           | GELU             | 0     
---------------------------------------------------
10.6 M    Trainable params
0         Non-trainable params
10.6 M    Total params
42.214    Total estimated model params size (MB)


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  rank_zero_warn(


                                                                      

  rank_zero_warn(


Epoch 0: 100%|█████████▉| 1000/1005 [04:32<00:01,  3.67it/s, loss=0.233, v_num=aylg]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 0: 100%|█████████▉| 1002/1005 [04:39<00:00,  3.59it/s, loss=0.233, v_num=aylg]
Validating:  40%|████      | 2/5 [00:12<00:19,  6.39s/it][A
Epoch 0: 100%|█████████▉| 1004/1005 [04:51<00:00,  3.44it/s, loss=0.233, v_num=aylg]
Epoch 0: 100%|██████████| 1005/1005 [04:57<00:00,  3.38it/s, loss=0.233, v_num=aylg]
Epoch 0: 100%|██████████| 1005/1005 [05:03<00:00,  3.31it/s, loss=0.233, v_num=aylg]
Epoch 1: 100%|█████████▉| 1000/1005 [04:33<00:01,  3.66it/s, loss=0.212, v_num=aylg]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/5 [00:00<?, ?it/s][A
Epoch 1: 100%|█████████▉| 1002/1005 [04:38<00:00,  3.60it/s, loss=0.212, v_num=aylg]
Validating:  40%|████      | 2/5 [00:08<00:12,  4.11s/it][A
Epoch 1: 100%|█████████▉| 1004/1005 [04:45<00:00,  3.51it/s, loss=0.212, v_num=aylg]
Validating:  80%|███████

## Construct PyTorch Lightning model

In [None]:
# Read dual_embedding_default.yaml into `hparams`, the dict used to build the
# model and (via hparams["max_epochs"]) to configure the Trainer.
# NOTE(review): yaml.FullLoader resolves more than the basic YAML tags; if this
# config only holds plain scalars/lists/dicts, yaml.safe_load would be the
# safer loader -- confirm before switching.
with open("dual_embedding_default.yaml") as f:
    hparams = yaml.load(f, Loader=yaml.FullLoader)

In [None]:
# Build the dual-embedding model from the `hparams` dict.
model = VanillaDualEmbedding(hparams)

## Metric Learning

### Train embedding

In [None]:
from pytorch_lightning.callbacks import ModelCheckpoint

# Checkpoint policy for the run: rank checkpoints by the logged "pur" metric
# (higher is better), keep the two best, and additionally keep the latest one.
checkpoint_callback = ModelCheckpoint(
    monitor="pur",
    mode="max",
    save_top_k=2,
    save_last=True,
)

In [None]:
# Re-initialise the model parameters with the notebook's kaiming_init helper
# before any training starts.
kaiming_init(model)

# Log to the "ITk_dual_embedding" W&B project and train on a single GPU for the
# number of epochs given in the hyperparameters.
logger = WandbLogger(project="ITk_dual_embedding")
trainer = Trainer(
    gpus=1,
    max_epochs=hparams["max_epochs"],
    logger=logger,
    num_sanity_val_steps=2,
    callbacks=[checkpoint_callback],
    log_every_n_steps=50,
    default_root_dir="/global/cfs/cdirs/m3443/usr/ryanliu/ITk_embedding/",
)
# Training is left disabled; uncomment to launch the run.
# trainer.fit(model)

## Initialize from trained model

In [None]:
import wandb

# Close any W&B run still active in this kernel before a new logger is created.
wandb.finish()

# NOTE(review): yaml.FullLoader resolves more than the basic YAML tags; prefer
# yaml.safe_load if the config is plain data -- confirm before switching.
with open("dual_embedding_default.yaml") as f:
    hparams = yaml.load(f, Loader=yaml.FullLoader)

# Presumably makes the model build both encoders (input_layer1/2, layers1/2,
# output_layer1/2), which the key mirroring below relies on -- verify against
# VanillaDualEmbedding.
hparams["use_dual_encoder"] = True

model = VanillaDualEmbedding(hparams)

# map_location="cpu" lets the checkpoint be read on any host, whatever device it
# was saved from; load_state_dict() copies the values into the model's own
# parameters, so the tensors' storage device here does not matter.
checkpoint = torch.load(
    "/global/cfs/cdirs/m3443/usr/ryanliu/ITk_embedding/ITk_dual_embedding/3ijb4qnw/checkpoints/last.ckpt",
    map_location="cpu",
)
state_dict = checkpoint["state_dict"]

# Mirror every encoder-1 entry under the corresponding encoder-2 key so that
# both encoders start from the same pretrained weights. Keys that contain none
# of the encoder-1 markers map to themselves and are left unchanged.
encoder1_to_encoder2 = (
    ("input_layer1", "input_layer2"),
    ("layers1", "layers2"),
    ("output_layer1", "output_layer2"),
)
for name in list(state_dict):  # snapshot: the dict grows while we iterate
    mirrored_name = name
    for old, new in encoder1_to_encoder2:
        mirrored_name = mirrored_name.replace(old, new)
    state_dict[mirrored_name] = state_dict[name]

model.load_state_dict(state_dict)
# This only drops the local name; `checkpoint` still references the same dict,
# so the loaded tensors stay alive until `checkpoint` is deleted or rebound.
del state_dict

In [None]:
from pytorch_lightning.callbacks import ModelCheckpoint

# Fresh checkpoint callback for the run that starts from pretrained weights:
# rank checkpoints by the logged "pur" metric (higher is better), keep the two
# best, and additionally keep the latest one.
checkpoint_callback = ModelCheckpoint(
    monitor="pur",
    mode="max",
    save_top_k=2,
    save_last=True,
)

In [None]:
# Log to the "ITk_dual_embedding" W&B project and train on a single GPU for the
# number of epochs given in the hyperparameters.
logger = WandbLogger(project="ITk_dual_embedding")
trainer = Trainer(
    gpus=1,
    max_epochs=hparams["max_epochs"],
    logger=logger,
    num_sanity_val_steps=2,
    callbacks=[checkpoint_callback],
    log_every_n_steps=50,
    default_root_dir="/global/cfs/cdirs/m3443/usr/ryanliu/ITk_embedding/",
)
# Training is left disabled; uncomment to launch the run.
# trainer.fit(model)