In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import os
# Directory the kernel is in when this cell runs.
current_pwd = os.getcwd()

# Known locations of the notebooks directory on the machines this runs on.
possible_paths = [
    '/home/export/soheuny/SRFinder/soheun/notebooks', 
    '/home/soheuny/HH4bsim/soheun/notebooks'
]
# Parent of each notebooks directory, i.e. where the first run of this cell
# leaves the kernel.
project_roots = [os.path.dirname(path) for path in possible_paths]

# Move one level up exactly once. Re-running the cell after the move is a
# no-op instead of a failure, and the check is an explicit raise so it is not
# stripped when Python runs with -O.
# NOTE(review): presumably the move is needed so the project-local imports
# below and relative data paths resolve — confirm.
if current_pwd in possible_paths:
    os.chdir("..")
elif current_pwd not in project_roots:
    raise AssertionError(
        f"Did you change the path? It should be one of {possible_paths}"
    )

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import torch

from plots import hist_events_by_labels
from events_data import EventsData
from fvt_classifier import FvTClassifier
from tst_info import TSTInfo
# import LogNorm
from matplotlib.colors import LogNorm


# Input feature column names, "sym_Jet{index}_{quantity}": grouped by quantity
# (pt, eta, phi, m), and within each group ordered by jet index 0..3.
features = [
    f"sym_Jet{jet_idx}_{quantity}"
    for quantity in ("pt", "eta", "phi", "m")
    for jet_idx in range(4)
]

In [2]:
from events_data import events_from_scdinfo
from tst_info import TSTInfo
from debiasing import get_bias_fn, get_histograms
from plots import hist_events_by_labels
import tqdm
from matplotlib.colors import LogNorm
from dataset import generate_mother_dataset, split_scdinfo
import pytorch_lightning as pl
from signal_region import get_SR_stats
import itertools


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from itertools import product
from training_info import TrainingInfoV2
from plots import calibration_plot, plot_rewighted_samples_by_model
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from ancillary_features import get_m4j
from pl_callbacks import CalibrationPlotCallback, ReweightedPlotCallback

# Global matplotlib styling for every figure in this notebook, applied in one
# call: LaTeX text rendering with a serif (Times New Roman) font, followed by
# figure resolution, title/label sizes and marker size.
plt.rcParams.update(
    {
        "text.usetex": True,
        "font.family": "serif",
        "font.serif": "Times New Roman",
        "figure.dpi": 100,
        "figure.titlesize": 20,
        "axes.titlesize": 20,
        "axes.labelsize": 15,
        "figure.labelsize": 20,
        "lines.markersize": 3,
    }
)

In [13]:
import time


# ---------------------------------------------------------------------------
# Experiment selection
# ---------------------------------------------------------------------------
n_3b = 1_000_000  # one million (was written as 100_0000; same value)
device = torch.device("cuda")
experiment_name = "counting_test_v2"
signal_filename = "HH4b_picoAOD.h5"
# NOTE(review): ratio_4b is not referenced in this cell; kept in case later
# cells read it.
ratio_4b = 0.5

seeds = [0]
# Filter handed to TSTInfo.find.
# NOTE(review): presumably callables act as predicates on the stored hparam
# and plain values as equality matches — confirm against TSTInfo.find.
hparam_filter = {
    "experiment_name": lambda x: x in [experiment_name],
    "n_3b": n_3b,
    "seed": lambda x: x in seeds,
    "signal_ratio": 0.0,
}

# ---------------------------------------------------------------------------
# Training configuration for this ablation run
# ---------------------------------------------------------------------------
config = {
    "batch_schedule": True,
    "batch_milestones": (1, 3, 6, 10, 15),
    "init_lr": 1e-2,
    "lr_schedule": True,
    "min_lr": 1e-3,
    "lr_factor": 0.5,
    "lr_patience": 15,
    "depth": {"encoder": 4, "decoder": 1},
}

# Everything below depends only on `config`, so it is computed once rather
# than once per TSTInfo hash.
batch_schedule = config["batch_schedule"]
batch_milestones = config["batch_milestones"]
init_lr = config["init_lr"]
lr_schedule = config["lr_schedule"]
min_lr = config["min_lr"]
lr_factor = config["lr_factor"]
lr_patience = config["lr_patience"]
run_name = f"bs={batch_schedule}_bs_milestones={batch_milestones}_init_lr={init_lr}_lrs={lr_schedule}_min_lr={min_lr}_lr_factor={lr_factor}_lr_patience={lr_patience}"

# Fixed initial batch size; the value stored in base_fvt_tinfo.hparams is not
# used here.
batch_size = 2**10

hashes = TSTInfo.find(hparam_filter, sort_by=["seed", "signal_ratio"])

# ---------------------------------------------------------------------------
# Train one fresh FvTClassifier per matching TSTInfo
# ---------------------------------------------------------------------------
for tstinfo_hash in hashes:
    tstinfo = TSTInfo.load(tstinfo_hash)
    seed = tstinfo.hparams["seed"]
    print(
        f"n_3b={tstinfo.hparams['n_3b']}, signal_ratio={tstinfo.hparams['signal_ratio']}, seed={tstinfo.hparams['seed']}"
    )
    base_fvt_tinfo_hash = tstinfo.base_fvt_tinfo_hash
    base_fvt_tinfo = TrainingInfoV2.load(base_fvt_tinfo_hash)

    # Rebuild the train / validation / test event sets recorded for this run
    # and shuffle each with the run's seed.
    train_scdinfo, val_scdinfo = base_fvt_tinfo.fetch_train_val_scdinfo()
    events_train = events_from_scdinfo(train_scdinfo, features, signal_filename)
    events_val = events_from_scdinfo(val_scdinfo, features, signal_filename)
    # NOTE(review): events_tst is loaded and shuffled but not used for
    # training in this cell; kept in case later cells read it.
    events_tst = events_from_scdinfo(tstinfo.scdinfo_tst, features, signal_filename)
    events_train.shuffle(seed=seed)
    events_val.shuffle(seed=seed)
    events_tst.shuffle(seed=seed)

    # NOTE(review): presumably adjusts the event count to fit whole batches —
    # confirm against EventsData.fit_batch_size.
    events_train.fit_batch_size(batch_size)
    events_val.fit_batch_size(batch_size)

    # Taken per iteration so every run gets its own run name / log directory.
    timestamp = int(time.time())

    # IMPORTANT: For reproducibility, weight initialization is fixed
    pl.seed_everything(seed)
    base_model_new = FvTClassifier(
        num_classes=2,
        dim_input_jet_features=4,
        dim_dijet_features=base_fvt_tinfo.hparams["dim_dijet_features"],
        dim_quadjet_features=base_fvt_tinfo.hparams["dim_quadjet_features"],
        run_name=f"{run_name}_seed={seed}_timestamp={timestamp}",
        device=device,
        depth=config["depth"],
    )

    # The scheduler / dataloader dicts are rebuilt on every iteration on
    # purpose: `fit` receives them by reference, so a fresh dict per run
    # avoids any cross-run sharing should it modify them.
    if lr_schedule:
        lr_scheduler_config = {
            "type": "ReduceLROnPlateau",
            "factor": lr_factor,
            "threshold": 0.0001,
            "patience": lr_patience,
            "cooldown": 1,
            "min_lr": min_lr,
        }
    else:
        lr_scheduler_config = {"type": "none"}

    if batch_schedule:
        dataloader_config = {
            "batch_size": batch_size,
            "batch_size_milestones": batch_milestones,
            "batch_size_multiplier": 2,
        }
    else:
        dataloader_config = {"batch_size": batch_size}

    base_model_new.fit(
        events_train.to_tensor_dataset(),
        events_val.to_tensor_dataset(),
        max_epochs=200,
        train_seed=seed,
        save_checkpoint=False,
        optimizer_config={"type": "Adam", "lr": init_lr},
        lr_scheduler_config=lr_scheduler_config,
        dataloader_config=dataloader_config,
        tb_log_dir="training_ablation_2",
    )

n_3b=1000000, signal_ratio=0.0, seed=0


[rank: 0] Seed set to 0
[rank: 0] Seed set to 0
/home/export/soheuny/.conda/envs/coffea_torch/lib/python3.11/site-packages/lightning_fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /home/export/soheuny/.conda/envs/coffea_torch/lib/py ...
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: tb_logs/training_ablation_2/bs=True_bs_milestones=(1, 3, 6, 10, 15)_init_lr=0.01_lrs=True_min_lr=0.001_lr_factor=0.5_lr_patience=15_seed=0_timestamp=1728770507
/home/export/soheuny/.conda/envs/coffea_torch/lib/python3.11/site-packages/pytorch_lightning/callbacks/model_checkpoint.py:653: Checkpoint directory /home/export/soheuny/SRFinder/soheun/data/tmp/checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_V

Temporary checkpoint callback
Epoch 0: 100%|██████████| 1028/1028 [00:48<00:00, 21.38it/s, v_num=0, 1000x_val_loss_first_digits=664.0, 1000x_val_loss_second_digits=0.0832, val_sigma_sq=284.0, lr=0.010]Batch size updated to: 2048
Epoch 2: 100%|██████████| 514/514 [00:30<00:00, 16.66it/s, v_num=0, 1000x_val_loss_first_digits=660.0, 1000x_val_loss_second_digits=0.995, val_sigma_sq=89.60, lr=0.010, train_loss_lower_digits=661.0, train_loss_second_digits=0.512] Batch size updated to: 4096
Epoch 5: 100%|██████████| 257/257 [00:21<00:00, 11.99it/s, v_num=0, 1000x_val_loss_first_digits=661.0, 1000x_val_loss_second_digits=0.475, val_sigma_sq=177.0, lr=0.010, train_loss_lower_digits=660.0, train_loss_second_digits=0.501]Batch size updated to: 8192
Epoch 9: 100%|██████████| 129/129 [00:18<00:00,  6.86it/s, v_num=0, 1000x_val_loss_first_digits=660.0, 1000x_val_loss_second_digits=0.0646, val_sigma_sq=45.00, lr=0.010, train_loss_lower_digits=659.0, train_loss_second_digits=0.873]Batch size updated t

/home/export/soheuny/.conda/envs/coffea_torch/lib/python3.11/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (33) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 199: 100%|██████████| 33/33 [00:20<00:00,  1.64it/s, v_num=0, 1000x_val_loss_first_digits=659.0, 1000x_val_loss_second_digits=0.507, val_sigma_sq=4.040, lr=0.001, train_loss_lower_digits=658.0, train_loss_second_digits=0.571] 

`Trainer.fit` stopped: `max_epochs=200` reached.


Epoch 199: 100%|██████████| 33/33 [00:20<00:00,  1.64it/s, v_num=0, 1000x_val_loss_first_digits=659.0, 1000x_val_loss_second_digits=0.507, val_sigma_sq=4.040, lr=0.001, train_loss_lower_digits=658.0, train_loss_second_digits=0.571]
