In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import os
# Directory the kernel is in when this cell runs.
current_pwd = os.getcwd()

# Known locations of this notebook's directory on the machines it is run on.
possible_paths = [
    '/home/export/soheuny/SRFinder/soheun/notebooks', 
    '/home/soheuny/HH4bsim/soheun/notebooks'
]
# Parents of the notebook directories: where the kernel ends up after the
# `chdir("..")` below, and from where the project-local imports are resolved.
project_roots = [os.path.dirname(path) for path in possible_paths]

if current_pwd not in project_roots:
    # First execution in this kernel: we must still be inside `notebooks/`.
    assert current_pwd in possible_paths, f"Did you change the path? It should be one of {possible_paths}"
    os.chdir("..")
# Otherwise the cell already ran in this kernel (the cwd is a project root),
# so re-running it is a no-op instead of an AssertionError.

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import torch

from plots import hist_events_by_labels
from events_data import EventsData
from fvt_classifier import FvTClassifier


# Names of the input feature columns, grouped by quantity (pt, eta, phi, m);
# within each group the columns run over jet indices 0..3, i.e.
# "sym_Jet0_pt", "sym_Jet1_pt", ..., "sym_Jet3_m" (16 names in total).
features = [
    f"sym_Jet{jet_idx}_{quantity}"
    for quantity in ("pt", "eta", "phi", "m")
    for jet_idx in range(4)
]

In [2]:
from events_data import events_from_scdinfo
from tst_info import TSTInfo
from debiasing import get_bias_fn, get_histograms
from plots import hist_events_by_labels
import tqdm


In [3]:
def _region_bin_edges(x_region: np.ndarray, nbins: int, bins_mode: str) -> np.ndarray:
    """Return `nbins` bin edges for one region: quantile-spaced or evenly spaced."""
    if bins_mode == "quantile":
        return np.quantile(x_region, np.linspace(0, 1, nbins))
    return np.linspace(np.min(x_region), np.max(x_region), nbins)


def _plot_region_hist_and_ratio(fig, gs, column, title, x_region, events_region,
                                bins, ylim, yscale):
    """Draw, in GridSpec column `column`, the 3b/4b histograms and their ratio for one region."""
    is_4b = events_region.is_4b
    weights = events_region.weights

    hist_4b, _ = np.histogram(x_region[is_4b], bins=bins, weights=weights[is_4b])
    hist_3b, _ = np.histogram(x_region[~is_4b], bins=bins, weights=weights[~is_4b])

    # Upper two rows: weighted histograms of both samples.
    ax = fig.add_subplot(gs[:2, column])
    ax.set_title(title)
    ax.stairs(hist_3b, bins, label="3b")
    ax.stairs(hist_4b, bins, label="4b")
    ax.set_yscale(yscale)
    ax.legend()

    # Bottom row: per-bin 4b / 3b ratio, drawn at the bin centres.
    ax = fig.add_subplot(gs[2, column])
    ax.errorbar((bins[1:] + bins[:-1]) / 2, hist_4b / hist_3b,
                yerr=np.sqrt(hist_4b) / hist_3b,
                fmt="o", markersize=3, label="4b / 3b", capsize=3)
    ax.set_ylim(ylim)
    # Always 11 ticks spanning ylim, so the 11 labels below (first, middle and
    # last labelled, the rest blank) match the tick count for any ylim, not
    # only for a span of exactly 1.0.
    ax.set_yticks(np.linspace(ylim[0], ylim[1], 11))
    ax.set_yticklabels([ylim[0]] + [None] * 4 + [(ylim[0] + ylim[1]) / 2] + [None] * 4 + [ylim[1]])

    ax.set_ylabel("True 4b / Modeled 4b")
    ax.hlines(1, bins[0], bins[-1], color="black", linestyle="--")
    ymin, ymax = ax.get_ylim()
    # Dashed guides at the interior bin edges.
    ax.vlines(bins[1:-1], ymin, ymax, color="black", linestyle="--", alpha=0.5)


def plot_hist_and_ratio(tstinfo: TSTInfo, 
                    x_values: np.ndarray, 
                    events_tst_clone: EventsData, 
                    in_CR: np.ndarray, 
                    in_SR: np.ndarray, 
                    nbins: int = 10,
                    bins_mode: str = "quantile",
                    ylim: tuple[float, float] = (0.5, 1.5), 
                    xlabel: str = None,
                    yscale: str = "linear"):
    """Plot weighted 3b/4b histograms of `x_values` and their ratio, for CR and SR.

    A new 15x7 figure is created with two columns (left: "Control Region",
    right: "Signal Region"). Each column has a histogram panel on top and a
    4b / 3b ratio panel below. The figure is neither shown nor closed here.

    Args:
        tstinfo: only `tstinfo.hparams['seed']` and
            `tstinfo.hparams['signal_ratio']` are read, for the figure title.
        x_values: one value per event; must have the same length as
            `events_tst_clone`.
        events_tst_clone: events providing `is_4b` and `weights`; indexed with
            the region masks.
        in_CR: mask selecting the control-region events.
        in_SR: mask selecting the signal-region events.
        nbins: number of bin *edges* per region (so `nbins - 1` bins).
        bins_mode: "quantile" for quantile-spaced edges computed per region;
            any other value gives evenly spaced edges between the region's
            min and max.
        ylim: y-range of the ratio panels.
        xlabel: shared x label for the figure.
        yscale: y-scale of the histogram panels.

    Notes:
        Bins with no 3b weight give a division by zero in the ratio (inf/nan
        points plus a NumPy warning). The ratio error bar is
        `sqrt(hist_4b) / hist_3b`.
    """
    assert len(x_values) == len(events_tst_clone)

    regions = [("Control Region", in_CR), ("Signal Region", in_SR)]
    # Bin edges are computed before the figure exists, so an invalid region
    # fails without leaving a half-built figure behind.
    bins_per_region = [_region_bin_edges(x_values[mask], nbins, bins_mode)
                       for _, mask in regions]

    gs = GridSpec(3, 2)
    fig = plt.figure(figsize=(15, 7))
    fig.suptitle(f"seed={tstinfo.hparams['seed']}, signal_ratio={tstinfo.hparams['signal_ratio']}")
    fig.supxlabel(xlabel)

    for column, ((title, mask), bins) in enumerate(zip(regions, bins_per_region)):
        _plot_region_hist_and_ratio(fig, gs, column, title,
                                    x_values[mask], events_tst_clone[mask],
                                    bins, ylim, yscale)

In [4]:
def plot_reweighted_samples(events_reweighted: EventsData, 
                            events_not_reweighted: EventsData,
                            hist_values: np.ndarray, 
                            fig: plt.Figure):
    """Fill `fig` with a 2x2 grid comparing reweighted and non-reweighted events.

    Row 0 uses 10 quantile-spaced bin edges of `hist_values`, row 1 uses 10
    evenly spaced edges between its min and max. Column 0 ("Reweighted") shows
    `events_reweighted`, column 1 ("NOT Reweighted") shows
    `events_not_reweighted`; each panel is drawn by `hist_events_by_labels`
    with the same `hist_values`.
    """
    layout = GridSpec(2, 2, figure=fig)
    panels = (
        (0, events_reweighted, "Reweighted"),
        (1, events_not_reweighted, "NOT Reweighted"),
    )

    for row, use_quantiles in enumerate((True, False)):
        if use_quantiles:
            bins = np.quantile(hist_values, np.linspace(0, 1, 10))
        else:
            bins = np.linspace(np.min(hist_values), np.max(hist_values), 10)

        for col, events, title in panels:
            ax = fig.add_subplot(layout[row, col])
            hist_events_by_labels(events, hist_values, bins=bins, ax=ax)
            ax.set_title(title)
            ax.legend()

In [5]:
from typing import Callable
import numpy as np

def auc_score(
    clf_scores: np.ndarray, 
    is_4b: np.ndarray,
    weights: np.ndarray = None):
    """Weighted AUC: the weighted fraction of (3b, 4b) pairs ranked correctly.

    Each pair of one 3b event and one 4b event carries weight `w_3b * w_4b`
    and counts as correct when the 4b score is strictly greater than the 3b
    score (ties count as incorrect).

    Args:
        clf_scores: 1-D array of classifier scores, one per event.
        is_4b: mask of the same length; truthy entries mark 4b events. It is
            converted to a boolean array, so 0/1 integer labels also work.
        weights: per-event weights; defaults to all ones.

    Returns:
        The weighted AUC as a float; `nan` if either class has zero total
        weight (0 / 0).
    """
    clf_scores = np.asarray(clf_scores)
    # `~` on an integer array is a bitwise NOT (-1, -2, ...), not a logical
    # negation, so the mask has to be boolean before it is inverted.
    is_4b = np.asarray(is_4b, dtype=bool)
    if weights is None:
        weights = np.ones_like(clf_scores, dtype=float)
    else:
        weights = np.asarray(weights)

    assert len(clf_scores) == len(is_4b) == len(weights)

    scores_3b, weights_3b = clf_scores[~is_4b], weights[~is_4b]
    scores_4b, weights_4b = clf_scores[is_4b], weights[is_4b]

    # Sort the 3b scores once; cum_weight_3b[k] is then the total weight of the
    # k lowest-scoring 3b events. This avoids materialising the dense
    # (n_3b x n_4b) matrix of pairwise differences.
    order = np.argsort(scores_3b, kind="stable")
    sorted_scores_3b = scores_3b[order]
    cum_weight_3b = np.concatenate(([0.0], np.cumsum(weights_3b[order])))

    # side="left" counts the 3b scores strictly below each 4b score, which
    # reproduces the `score_4b - score_3b > 0` criterion.
    n_3b_below = np.searchsorted(sorted_scores_3b, scores_4b, side="left")

    correct_pair_weight = np.sum(weights_4b * cum_weight_3b[n_3b_below])
    total_pair_weight = np.sum(weights_3b) * np.sum(weights_4b)

    return correct_pair_weight / total_pair_weight

def mce_score_fn(pi: float = 0.5):
    """Return a scorer computing a class-balanced error rate at threshold `pi`.

    The returned function has the signature `(clf_scores, is_4b, weights=None)`.
    It averages two weighted rates: the fraction of 3b weight with score
    strictly above `pi`, and the fraction of 4b weight with score strictly
    below `pi`. Scores exactly equal to `pi` count as errors for neither class.
    """

    def mce_score_inner(
        clf_scores: np.ndarray, 
        is_4b: np.ndarray,
        weights: np.ndarray = None):
        # Unit weights unless the caller supplies per-event weights.
        event_weights = np.ones_like(clf_scores) if weights is None else weights

        assert len(clf_scores) == len(is_4b) == len(event_weights)

        not_4b = ~is_4b
        w_3b = event_weights[not_4b]
        w_4b = event_weights[is_4b]

        # 3b events scored above the threshold.
        rate_3b_above = np.sum(w_3b * (clf_scores[not_4b] > pi)) / np.sum(w_3b)
        # 4b events scored below the threshold.
        rate_4b_below = np.sum(w_4b * (clf_scores[is_4b] < pi)) / np.sum(w_4b)

        return 0.5 * (rate_3b_above + rate_4b_below)

    return mce_score_inner
    
    
def test_via_classifier(clf_scores: np.ndarray, is_4b: np.ndarray, weights: np.ndarray, 
                score_func: Callable[[np.ndarray, np.ndarray, np.ndarray], float],
                bootstrap: bool = True,
                n_samples: int = 1000, 
                p_value_type: str = "greater", 
                do_tqdm: bool = True):

    
    assert len(clf_scores) == len(is_4b) == len(weights)
    assert p_value_type in ["greater", "less", "two-sided"], f"p_value_type {p_value_type} is not supported"
    
    score_0 = score_func(clf_scores, is_4b, weights)
    
    null_scores = []
    for _ in tqdm.tqdm(range(n_samples), disable=not do_tqdm):
        if bootstrap:
            indices = np.random.choice(len(clf_scores), len(clf_scores), replace=True)
        else:
            indices = np.arange(len(clf_scores))
            
        clf_scores_rnd = clf_scores[indices]
        weights_rnd = weights[indices]
        is_4b_rnd = np.random.choice(len(clf_scores), np.sum(is_4b), replace=False)
        is_4b_rnd = np.isin(np.arange(len(clf_scores)), is_4b_rnd)
            
        null_scores.append(score_func(clf_scores_rnd, is_4b_rnd, weights_rnd))

    if p_value_type == "greater":
        p_value = np.mean(null_scores > score_0)
    elif p_value_type == "less":
        p_value = np.mean(null_scores < score_0)
    elif p_value_type == "two-sided":
        p_value = np.mean(np.abs(null_scores - score_0) > np.abs(score_0))
    
    return score_0, null_scores, p_value


In [23]:
# To list the experiment names that have stored hyper-parameters, run:
#   np.unique([h["experiment_name"] for h in TSTInfo.get_existing_hparams()])

# Look up the stored runs of the "counting_test_high_4b_in_CR" experiment with
# n_3b = 1,400,000, ordered by seed.
hashes = TSTInfo.find(
    hparam_filter=dict(experiment_name="counting_test_high_4b_in_CR", n_3b=1_400_000),
    sort_by=["seed"],
)

In [26]:
from training_info import TrainingInfoV2
from plots import calibration_plot
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
from ancillary_features import get_m4j
from sklearn.metrics import roc_auc_score
import pandas as pd

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
n_3b = 1_400_000
# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
experiment_name = "counting_test_high_4b_in_CR"
signal_filename = "HH4b_picoAOD.h5"
ratio_4b = 0.5
batch_size = 1024
n_samples = 1000  # null replicates per test_via_classifier call
hparam_filter = {
    "experiment_name": lambda x: x in [experiment_name],
    "n_3b": n_3b,
    "seed": lambda x: x < 10,
}
do_tqdm = True

hashes = TSTInfo.find(hparam_filter, sort_by=["signal_ratio", "seed"])

# Results are appended to this TSV after every run, so an interrupted loop can
# be resumed: hashes already present in the file are skipped.
df_name = f"data/tsv/tst_results_summary_{experiment_name}_n_3b={n_3b}_mi_test.tsv"
if os.path.exists(df_name):
    df = pd.read_csv(df_name, sep="\t")
else:
    df = pd.DataFrame(columns=["tstinfo_hash", "seed", "signal_ratio", 
                               "auc_score_0", "p_value_auc_bootstrap", "p_value_auc_permutation", 
                               "mce_score_0", "p_value_mce_bootstrap", "p_value_mce_permutation", 
                               ])

# NOTE(review): no random seed is set in this cell, so the null resampling in
# test_via_classifier (np.random) differs from run to run — confirm whether the
# split and the classifier training are seeded elsewhere.
for tstinfo_hash in hashes:
    tstinfo = TSTInfo.load(tstinfo_hash)
    print(f"n_3b={tstinfo.hparams['n_3b']}, signal_ratio={tstinfo.hparams['signal_ratio']}, seed={tstinfo.hparams['seed']}")
    
    if tstinfo_hash in df["tstinfo_hash"].values:
        continue
    
    # Load the classifier checkpoint referenced by this run (CR_fvt_tinfo_hash).
    CR_fvt_tinfo_hash = tstinfo.CR_fvt_tinfo_hash
    CR_fvt_tinfo = TrainingInfoV2.load(CR_fvt_tinfo_hash)
    CR_model = FvTClassifier.load_from_checkpoint(f"data/checkpoints/{CR_fvt_tinfo.hash}_best.ckpt")
    CR_model.to(device)
    CR_model.eval()
    
    # NOTE(review): events_train / events_val are not used anywhere below in
    # this cell; if nothing else relies on them, dropping these loads would
    # save time on every iteration.
    train_scdinfo, val_scdinfo = CR_fvt_tinfo.fetch_train_val_scdinfo()
    events_train = events_from_scdinfo(train_scdinfo, features, signal_filename)
    events_val = events_from_scdinfo(val_scdinfo, features, signal_filename)
    events_tst = events_from_scdinfo(tstinfo.scdinfo_tst, features, signal_filename)
    
    # Column 1 of the model output is used as the score of each test event.
    tst_fvt_scores = CR_model.predict(events_tst.X_torch).detach().cpu().numpy()[:, 1]
    SR_stat = tstinfo.SR_stats
    # Odds of the score, s / (1 - s), scaled by ratio_4b / (1 - ratio_4b).
    reweights = tst_fvt_scores / (1 - tst_fvt_scores) * (ratio_4b / (1 - ratio_4b))
    SR_cut = tstinfo.SR_cut
    CR_cut = tstinfo.CR_cut
    
    # 4b events keep their weight; all other events are multiplied by `reweights`.
    events_tst_clone = events_tst.clone()
    events_tst_clone.reweight(np.where(events_tst_clone.is_4b, 
                                        events_tst_clone.weights, 
                                        events_tst_clone.weights * reweights))
    
    # Region masks: CR_cut <= SR_stat < SR_cut is the CR, SR_stat >= SR_cut the SR.
    in_CR = (SR_stat >= CR_cut) & (SR_stat < SR_cut)
    in_SR = SR_stat >= SR_cut
    events_tst_clone_SR = events_tst_clone[in_SR]
    events_tst_clone_CR = events_tst_clone[in_CR]
    
    # A fresh classifier is trained on the reweighted SR events only.
    SR_classifier = FvTClassifier(
        num_classes=2,
        dim_input_jet_features=4,
        dim_dijet_features=6,
        dim_quadjet_features=6,
        run_name="",
        device=device,
        lr=0.001
    )
    
    # Split the SR events: 0.9 / rest -> (train + val) / test, then 2/3 / rest -> train / val.
    events_tst_SR_train, events_tst_SR_test = events_tst_clone_SR.split(0.9)
    events_tst_SR_train, events_tst_SR_val = events_tst_SR_train.split(2/3)
    events_tst_SR_train.fit_batch_size(batch_size=batch_size)
    events_tst_SR_val.fit_batch_size(batch_size=batch_size)
    
    SR_classifier.fit(
        events_tst_SR_train.to_tensor_dataset(),
        events_tst_SR_val.to_tensor_dataset(),
        max_epochs=10,
        save_checkpoint=False,
        callbacks=[], 
        batch_size=batch_size
    )
    
    SR_classifier.eval()
    SR_classifier.to(device)

    SR_classifier_scores = SR_classifier.predict(events_tst_SR_test.X_torch).detach().cpu().numpy()[:, 1]
    
    # The MCE threshold is the weighted 4b fraction of the SR training split.
    pi = events_tst_SR_train.total_weight_4b / events_tst_SR_train.total_weight
    mce_score = mce_score_fn(pi=pi)
    
    # Arguments shared by all four tests on the held-out SR test split.
    test_kwargs = dict(
        clf_scores=SR_classifier_scores,
        is_4b=events_tst_SR_test.is_4b,
        weights=events_tst_SR_test.weights,
        n_samples=n_samples,
        do_tqdm=do_tqdm,
    )
    
    # AUC: small p-value when the observed AUC exceeds the null AUCs.
    auc_score_0, null_auc_scores_bootstrap, p_value_auc_bootstrap = test_via_classifier(
        score_func=auc_score, bootstrap=True, p_value_type="greater", **test_kwargs)
    auc_score_0, null_auc_scores_permutation, p_value_auc_permutation = test_via_classifier(
        score_func=auc_score, bootstrap=False, p_value_type="greater", **test_kwargs)
    # MCE: small p-value when the observed error is below the null errors.
    mce_score_0, null_mce_scores_bootstrap, p_value_mce_bootstrap = test_via_classifier(
        score_func=mce_score, bootstrap=True, p_value_type="less", **test_kwargs)
    mce_score_0, null_mce_scores_permutation, p_value_mce_permutation = test_via_classifier(
        score_func=mce_score, bootstrap=False, p_value_type="less", **test_kwargs)
    
    results = {
        "tstinfo_hash": tstinfo_hash,
        "auc_score_0": auc_score_0,
        "p_value_auc_bootstrap": p_value_auc_bootstrap,
        "p_value_auc_permutation": p_value_auc_permutation,
        "mce_score_0": mce_score_0,
        "p_value_mce_bootstrap": p_value_mce_bootstrap,
        "p_value_mce_permutation": p_value_mce_permutation,
        "seed": tstinfo.hparams["seed"],
        "signal_ratio": tstinfo.hparams["signal_ratio"]
    }
    
    results_row = pd.DataFrame([results])
    if df.empty:
        # Concatenating onto an empty frame triggers a pandas FutureWarning;
        # start from the new row instead, keeping the existing column order
        # (plus any new columns at the end, as concat would).
        df = results_row.reindex(columns=df.columns.union(results_row.columns, sort=False))
    else:
        df = pd.concat([df, results_row], ignore_index=True)
    # Written after every run so progress survives an interruption.
    df.to_csv(df_name, sep="\t", index=False)


n_3b=1400000, signal_ratio=0.0, seed=0


/home/export/soheuny/.conda/envs/coffea_torch/lib/python3.11/site-packages/lightning_fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /home/export/soheuny/.conda/envs/coffea_torch/lib/py ...
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [5]

  | Name     | Type       | Params
----------------------------------------
0 | encoder  | FvTEncoder | 920   
1 | select_q | conv1d     | 8     
2 | out      | conv1d     | 16    
----------------------------------------
895       Trainable params
49        Non-trainable params
944       Total params
0.004     Total estimated model params size (MB)


Epoch 9: 100%|██████████| 85/85 [00:05<00:00, 16.18it/s, v_num=800, val_loss=0.694, train_loss=0.693]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 85/85 [00:05<00:00, 16.07it/s, v_num=800, val_loss=0.694, train_loss=0.693]


100%|██████████| 1000/1000 [01:46<00:00,  9.41it/s]
100%|██████████| 1000/1000 [01:47<00:00,  9.34it/s]
100%|██████████| 1000/1000 [00:00<00:00, 1335.42it/s]
100%|██████████| 1000/1000 [00:00<00:00, 1534.73it/s]
  df = pd.concat([df, pd.DataFrame([results])], ignore_index=True)


n_3b=1400000, signal_ratio=0.01, seed=0


/home/export/soheuny/.conda/envs/coffea_torch/lib/python3.11/site-packages/lightning_fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /home/export/soheuny/.conda/envs/coffea_torch/lib/py ...
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [5]

  | Name     | Type       | Params
----------------------------------------
0 | encoder  | FvTEncoder | 920   
1 | select_q | conv1d     | 8     
2 | out      | conv1d     | 16    
----------------------------------------
895       Trainable params
49        Non-trainable params
944       Total params
0.004     Total estimated model params size (MB)


Epoch 9: 100%|██████████| 80/80 [00:05<00:00, 15.51it/s, v_num=801, val_loss=0.693, train_loss=0.693]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 80/80 [00:05<00:00, 15.40it/s, v_num=801, val_loss=0.693, train_loss=0.693]


100%|██████████| 1000/1000 [01:53<00:00,  8.78it/s]
100%|██████████| 1000/1000 [01:53<00:00,  8.83it/s]
100%|██████████| 1000/1000 [00:00<00:00, 1437.10it/s]
100%|██████████| 1000/1000 [00:00<00:00, 1639.93it/s]


n_3b=1400000, signal_ratio=0.02, seed=0


/home/export/soheuny/.conda/envs/coffea_torch/lib/python3.11/site-packages/lightning_fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /home/export/soheuny/.conda/envs/coffea_torch/lib/py ...
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [5]

  | Name     | Type       | Params
----------------------------------------
0 | encoder  | FvTEncoder | 920   
1 | select_q | conv1d     | 8     
2 | out      | conv1d     | 16    
----------------------------------------
895       Trainable params
49        Non-trainable params
944       Total params
0.004     Total estimated model params size (MB)


Epoch 9: 100%|██████████| 76/76 [00:04<00:00, 16.27it/s, v_num=802, val_loss=0.686, train_loss=0.686]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 76/76 [00:04<00:00, 16.15it/s, v_num=802, val_loss=0.686, train_loss=0.686]


100%|██████████| 1000/1000 [01:39<00:00, 10.00it/s]
100%|██████████| 1000/1000 [01:40<00:00,  9.95it/s]
100%|██████████| 1000/1000 [00:00<00:00, 1467.28it/s]
100%|██████████| 1000/1000 [00:00<00:00, 1707.21it/s]


n_3b=1400000, signal_ratio=0.0, seed=1


/home/export/soheuny/.conda/envs/coffea_torch/lib/python3.11/site-packages/lightning_fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /home/export/soheuny/.conda/envs/coffea_torch/lib/py ...
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [5]

  | Name     | Type       | Params
----------------------------------------
0 | encoder  | FvTEncoder | 920   
1 | select_q | conv1d     | 8     
2 | out      | conv1d     | 16    
----------------------------------------
895       Trainable params
49        Non-trainable params
944       Total params
0.004     Total estimated model params size (MB)


Epoch 9: 100%|██████████| 78/78 [00:04<00:00, 17.56it/s, v_num=803, val_loss=0.693, train_loss=0.693]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 78/78 [00:04<00:00, 17.43it/s, v_num=803, val_loss=0.693, train_loss=0.693]


100%|██████████| 1000/1000 [01:45<00:00,  9.47it/s]
100%|██████████| 1000/1000 [01:45<00:00,  9.46it/s]
100%|██████████| 1000/1000 [00:00<00:00, 1409.71it/s]
100%|██████████| 1000/1000 [00:00<00:00, 1632.61it/s]


n_3b=1400000, signal_ratio=0.01, seed=1


/home/export/soheuny/.conda/envs/coffea_torch/lib/python3.11/site-packages/lightning_fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /home/export/soheuny/.conda/envs/coffea_torch/lib/py ...
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [5]

  | Name     | Type       | Params
----------------------------------------
0 | encoder  | FvTEncoder | 920   
1 | select_q | conv1d     | 8     
2 | out      | conv1d     | 16    
----------------------------------------
895       Trainable params
49        Non-trainable params
944       Total params
0.004     Total estimated model params size (MB)


Epoch 9: 100%|██████████| 75/75 [00:04<00:00, 16.02it/s, v_num=804, val_loss=0.693, train_loss=0.693]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 75/75 [00:04<00:00, 15.90it/s, v_num=804, val_loss=0.693, train_loss=0.693]


100%|██████████| 1000/1000 [01:19<00:00, 12.65it/s]
100%|██████████| 1000/1000 [01:18<00:00, 12.70it/s]
100%|██████████| 1000/1000 [00:00<00:00, 1497.70it/s]
100%|██████████| 1000/1000 [00:00<00:00, 1770.03it/s]


n_3b=1400000, signal_ratio=0.02, seed=1


/home/export/soheuny/.conda/envs/coffea_torch/lib/python3.11/site-packages/lightning_fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /home/export/soheuny/.conda/envs/coffea_torch/lib/py ...
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [5]

  | Name     | Type       | Params
----------------------------------------
0 | encoder  | FvTEncoder | 920   
1 | select_q | conv1d     | 8     
2 | out      | conv1d     | 16    
----------------------------------------
895       Trainable params
49        Non-trainable params
944       Total params
0.004     Total estimated model params size (MB)


Epoch 9: 100%|██████████| 73/73 [00:04<00:00, 16.25it/s, v_num=805, val_loss=0.691, train_loss=0.692]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 73/73 [00:04<00:00, 16.13it/s, v_num=805, val_loss=0.691, train_loss=0.692]


100%|██████████| 1000/1000 [01:35<00:00, 10.48it/s]
100%|██████████| 1000/1000 [01:34<00:00, 10.59it/s]
100%|██████████| 1000/1000 [00:00<00:00, 1521.60it/s]
100%|██████████| 1000/1000 [00:00<00:00, 1814.19it/s]


n_3b=1400000, signal_ratio=0.0, seed=2


/home/export/soheuny/.conda/envs/coffea_torch/lib/python3.11/site-packages/lightning_fabric/plugins/environments/slurm.py:204: The `srun` command is available on your system but is not used. HINT: If your intention is to run Lightning on SLURM, prepend your python command with `srun` like so: srun python /home/export/soheuny/.conda/envs/coffea_torch/lib/py ...
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [5]

  | Name     | Type       | Params
----------------------------------------
0 | encoder  | FvTEncoder | 920   
1 | select_q | conv1d     | 8     
2 | out      | conv1d     | 16    
----------------------------------------
895       Trainable params
49        Non-trainable params
944       Total params
0.004     Total estimated model params size (MB)


Epoch 9: 100%|██████████| 81/81 [00:04<00:00, 16.82it/s, v_num=806, val_loss=0.693, train_loss=0.693]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 81/81 [00:04<00:00, 16.71it/s, v_num=806, val_loss=0.693, train_loss=0.693]


 97%|█████████▋| 973/1000 [01:30<00:02, 10.74it/s]


KeyboardInterrupt: 

In [None]:
import pandas as pd
import scipy.stats as stats

experiment_name = "counting_test_v2"
n_3b = 1_000_000
sig_level = 0.05
nbins_to_show = [1, 2, 3, 4, 5]


def _read_tst_summary(experiment_name: str, n_3b: int, suffix: str) -> pd.DataFrame:
    """Read the tab-separated results summary for one variant (`suffix` may be empty)."""
    return pd.read_csv(f"data/tsv/tst_results_summary_{experiment_name}_n_3b={n_3b}{suffix}.tsv", 
                       sep="\t")


df_debiased_binwise = _read_tst_summary(experiment_name, n_3b, "_debiased_binwise=True")
df_debiased = _read_tst_summary(experiment_name, n_3b, "_debiased")
df_biased = _read_tst_summary(experiment_name, n_3b, "")
df_debiased_cheating = _read_tst_summary(experiment_name, n_3b, "_debiased_binwise=True_cheating=True")

# Label -> frame, in the order in which the tables are displayed below.
summaries = {
    "biased": df_biased,
    "debiased": df_debiased,
    "debiased_binwise": df_debiased_binwise,
    "debiased_cheating": df_debiased_cheating,
}

for summary in summaries.values():
    # p-value per region: upper tail of a chi-square distribution with df_<region>
    # degrees of freedom, evaluated at df_<region> * sigma_avg_<region>**2.
    for region in ("SR", "CR"):
        dof = summary[f"df_{region}"]
        summary[f"p_{region}"] = stats.chi2.sf(dof * summary[f"sigma_avg_{region}"]**2, df=dof)
    # Rejection flag at significance level `sig_level`.
    for region in ("SR", "CR"):
        summary[f"reject_{region}"] = summary[f"p_{region}"] < sig_level

for label, summary in summaries.items():
    print(label)
    shown = summary[summary["nbins"].isin(nbins_to_show)]
    # Select the two flag columns *before* averaging: `.mean()` over every
    # column fails on non-numeric columns in recent pandas and does extra work.
    display(shown.groupby(["signal_ratio", "reweight", "nbins"])[["reject_SR", "reject_CR"]].mean())


In [None]:
# Histogram of the SR p-values for the "debiased_cheating" results with
# signal_ratio == 0.01, nbins == 5 and reweight == "CR".
cheating_selection = (
    (df_debiased_cheating["signal_ratio"] == 0.01)
    & (df_debiased_cheating["nbins"] == 5)
    & (df_debiased_cheating["reweight"] == "CR")
)
selected_p_SR = df_debiased_cheating.loc[cheating_selection, "p_SR"]

plt.hist(selected_p_SR)
plt.show()
plt.close()