In [1]:
import tqdm
import pytorch_lightning as pl
import numpy as np

from events_data import EventsData
from training_info import TrainingInfo
from dataset import generate_tt_dataset

# Column names handed to EventsData.from_dataframe: for each quantity
# (pt, eta, phi, m) the four "sym_Jet{0..3}" columns, grouped by quantity
# first and jet index second.
features = [
    f"sym_Jet{jet_idx}_{quantity}"
    for quantity in ("pt", "eta", "phi", "m")
    for jet_idx in range(4)
]

# Sweep configuration: one stored training run is looked up for every
# (seed, n_sample_ratio) pair of this experiment.
seeds = list(range(42, 93, 10))  # 42, 52, 62, 72, 82, 92
# Fractions of the training pool to subsample: 1/32, 1/16, 1/8, 1/4, 1/2, 1.
# Powers of two are exactly representable, so these equal the decimal literals.
n_sample_ratios = [2.0**-exponent for exponent in range(5, -1, -1)]
experiment_name = "fvt_training_n_samples_1"

# For every (seed, n_sample_ratio) configuration: load the matching stored
# training record, regenerate the dataset, redraw the train/validation
# subsample under the same seed, and save the two splits on the record.
for seed in seeds:
    for n_sample_ratio in tqdm.tqdm(n_sample_ratios, desc=f"seed={seed}"):
        # Locate the single stored training run matching this configuration.
        hparam_filter = {
            "experiment_name": experiment_name,
            "seed": seed,
            "n_sample_ratio": n_sample_ratio,
        }
        hashes = TrainingInfo.find(hparam_filter)
        assert len(hashes) == 1, (
            f"expected exactly one training run for {hparam_filter}, "
            f"found {len(hashes)}"
        )
        tinfo = TrainingInfo.load(hashes[0])
        # Report success only once the load has actually completed.
        print("successfully loaded", hparam_filter)

        # Regenerate the training pool from the hyper-parameters stored on the
        # record; the second return value is not needed here.
        dinfo_train_all, _ = generate_tt_dataset(
            seed,
            tinfo.hparams["n_3b"],
            tinfo.hparams["n_all4b"],
            tinfo.hparams["signal_ratio"],
            tinfo.hparams["test_ratio"],
        )

        # Seed the RNGs right before sampling so the draw below depends only
        # on `seed`. np.random.seed is kept explicit so the NumPy draw is
        # seeded here regardless of what seed_everything covers.
        pl.seed_everything(seed)
        np.random.seed(seed)

        # Draw a random subset (without replacement) of the requested size.
        n_train_val = int(n_sample_ratio * len(dinfo_train_all))
        train_val_idx = np.random.choice(
            len(dinfo_train_all),
            n_train_val,
            replace=False,
        )
        val_ratio = tinfo.hparams["val_ratio"]
        n_val = int(val_ratio * n_train_val)

        # The first n_val drawn indices form the validation split, the
        # remainder the training split.
        dinfo_train = dinfo_train_all[train_val_idx[n_val:]]
        dinfo_val = dinfo_train_all[train_val_idx[:n_val]]

        events_train = EventsData.from_dataframe(
            dinfo_train.fetch_data(),
            features,
            name="fvt_train",
        )
        events_val = EventsData.from_dataframe(
            dinfo_val.fetch_data(),
            features,
            name="fvt_val",
        )

        # Sanity check on the training split: weighted 4b share of all events,
        # and weighted signal share within the 4b events.
        print(
            "4b ratio: ",
            events_train.total_weight_4b / events_train.total_weight,
        )
        print(
            "Signal ratio: ",
            events_train.total_weight_signal / events_train.total_weight_4b,
        )

        # NOTE(review): events_train / events_val are not used again inside
        # this loop; the calls are kept in case later cells rely on the
        # objects left over from the final iteration -- confirm.
        batch_size = tinfo.hparams["batch_size"]
        events_train.fit_batch_size(batch_size)
        events_val.fit_batch_size(batch_size)

        # Attach the splits to the training record and save it.
        tinfo.dinfo_train = dinfo_train
        tinfo.dinfo_val = dinfo_val
        tinfo.save()

  0%|          | 0/6 [00:00<?, ?it/s]

successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 42, 'n_sample_ratio': 0.03125}


Seed set to 42
 17%|█▋        | 1/6 [00:12<01:00, 12.10s/it]

4b ratio:  0.50367725
Signal ratio:  0.009958669
successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 42, 'n_sample_ratio': 0.0625}


Seed set to 42
 33%|███▎      | 2/6 [00:32<01:09, 17.26s/it]

4b ratio:  0.5034877
Signal ratio:  0.009824955
successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 42, 'n_sample_ratio': 0.125}


Seed set to 42
 50%|█████     | 3/6 [00:49<00:50, 16.91s/it]

4b ratio:  0.50123215
Signal ratio:  0.009831276
successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 42, 'n_sample_ratio': 0.25}


Seed set to 42
 67%|██████▋   | 4/6 [01:03<00:31, 15.72s/it]

4b ratio:  0.5009648
Signal ratio:  0.009960095
successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 42, 'n_sample_ratio': 0.5}


Seed set to 42


4b ratio:  0.5016605
Signal ratio:  0.009857018


 83%|████████▎ | 5/6 [01:20<00:16, 16.13s/it]

successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 42, 'n_sample_ratio': 1.0}


Seed set to 42


4b ratio:  0.5008906
Signal ratio:  0.009983843


100%|██████████| 6/6 [01:34<00:00, 15.70s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 52, 'n_sample_ratio': 0.03125}


Seed set to 52
 17%|█▋        | 1/6 [00:07<00:35,  7.13s/it]

4b ratio:  0.50431406
Signal ratio:  0.009580999
successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 52, 'n_sample_ratio': 0.0625}


Seed set to 52
 33%|███▎      | 2/6 [00:13<00:27,  6.84s/it]

4b ratio:  0.5001128
Signal ratio:  0.009562527
successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 52, 'n_sample_ratio': 0.125}


Seed set to 52
 50%|█████     | 3/6 [00:20<00:20,  6.79s/it]

4b ratio:  0.49942604
Signal ratio:  0.01008778
successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 52, 'n_sample_ratio': 0.25}


Seed set to 52
 67%|██████▋   | 4/6 [00:27<00:13,  6.87s/it]

4b ratio:  0.50200784
Signal ratio:  0.010063941
successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 52, 'n_sample_ratio': 0.5}


Seed set to 52


4b ratio:  0.50151694
Signal ratio:  0.009999516


 83%|████████▎ | 5/6 [00:35<00:07,  7.16s/it]

successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 52, 'n_sample_ratio': 1.0}


Seed set to 52


4b ratio:  0.50094354
Signal ratio:  0.009925559


100%|██████████| 6/6 [00:44<00:00,  7.42s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 62, 'n_sample_ratio': 0.03125}


Seed set to 62
 17%|█▋        | 1/6 [00:06<00:32,  6.43s/it]

4b ratio:  0.50160486
Signal ratio:  0.010287973
successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 62, 'n_sample_ratio': 0.0625}


Seed set to 62
 33%|███▎      | 2/6 [00:12<00:25,  6.32s/it]

4b ratio:  0.5019593
Signal ratio:  0.010197287
successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 62, 'n_sample_ratio': 0.125}


Seed set to 62
 50%|█████     | 3/6 [00:19<00:19,  6.35s/it]

4b ratio:  0.50116676
Signal ratio:  0.010199089
successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 62, 'n_sample_ratio': 0.25}


Seed set to 62
 67%|██████▋   | 4/6 [00:25<00:12,  6.49s/it]

4b ratio:  0.50177145
Signal ratio:  0.009743568
successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 62, 'n_sample_ratio': 0.5}


Seed set to 62


4b ratio:  0.50244886
Signal ratio:  0.010075949


 83%|████████▎ | 5/6 [00:33<00:06,  6.82s/it]

successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 62, 'n_sample_ratio': 1.0}


Seed set to 62


4b ratio:  0.5009891
Signal ratio:  0.00990503


100%|██████████| 6/6 [00:41<00:00,  6.96s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 72, 'n_sample_ratio': 0.03125}


Seed set to 72
 17%|█▋        | 1/6 [00:06<00:32,  6.46s/it]

4b ratio:  0.5041817
Signal ratio:  0.0091002975
successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 72, 'n_sample_ratio': 0.0625}


Seed set to 72
 33%|███▎      | 2/6 [00:12<00:25,  6.37s/it]

4b ratio:  0.49983224
Signal ratio:  0.010070477
successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 72, 'n_sample_ratio': 0.125}


Seed set to 72
 50%|█████     | 3/6 [00:19<00:19,  6.37s/it]

4b ratio:  0.5006026
Signal ratio:  0.009883689
successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 72, 'n_sample_ratio': 0.25}


Seed set to 72
 67%|██████▋   | 4/6 [00:25<00:12,  6.49s/it]

4b ratio:  0.5007137
Signal ratio:  0.009595151
successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 72, 'n_sample_ratio': 0.5}


Seed set to 72


4b ratio:  0.5010029
Signal ratio:  0.009946049


 83%|████████▎ | 5/6 [00:33<00:06,  6.82s/it]

successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 72, 'n_sample_ratio': 1.0}


Seed set to 72


4b ratio:  0.50094503
Signal ratio:  0.010034967


100%|██████████| 6/6 [00:41<00:00,  6.91s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 82, 'n_sample_ratio': 0.03125}


Seed set to 82
 17%|█▋        | 1/6 [00:06<00:30,  6.09s/it]

4b ratio:  0.50341594
Signal ratio:  0.010224051
successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 82, 'n_sample_ratio': 0.0625}


Seed set to 82
 33%|███▎      | 2/6 [00:12<00:24,  6.12s/it]

4b ratio:  0.5004241
Signal ratio:  0.009754896
successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 82, 'n_sample_ratio': 0.125}


Seed set to 82
 50%|█████     | 3/6 [00:18<00:18,  6.20s/it]

4b ratio:  0.50126415
Signal ratio:  0.010307895
successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 82, 'n_sample_ratio': 0.25}


Seed set to 82
 67%|██████▋   | 4/6 [00:25<00:12,  6.35s/it]

4b ratio:  0.50052017
Signal ratio:  0.010292847
successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 82, 'n_sample_ratio': 0.5}


Seed set to 82


4b ratio:  0.5006362
Signal ratio:  0.010028345


 83%|████████▎ | 5/6 [00:32<00:06,  6.67s/it]

successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 82, 'n_sample_ratio': 1.0}


Seed set to 82


4b ratio:  0.50109845
Signal ratio:  0.009912691


100%|██████████| 6/6 [00:40<00:00,  6.76s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 92, 'n_sample_ratio': 0.03125}


Seed set to 92
 17%|█▋        | 1/6 [00:06<00:30,  6.13s/it]

4b ratio:  0.5015729
Signal ratio:  0.009426962
successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 92, 'n_sample_ratio': 0.0625}


Seed set to 92
 33%|███▎      | 2/6 [00:11<00:23,  5.95s/it]

4b ratio:  0.5017192
Signal ratio:  0.010063617
successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 92, 'n_sample_ratio': 0.125}


Seed set to 92
 50%|█████     | 3/6 [00:17<00:17,  5.96s/it]

4b ratio:  0.50202906
Signal ratio:  0.010101881
successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 92, 'n_sample_ratio': 0.25}


Seed set to 92
 67%|██████▋   | 4/6 [00:24<00:12,  6.12s/it]

4b ratio:  0.5011712
Signal ratio:  0.0099167265
successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 92, 'n_sample_ratio': 0.5}


Seed set to 92


4b ratio:  0.50114745
Signal ratio:  0.009974096


 83%|████████▎ | 5/6 [00:31<00:06,  6.42s/it]

successfully loaded {'experiment_name': 'fvt_training_n_samples_1', 'seed': 92, 'n_sample_ratio': 1.0}


Seed set to 92


4b ratio:  0.5010859
Signal ratio:  0.009911856


100%|██████████| 6/6 [00:39<00:00,  6.52s/it]


In [1]:
# Load IPython's autoreload extension and set it to mode 2 so edited modules
# are re-imported automatically before each cell runs.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:


import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
# Build the dataset twice from identical arguments; the two results are
# compared further down to confirm that generation is reproducible.
tt_dataset_kwargs = dict(
    seed=42,
    n_3b=2_000_000,
    n_all4b=2_000_000,
    signal_ratio=0.015,
    test_ratio=0.2,
)

dinfo_train, dinfo_test = generate_tt_dataset(**tt_dataset_kwargs)
dinfo_train_2, dinfo_test_2 = generate_tt_dataset(**tt_dataset_kwargs)

In [4]:
# Fetch the data behind both generated copies of each split
# (train pair first, then test pair).
df_train, df_train_2 = dinfo_train.fetch_data(), dinfo_train_2.fetch_data()
df_test, df_test_2 = dinfo_test.fetch_data(), dinfo_test_2.fetch_data()

In [5]:
# Reproducibility check: generating with the same arguments twice must give
# identical frames. assert_frame_equal still raises AssertionError, but
# reports where the frames differ; check_exact=True keeps the comparison
# strict (no floating-point tolerance).
pd.testing.assert_frame_equal(df_train, df_train_2, check_exact=True)
pd.testing.assert_frame_equal(df_test, df_test_2, check_exact=True)

In [6]:
# Only a cell's last bare expression is rendered, so two consecutive .head()
# calls would show the test preview alone. display() (available as a builtin
# in IPython kernels) renders both previews.
display(df_train.head(), df_test.head())

Unnamed: 0,Jet0_pt,Jet1_pt,Jet2_pt,Jet3_pt,Jet0_eta,Jet1_eta,Jet2_eta,Jet3_eta,Jet0_phi,Jet1_phi,...,sym_Jet0_phi,sym_Jet0_pt,sym_Jet0_m,sym_Jet1_pt,sym_Jet1_m,sym_Jet2_pt,sym_Jet2_m,sym_Jet3_pt,sym_Jet3_m,signal
0,102.62719,88.4841,73.482193,64.660904,0.27645,1.218454,0.829643,-1.543073,3.055435,-2.709024,...,0,102.62719,0.0,88.4841,0.0,73.482193,0.0,64.660904,0.0,False
1,107.726097,69.418266,63.368328,51.620911,1.790683,-0.429038,-0.529767,0.779059,2.325519,2.058803,...,0,107.726097,0.0,69.418266,0.0,63.368328,0.0,51.620911,0.0,False
2,181.930542,111.711487,51.089893,43.02187,-1.466572,-1.906868,-0.384857,-2.477571,-1.044509,2.489904,...,0,181.930542,0.0,111.711487,0.0,51.089893,0.0,43.02187,0.0,False
3,74.663414,64.822693,59.59483,54.634785,-0.640479,0.188939,1.059412,2.089017,2.890759,-1.2643,...,0,74.663414,0.0,64.822693,0.0,59.59483,0.0,54.634785,0.0,False
4,70.948875,68.576927,63.459507,45.534973,1.085591,-0.288453,-1.485473,0.631771,0.700424,-2.665915,...,0,70.948875,0.0,68.576927,0.0,63.459507,0.0,45.534973,0.0,False


Seed set to 42


['NbrnTP3fAbnF', 'bmOHnKYaXRvj']
Test Data
