In [1]:
import os
import h5py
import itertools

import numpy as np
import pandas as pd

from tqdm import tqdm

In [2]:
def get_Higgs_result(total_event, total_Higgs, correct_event, correct_Higgs, nh, nj):

    start_nj, end_nj = nj

    label = ['all' if nh == 'all' else f'{nh}h' for nj in range(start_nj, end_nj+2)]

    if nh == 'all':
        nh = slice(1, None)
        
    event_type = [f'Nj={nj}' for nj in range(start_nj, end_nj)]
    event_type.append(f'Nj>={end_nj}')
    event_type.append('Total')

    event_fraction = [total_event[nh, nj].sum() / total_event.sum() for nj in range(start_nj, end_nj)]
    event_fraction.append(total_event[nh, end_nj:].sum() / total_event.sum())
    event_fraction.append(total_event[nh].sum() / total_event.sum())

    event_efficiency = [correct_event[nh, nj].sum() / total_event[nh, nj].sum() for nj in range(start_nj, end_nj)]
    event_efficiency.append(correct_event[nh, end_nj:].sum() / total_event[nh, end_nj:].sum())
    event_efficiency.append(correct_event[nh].sum() / total_event[nh].sum())

    higgs_efficiency = [correct_Higgs[nh, nj].sum() / total_Higgs[nh, nj].sum() for nj in range(start_nj, end_nj)]
    higgs_efficiency.append(correct_Higgs[nh, end_nj:].sum() / total_Higgs[nh, end_nj:].sum())
    higgs_efficiency.append(correct_Higgs[nh].sum() / total_Higgs[nh].sum())
        
    result = {'Label':label,
              'Event type': event_type,
              'Event Fraction': event_fraction,
              'Event Efficiency': event_efficiency,
              'Higgs Efficiency': higgs_efficiency,
             }

    df = pd.DataFrame(result)

    return df

In [3]:
def compare_jet_list_triHiggs(pair1, pair2, nh_max=3):
    h1_true = {pair1[0], pair1[1]}
    h2_true = {pair1[2], pair1[3]}
    h3_true = {pair1[4], pair1[5]}  
    
    h1_test = {pair2[0], pair2[1]}
    h2_test = {pair2[2], pair2[3]}
    h3_test = {pair2[4], pair2[5]}
    
    test_h = [h1_test, h2_test, h3_test]
    
    nh = 0
    for id1, id2, id3 in itertools.permutations([0, 1, 2]):
        h1 = test_h[id1]
        h2 = test_h[id2]
        h3 = test_h[id3]
        if h1_true == h1:
            nh = 1
            if h2_true == h2:
                nh = 2
                if h3_true == h3:
                    nh = 3
                    break
                    
    same = True if nh==nh_max else False
    return same, nh

def compare_jet_list_triHiggs_optimized(pair1, pair2, nh_max=3):
    # 將pair1和pair2分別轉換為三個Higgs的集合
    h_true_sets = [{pair1[i], pair1[i+1]} for i in range(0, 6, 2)]
    h_test_sets = [{pair2[i], pair2[i+1]} for i in range(0, 6, 2)]
    
    # 計算匹配的Higgs數量
    nh = sum(1 for h_true in h_true_sets if h_true in h_test_sets)
    
    # 判斷是否所有Higgs都匹配
    same = nh == nh_max
    return same, nh

In [4]:
def get_particle_mask(quark_jet, particle_quarks):
    # quark_jet: 每個夸克對應的 jet 編號，shape 為 (n_event, 6)
    # particle_quarks: 粒子對應的夸克編號，shape 為 (n_quarks,)

    # 檢查是否每個夸克都有對應的 jet
    mask1 = np.all(quark_jet[:, particle_quarks] != -1, axis=1)

    # 對每一個事件，檢查每個夸克對應的 jet 都不重複
    count = np.array([[np.sum(event == event[i]) for i in particle_quarks] for event in quark_jet])
    mask2 = np.all(count == 1, axis=1)

    return mask1 & mask2

In [5]:
def get_Higgs_correct_fraction(events, nh, nj, jet_type='Nj'):
    # events: number of events in different categories (nh, nj, n_correct_h)
    start_nj, end_nj = nj

    label = [f'{nh}h' for _ in range(start_nj, end_nj+2)]

    total_event = events[nh].sum(axis=1)

    correct_3h_event = events[nh, :, 3]
    correct_2h_event = events[nh, :, 2]
    correct_1h_event = events[nh, :, 1]
    correct_0h_event = events[nh, :, 0]

    correct_Higgs = events[nh, :, 3] * 3 + events[nh, :, 2] * 2 + events[nh, :, 1]
        
    event_type = [f'{jet_type}={nj}' for nj in range(start_nj, end_nj)]
    event_type.append(f'{jet_type}>={end_nj}')
    event_type.append('Total')

    event_fraction = [total_event[nj] / total_event.sum() for nj in range(start_nj, end_nj)]
    event_fraction.append(total_event[end_nj:].sum() / total_event.sum())
    event_fraction.append(total_event.sum() / total_event.sum())


    eff_3h = [correct_3h_event[nj] / total_event[nj] for nj in range(start_nj, end_nj)]
    eff_3h.append(correct_3h_event[end_nj:].sum() / total_event[end_nj:].sum())
    eff_3h.append(correct_3h_event.sum() / total_event.sum())

    eff_2h = [correct_2h_event[nj] / total_event[nj] for nj in range(start_nj, end_nj)]
    eff_2h.append(correct_2h_event[end_nj:].sum() / total_event[end_nj:].sum())
    eff_2h.append(correct_2h_event.sum() / total_event.sum())

    eff_1h = [correct_1h_event[nj] / total_event[nj] for nj in range(start_nj, end_nj)]
    eff_1h.append(correct_1h_event[end_nj:].sum() / total_event[end_nj:].sum())
    eff_1h.append(correct_1h_event.sum() / total_event.sum())

    eff_0h = [correct_0h_event[nj] / total_event[nj] for nj in range(start_nj, end_nj)]
    eff_0h.append(correct_0h_event[end_nj:].sum() / total_event[end_nj:].sum())
    eff_0h.append(correct_0h_event.sum() / total_event.sum())

    eff_Higgs = [correct_Higgs[nj] / (total_event[nj] * nh) for nj in range(start_nj, end_nj)]
    eff_Higgs.append(correct_Higgs[end_nj:].sum() / (total_event[end_nj:].sum() * nh))
    eff_Higgs.append(correct_Higgs.sum() / (total_event.sum() * nh))
        
    result = {'Label':label,
              'Event type': event_type,
              'Event Fraction': event_fraction,
              '3h': eff_3h,
              '2h': eff_2h,
              '1h': eff_1h,
              '0h': eff_0h,
              'Higgs': eff_Higgs
             }

    df = pd.DataFrame(result)

    return df


In [6]:
# 載入正確配對與測試配對的資料，並計算配對的效率
def compute_pairing_efficiency(true_file, test_file):
    MAX_JETS = 15

    with h5py.File(true_file, 'r') as f_true, h5py.File(test_file, 'r') as f_test:
        
        # events: 總共有多少該類事件 (nh, nj, n_correct_h)
        events = np.zeros((4, MAX_JETS + 1, 4))

        nevent = f_true['INPUTS/Source/pt'].shape[0]

        for event in tqdm(range(nevent)):

            nj = f_true['INPUTS/Source/MASK'][event].sum()

            h1_b1 = f_true['TARGETS/h1/b1'][event]
            h1_b2 = f_true['TARGETS/h1/b2'][event]
            h2_b1 = f_true['TARGETS/h2/b1'][event]
            h2_b2 = f_true['TARGETS/h2/b2'][event]
            h3_b1 = f_true['TARGETS/h3/b1'][event]
            h3_b2 = f_true['TARGETS/h3/b2'][event]

            quark_jet = np.array([h1_b1, h1_b2, h2_b1, h2_b2, h3_b1, h3_b2]).reshape(1, 6)

            h1_mask = get_particle_mask(quark_jet, [0, 1])
            h2_mask = get_particle_mask(quark_jet, [2, 3])
            h3_mask = get_particle_mask(quark_jet, [4, 5])

            event_h = [h1_mask, h2_mask, h3_mask].count(True)

            true_pair = [h1_b1,h1_b2, h2_b1,h2_b2, h3_b1,h3_b2]


            h1_b1 = f_test['TARGETS/h1/b1'][event]
            h1_b2 = f_test['TARGETS/h1/b2'][event]
            h2_b1 = f_test['TARGETS/h2/b1'][event]
            h2_b2 = f_test['TARGETS/h2/b2'][event]
            h3_b1 = f_test['TARGETS/h3/b1'][event]
            h3_b2 = f_test['TARGETS/h3/b2'][event]
            pair = [h1_b1,h1_b2, h2_b1,h2_b2, h3_b1,h3_b2]


            if event_h == 3:
                _, nh = compare_jet_list_triHiggs(true_pair, pair, nh_max=3)
                events[3, nj, nh] += 1
            elif event_h == 2:
                _, nh = compare_jet_list_triHiggs(true_pair, pair, nh_max=2)
                events[2, nj, nh] += 1
            elif event_h == 1:
                _, nh = compare_jet_list_triHiggs(true_pair, pair, nh_max=1)
                events[1, nj, nh] += 1
            elif event_h == 0:
                events[0, nj, 0] += 1
        
        
        df_3h = get_Higgs_correct_fraction(events, nh=3, nj=(6, 8), jet_type='Nj')  
        print('3 Higgs Events:')
        print(df_3h)

        df_style = df_3h.style.format({
            'Event Fraction': '{:.3f}',
            '3h': '{:.3f}',
            '2h': '{:.3f}',
            '1h': '{:.3f}',
            '0h': '{:.3f}',
            'Higgs': '{:.3f}'
        })
        print(df_style.to_latex(column_format='c|cccc|c'))


# Test on $6b$ dataset

In [8]:
file_path = '../SPANet2/data/triHiggs/gghhh_6b_PT40_test.h5'
output_path = '../SPANet2/data/triHiggs/gghhh_6b_PT40_test-chi2_pairing.h5'

compute_pairing_efficiency(file_path, output_path)

100%|██████████| 40000/40000 [01:29<00:00, 445.38it/s]


3 Higgs Events:
  Label Event type  Event Fraction        3h        2h        1h        0h  \
0    3h       Nj=6        0.210876  0.509454  0.000000  0.123599  0.366947   
1    3h       Nj=7        0.318160  0.428986  0.004874  0.135414  0.430726   
2    3h      Nj>=8        0.470964  0.310653  0.010818  0.148154  0.530375   
3    3h      Total        1.000000  0.390224  0.006645  0.138923  0.464208   

      Higgs  
0  0.550654  
1  0.477373  
2  0.367249  
3  0.440962  
\begin{tabular}{c|cccc|c}
 & Label & Event type & Event Fraction & 3h & 2h & 1h & 0h & Higgs \\
0 & 3h & Nj=6 & 0.211 & 0.509 & 0.000 & 0.124 & 0.367 & 0.551 \\
1 & 3h & Nj=7 & 0.318 & 0.429 & 0.005 & 0.135 & 0.431 & 0.477 \\
2 & 3h & Nj>=8 & 0.471 & 0.311 & 0.011 & 0.148 & 0.530 & 0.367 \\
3 & 3h & Total & 1.000 & 0.390 & 0.007 & 0.139 & 0.464 & 0.441 \\
\end{tabular}



In [12]:
file_path = '../SPANet2/data/triHiggs/gghhh_6b_PT40_test.h5'
output_path = '../SPANet2/data/triHiggs/gghhh_6b_PT40_test-abs_pairing.h5'

compute_pairing_efficiency(file_path, output_path)

100%|██████████| 40000/40000 [01:31<00:00, 438.53it/s]

3 Higgs Events:
  Label Event type  Event Fraction        3h        2h        1h        0h  \
0    3h       Nj=6        0.210876  0.456057  0.000000  0.126225  0.417717   
1    3h       Nj=7        0.318160  0.388257  0.004293  0.141332  0.466117   
2    3h      Nj>=8        0.470964  0.279925  0.009877  0.152622  0.557576   
3    3h      Total        1.000000  0.351534  0.006018  0.143464  0.498985   

      Higgs  
0  0.498133  
1  0.438230  
2  0.337383  
3  0.403367  
\begin{tabular}{c|cccc|c}
 & Label & Event type & Event Fraction & 3h & 2h & 1h & 0h & Higgs \\
0 & 3h & Nj=6 & 0.211 & 0.456 & 0.000 & 0.126 & 0.418 & 0.498 \\
1 & 3h & Nj=7 & 0.318 & 0.388 & 0.004 & 0.141 & 0.466 & 0.438 \\
2 & 3h & Nj>=8 & 0.471 & 0.280 & 0.010 & 0.153 & 0.558 & 0.337 \\
3 & 3h & Total & 1.000 & 0.352 & 0.006 & 0.143 & 0.499 & 0.403 \\
\end{tabular}






In [11]:
file_path = '../SPANet2/data/triHiggs/gghhh_6b_PT40_test.h5'
output_path = '../SPANet2/data/triHiggs/gghhh_6b_PT40_test-6b_SPANet_pairing.h5'

compute_pairing_efficiency(file_path, output_path)

100%|██████████| 40000/40000 [01:31<00:00, 435.51it/s]

3 Higgs Events:
  Label Event type  Event Fraction        3h        2h        1h        0h  \
0    3h       Nj=6        0.210876  0.417367  0.000000  0.136555  0.446078   
1    3h       Nj=7        0.318160  0.317707  0.003133  0.155721  0.523439   
2    3h      Nj>=8        0.470964  0.219174  0.010034  0.171827  0.598965   
3    3h      Total        1.000000  0.292317  0.005722  0.159265  0.542696   

      Higgs  
0  0.462885  
1  0.371703  
2  0.283139  
3  0.349220  
\begin{tabular}{c|cccc|c}
 & Label & Event type & Event Fraction & 3h & 2h & 1h & 0h & Higgs \\
0 & 3h & Nj=6 & 0.211 & 0.417 & 0.000 & 0.137 & 0.446 & 0.463 \\
1 & 3h & Nj=7 & 0.318 & 0.318 & 0.003 & 0.156 & 0.523 & 0.372 \\
2 & 3h & Nj>=8 & 0.471 & 0.219 & 0.010 & 0.172 & 0.599 & 0.283 \\
3 & 3h & Total & 1.000 & 0.292 & 0.006 & 0.159 & 0.543 & 0.349 \\
\end{tabular}






In [9]:
file_path = '../SPANet2/data/triHiggs/gghhh_6b_PT40_test.h5'
output_path = '../SPANet2/data/triHiggs/gghhh_6b_PT40_test-4b_SPANet_pairing.h5'

compute_pairing_efficiency(file_path, output_path)

100%|██████████| 40000/40000 [01:31<00:00, 437.96it/s]

3 Higgs Events:
  Label Event type  Event Fraction        3h        2h        1h        0h  \
0    3h       Nj=6        0.210876  0.648109  0.000000  0.087885  0.264006   
1    3h       Nj=7        0.318160  0.538988  0.005222  0.119633  0.336157   
2    3h      Nj>=8        0.470964  0.390217  0.012621  0.160383  0.436780   
3    3h      Total        1.000000  0.491933  0.007605  0.132130  0.368332   

      Higgs  
0  0.677404  
1  0.582347  
2  0.452092  
3  0.541047  
\begin{tabular}{c|cccc|c}
 & Label & Event type & Event Fraction & 3h & 2h & 1h & 0h & Higgs \\
0 & 3h & Nj=6 & 0.211 & 0.648 & 0.000 & 0.088 & 0.264 & 0.677 \\
1 & 3h & Nj=7 & 0.318 & 0.539 & 0.005 & 0.120 & 0.336 & 0.582 \\
2 & 3h & Nj>=8 & 0.471 & 0.390 & 0.013 & 0.160 & 0.437 & 0.452 \\
3 & 3h & Total & 1.000 & 0.492 & 0.008 & 0.132 & 0.368 & 0.541 \\
\end{tabular}




