In [1]:
import pandas as pd

In [2]:
def print_ACC_AUC(df, sample_type):
    """Print mean ± standard deviation of accuracy and AUC for one sample type.

    The rows of ``df`` whose 'Sample Type' column equals ``sample_type`` are
    selected once. Two summaries are then printed to stdout:

    1. the 'ACC' and 'AUC' columns, preceded by ``sample_type`` itself;
    2. the 'ACC-true' and 'AUC-true' columns, preceded by the heading
       'True ACC and AUC'.

    Each summary is two human-readable lines followed by one line formatted
    as a LaTeX table fragment (two math-mode cells separated by '&').

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns 'Sample Type', 'ACC', 'AUC', 'ACC-true'
        and 'AUC-true'.
    sample_type : str
        Value compared for equality against the 'Sample Type' column.

    Returns
    -------
    None
        Everything is written with ``print``.

    Notes
    -----
    If no row matches, or only one does, the affected statistics print as
    ``nan`` (pandas' ``Series.std`` uses the sample estimator by default).
    """
    # Build the row selection once instead of re-evaluating the mask per column.
    selected = df[df['Sample Type'] == sample_type]

    def _print_mean_std(acc, auc):
        # Compute each statistic once and reuse it in all three lines.
        acc_mean, acc_std = acc.mean(), acc.std()
        auc_mean, auc_std = auc.mean(), auc.std()

        print(f'ACC: {acc_mean:.3f} ± {acc_std:.3f}')
        print(f'AUC: {auc_mean:.3f} ± {auc_std:.3f}')
        # Raw f-string: '\p' is not a valid escape sequence in a normal string
        # literal and triggers a warning on modern Python versions.
        print(rf'${acc_mean:.3f} \pm {acc_std:.3f}$ & ${auc_mean:.3f} \pm {auc_std:.3f}$')

    acc, auc = selected['ACC'], selected['AUC']
    print(sample_type)
    _print_mean_std(acc, auc)

    acc_true, auc_true = selected['ACC-true'], selected['AUC-true']
    print('True ACC and AUC')
    _print_mean_std(acc_true, auc_true)

# Luminosity 300 $\text{fb}^{-1}$

In [3]:
# Read the results CSV and keep only the rows whose `time` value lies
# strictly between '2025-03-08' and '2025-03-09'.
file_name = './GGF_VBF_CWoLa_training_results.csv'
df = pd.read_csv(file_name).query("time > '2025-03-08' and time < '2025-03-09'")

In [4]:
# Summarize the rows labelled with this sample type.
sample_type = 'mjj: 300 GeV'
print_ACC_AUC(df=df, sample_type=sample_type)

mjj: 300 GeV
ACC: 0.712 ± 0.023
AUC: 0.741 ± 0.041
$0.712 \pm 0.023$ & $0.741 \pm 0.041$
True ACC and AUC
ACC: 0.576 ± 0.010
AUC: 0.596 ± 0.014
$0.576 \pm 0.010$ & $0.596 \pm 0.014$


In [5]:
# Summarize the rows labelled with this sample type.
sample_type = 'deta: 3.1'
print_ACC_AUC(df=df, sample_type=sample_type)

deta: 3.1
ACC: 0.828 ± 0.043
AUC: 0.889 ± 0.050
$0.828 \pm 0.043$ & $0.889 \pm 0.050$
True ACC and AUC
ACC: 0.604 ± 0.014
AUC: 0.630 ± 0.015
$0.604 \pm 0.014$ & $0.630 \pm 0.015$


In [6]:
# Summarize the rows labelled with this sample type.
sample_type = 'mjj: 300 GeV, deta: 3.1'
print_ACC_AUC(df=df, sample_type=sample_type)

mjj: 300 GeV, deta: 3.1
ACC: 0.753 ± 0.022
AUC: 0.792 ± 0.035
$0.753 \pm 0.022$ & $0.792 \pm 0.035$
True ACC and AUC
ACC: 0.573 ± 0.007
AUC: 0.596 ± 0.008
$0.573 \pm 0.007$ & $0.596 \pm 0.008$


# Luminosity 3000 $\text{fb}^{-1}$

In [7]:
# Read the results CSV and keep only the rows whose `time` value lies
# strictly between '2025-03-08' and '2025-03-09'.
file_name = './GGF_VBF_CWoLa_training_results.csv'
df = pd.read_csv(file_name).query("time > '2025-03-08' and time < '2025-03-09'")

In [8]:
# Summarize the rows labelled with this sample type.
sample_type = 'mjj: 300 GeV, L: 3000 fb^-1'
print_ACC_AUC(df=df, sample_type=sample_type)

mjj: 300 GeV, L: 3000 fb^-1
ACC: 0.907 ± 0.002
AUC: 0.969 ± 0.002
$0.907 \pm 0.002$ & $0.969 \pm 0.002$
True ACC and AUC
ACC: 0.598 ± 0.008
AUC: 0.625 ± 0.009
$0.598 \pm 0.008$ & $0.625 \pm 0.009$


In [9]:
# Summarize the rows labelled with this sample type.
sample_type = 'deta: 3.1, L: 3000 fb^-1'
print_ACC_AUC(df=df, sample_type=sample_type)

deta: 3.1, L: 3000 fb^-1
ACC: 0.931 ± 0.004
AUC: 0.979 ± 0.002
$0.931 \pm 0.004$ & $0.979 \pm 0.002$
True ACC and AUC
ACC: 0.615 ± 0.005
AUC: 0.648 ± 0.006
$0.615 \pm 0.005$ & $0.648 \pm 0.006$


In [10]:
# Summarize the rows labelled with this sample type.
sample_type = 'mjj: 300 GeV, deta: 3.1, L: 3000 fb^-1'
print_ACC_AUC(df=df, sample_type=sample_type)

mjj: 300 GeV, deta: 3.1, L: 3000 fb^-1
ACC: 0.929 ± 0.003
AUC: 0.978 ± 0.002
$0.929 \pm 0.003$ & $0.978 \pm 0.002$
True ACC and AUC
ACC: 0.608 ± 0.004
AUC: 0.638 ± 0.005
$0.608 \pm 0.004$ & $0.638 \pm 0.005$


# Luminosity 3000 $\text{fb}^{-1}$ with $p_{\text{T}}$ normalization

In [11]:
# Read the results CSV and keep only the rows whose `time` value lies
# strictly between '2025-03-16' and '2025-03-17'.
file_name = './GGF_VBF_CWoLa_training_results.csv'
df = pd.read_csv(file_name).query("time > '2025-03-16' and time < '2025-03-17'")

In [12]:
# Summarize the rows labelled with this sample type.
sample_type = 'mjj: 300 GeV, L: 3000 fb^-1, pT normalized'
print_ACC_AUC(df=df, sample_type=sample_type)

mjj: 300 GeV, L: 3000 fb^-1, pT normalized
ACC: 0.874 ± 0.004
AUC: 0.946 ± 0.003
$0.874 \pm 0.004$ & $0.946 \pm 0.003$
True ACC and AUC
ACC: 0.624 ± 0.005
AUC: 0.663 ± 0.006
$0.624 \pm 0.005$ & $0.663 \pm 0.006$


In [13]:
# Summarize the rows labelled with this sample type.
sample_type = 'deta: 3.1, L: 3000 fb^-1, pT normalized'
print_ACC_AUC(df=df, sample_type=sample_type)

deta: 3.1, L: 3000 fb^-1, pT normalized
ACC: 0.928 ± 0.005
AUC: 0.979 ± 0.002
$0.928 \pm 0.005$ & $0.979 \pm 0.002$
True ACC and AUC
ACC: 0.597 ± 0.005
AUC: 0.630 ± 0.006
$0.597 \pm 0.005$ & $0.630 \pm 0.006$


In [14]:
# Summarize the rows labelled with this sample type.
sample_type = 'mjj: 300 GeV, deta: 3.1, L: 3000 fb^-1, pT normalized'
print_ACC_AUC(df=df, sample_type=sample_type)

mjj: 300 GeV, deta: 3.1, L: 3000 fb^-1, pT normalized
ACC: 0.917 ± 0.003
AUC: 0.973 ± 0.002
$0.917 \pm 0.003$ & $0.973 \pm 0.002$
True ACC and AUC
ACC: 0.603 ± 0.004
AUC: 0.636 ± 0.006
$0.603 \pm 0.004$ & $0.636 \pm 0.006$


# Luminosity 3000 $\text{fb}^{-1}$ with $p_{\text{T}}$ normalization, $m_{jj}$: 225 GeV, $\Delta\eta$: 2.3

In [15]:
# Read the results CSV and keep only the rows whose `time` value lies
# strictly between '2025-03-16' and '2025-03-17'.
file_name = './GGF_VBF_CWoLa_training_results.csv'
df = pd.read_csv(file_name).query("time > '2025-03-16' and time < '2025-03-17'")

In [16]:
# Report the three sample types of this section in order, separated by a
# blank line (no blank line after the last one).
sample_types = (
    'mjj: 225 GeV, L: 3000 fb^-1, pT normalized',
    'deta: 2.3, L: 3000 fb^-1, pT normalized',
    'mjj: 225 GeV, deta: 2.3, L: 3000 fb^-1, pT normalized',
)
for position, sample_type in enumerate(sample_types):
    if position > 0:
        print()
    print_ACC_AUC(df, sample_type)

mjj: 225 GeV, L: 3000 fb^-1, pT normalized
ACC: 0.864 ± 0.004
AUC: 0.940 ± 0.004
$0.864 \pm 0.004$ & $0.940 \pm 0.004$
True ACC and AUC
ACC: 0.632 ± 0.006
AUC: 0.673 ± 0.007
$0.632 \pm 0.006$ & $0.673 \pm 0.007$

deta: 2.3, L: 3000 fb^-1, pT normalized
ACC: 0.913 ± 0.006
AUC: 0.972 ± 0.003
$0.913 \pm 0.006$ & $0.972 \pm 0.003$
True ACC and AUC
ACC: 0.605 ± 0.007
AUC: 0.640 ± 0.009
$0.605 \pm 0.007$ & $0.640 \pm 0.009$

mjj: 225 GeV, deta: 2.3, L: 3000 fb^-1, pT normalized
ACC: 0.896 ± 0.007
AUC: 0.961 ± 0.004
$0.896 \pm 0.007$ & $0.961 \pm 0.004$
True ACC and AUC
ACC: 0.616 ± 0.005
AUC: 0.653 ± 0.006
$0.616 \pm 0.005$ & $0.653 \pm 0.006$


# Supervised CNN: 200k+50k+50k

In [17]:
# Read the results CSV and keep only the rows whose `time` value lies
# strictly between '2025-03-21' and '2025-03-22'.
file_name = './GGF_VBF_CWoLa_training_results.csv'
df = pd.read_csv(file_name).query("time > '2025-03-21' and time < '2025-03-22'")

In [18]:
# Summarize the rows labelled with this sample type, then emit a trailing
# blank line.
sample_type = 'pure, pT normalized'
print_ACC_AUC(df=df, sample_type=sample_type)
print()

pure, pT normalized
ACC: 0.785 ± 0.001
AUC: 0.863 ± 0.001
$0.785 \pm 0.001$ & $0.863 \pm 0.001$
True ACC and AUC
ACC: 0.784 ± 0.001
AUC: 0.861 ± 0.001
$0.784 \pm 0.001$ & $0.861 \pm 0.001$

