Logistic regression models trained on introductory data in $\mathbb{R}^9$ and $\mathbb{R}^{33}$

In [266]:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [80]:
# Wrap a StandardScaler -> LogisticRegression pipeline together with the
# init_350Gm train/test sets, fit it, and show its accuracy.
log_reg_init_350Gm = sigbg_model(
    make_pipeline(StandardScaler(), LogisticRegression()),
    init_350Gm_train,
    init_350Gm_test,
)
log_reg_init_350Gm.fit()
log_reg_init_350Gm.accuracy()

Accuracy: 82.94%


In [81]:
# Wrap a StandardScaler -> LogisticRegression pipeline together with the
# init_1Tm train/test sets, fit it, and show its accuracy.
log_reg_init_1Tm = sigbg_model(
    make_pipeline(StandardScaler(), LogisticRegression()),
    init_1Tm_train,
    init_1Tm_test,
)
log_reg_init_1Tm.fit()
log_reg_init_1Tm.accuracy()

Accuracy: 96.75%


In [82]:
# Wrap a StandardScaler -> LogisticRegression pipeline together with the
# f350Gm train/test sets, fit it, and show its accuracy.
log_reg_f350Gm = sigbg_model(
    make_pipeline(StandardScaler(), LogisticRegression()),
    f350Gm_data_train,
    f350Gm_data_test,
)
log_reg_f350Gm.fit()
log_reg_f350Gm.accuracy()

Accuracy: 70.39%


In [83]:
# Wrap a StandardScaler -> LogisticRegression pipeline together with the
# f1Tm train/test sets, fit it, and show its accuracy.
log_reg_f1Tm = sigbg_model(
    make_pipeline(StandardScaler(), LogisticRegression()),
    f1Tm_data_train,
    f1Tm_data_test,
)
log_reg_f1Tm.fit()
log_reg_f1Tm.accuracy()

Accuracy: 94.54%


In [84]:
# Wrap a StandardScaler -> LogisticRegression pipeline together with the
# f1p5Tm train/test sets, fit it, and show its accuracy.
log_reg_f1p5Tm = sigbg_model(
    make_pipeline(StandardScaler(), LogisticRegression()),
    f1p5Tm_data_train,
    f1p5Tm_data_test,
)
log_reg_f1p5Tm.fit()
log_reg_f1p5Tm.accuracy()

Accuracy: 98.05%


In [85]:
# Wrap a StandardScaler -> LogisticRegression pipeline together with the
# f2Tm train/test sets, fit it, and show its accuracy.
log_reg_f2Tm = sigbg_model(
    make_pipeline(StandardScaler(), LogisticRegression()),
    f2Tm_data_train,
    f2Tm_data_test,
)
log_reg_f2Tm.fit()
log_reg_f2Tm.accuracy()

Accuracy: 99.11%


In [70]:
# Pass each logistic-regression wrapper through refresh_model and rebind the
# corresponding name to whatever it returns.
# NOTE(review): refresh_model is defined elsewhere; this cell rebinds names
# it also reads, so it relies on the models already existing in the kernel.
(log_reg_init_350Gm, log_reg_init_1Tm, log_reg_f350Gm,
 log_reg_f1Tm, log_reg_f1p5Tm, log_reg_f2Tm) = [
    refresh_model(current)
    for current in (
        log_reg_init_350Gm, log_reg_init_1Tm, log_reg_f350Gm,
        log_reg_f1Tm, log_reg_f1p5Tm, log_reg_f2Tm,
    )
]

In [154]:
# Print the confusion matrix of every logistic-regression model, one after
# the other: the two init_* models first, then the four f* models.
for model in (
    log_reg_init_350Gm, log_reg_init_1Tm,
    log_reg_f350Gm, log_reg_f1Tm, log_reg_f1p5Tm, log_reg_f2Tm,
):
    print(model.confusion_matrix())

[[41.3976  8.5444]
 [ 8.5204 41.5376]]
[[48.5582  1.8916]
 [ 1.3598 48.1904]]
[[37.6792 17.374 ]
 [12.2388 32.708 ]]
[[46.1394  1.6764]
 [ 3.7786 48.4056]]
[[48.5644  0.5924]
 [ 1.3536 49.4896]]
[[49.3748  0.3458]
 [ 0.5432 49.7362]]


In [135]:
# Save every model under `path` + its entry in `names`.
# NOTE(review): zip stops at the shorter input, so `names` must hold at least
# six entries in this exact model order -- confirm where `names` is defined.
path = '../saved_models/log_reg/log_reg_'
for name, model in zip(
    [path + suffix for suffix in names],
    (
        log_reg_init_350Gm, log_reg_init_1Tm,
        log_reg_f350Gm, log_reg_f1Tm, log_reg_f1p5Tm, log_reg_f2Tm,
    ),
):
    # `name` is the full destination prefix (path + suffix), not the bare suffix.
    model.save_model(name)

In [23]:
# Rebuild the six model wrappers from the estimators stored on disk, pairing
# each entry of `names` with its train/test sets positionally.
# joblib.load unpickles the file, so only load files produced by this project.
path = '../saved_models/log_reg/log_reg_'
(log_reg_init_350Gm, log_reg_init_1Tm, log_reg_f350Gm,
 log_reg_f1Tm, log_reg_f1p5Tm, log_reg_f2Tm) = [
    sigbg_model(joblib.load(path + name + '.joblib'), train, test)
    for name, train, test in zip(names, trains, tests)
]

In [29]:
# For every model print ppv(), tpr() and their product
# (presumably positive predictive value and true positive rate -- confirm
# against the sigbg_model definition).
# Each metric is evaluated once per model and reused for the product instead
# of being recomputed.
for model in (
    log_reg_init_350Gm, log_reg_init_1Tm,
    log_reg_f350Gm, log_reg_f1Tm, log_reg_f1p5Tm, log_reg_f2Tm,
):
    ppv_value = model.ppv()
    tpr_value = model.tpr()
    print(ppv_value, tpr_value, ppv_value * tpr_value)

0.829 0.829 0.6872409999999999
0.973 0.963 0.9369989999999999
0.755 0.684 0.51642
0.924 0.965 0.89166
0.973 0.988 0.961324
0.989 0.993 0.982077


In [58]:
# Integrated luminosities and cross-sections used for the discovery estimates.
# 1 fb^-1 = 1e15 barn^-1, so the two entries are 120 fb^-1 and 3000 fb^-1.
lumis = np.array([120e15, 3000e15]) # barn^{-1}
sig350Gm_cs = 0.01e-12 # barn
sig1Tm_cs = 0.000394e-12 # barn
# Sum of three background contributions; presumably in barn like the signals.
bg_cs = 0.106e-12 + 0.0117e-12 + 5.58e-12

# Pair each model explicitly with its signal cross-section.  Only the 350Gm
# and 1Tm signals have a cross-section defined here, so log_reg_f1p5Tm and
# log_reg_f2Tm are intentionally left out (zipping all six models against
# four cross-sections would drop them silently).
model_signal_cs = [
    (log_reg_init_350Gm, sig350Gm_cs),
    (log_reg_init_1Tm, sig1Tm_cs),
    (log_reg_f350Gm, sig350Gm_cs),
    (log_reg_f1Tm, sig1Tm_cs),
]

for model, s_cs in model_signal_cs:
    print('Require {} fb^-1 lumi for 5 sigma discovery potential'.format(model.lumi_req(s_cs, bg_cs)))
    print('Require {} fb^-1 lumi for 2 sigma discovery potential'.format(model.lumi_req(s_cs, bg_cs, significance=2)))
    print('For 120 fb^-1 lumi we have significance of {}'.format(model.significance(lumis[0], s_cs, bg_cs)))
    print('For 3000 fb^-1 lumi we have significance of {}'.format(model.significance(lumis[1], s_cs, bg_cs)))

Require 3021.224687021261 fb^-1 lumi for 5 sigma discovery potential
Require 483.3959499234018 fb^-1 lumi for 2 sigma discovery potential
For 120 fb^-1 lumi we have significance of 0.9964812124846995
For 3000 fb^-1 lumi we have significance of 4.982406062423498
Require 1045198.8414033154 fb^-1 lumi for 5 sigma discovery potential
Require 167231.81462453047 fb^-1 lumi for 2 sigma discovery potential
For 120 fb^-1 lumi we have significance of 0.05357487513718085
For 3000 fb^-1 lumi we have significance of 0.2678743756859042
Require 5350.508277231899 fb^-1 lumi for 5 sigma discovery potential
Require 856.0813243571039 fb^-1 lumi for 2 sigma discovery potential
For 120 fb^-1 lumi we have significance of 0.7487952956115083
For 3000 fb^-1 lumi we have significance of 3.7439764780575415
Require 1154193.449798647 fb^-1 lumi for 5 sigma discovery potential
Require 184670.9519677836 fb^-1 lumi for 2 sigma discovery potential
For 120 fb^-1 lumi we have significance of 0.05098252310281941
For 3000

In [60]:
# For each signal cross-section s print 5 * sqrt((s + b) / L) / s at
# L = lumis[1], with b = bg_cs.  Algebraically this is
# 5 / (s * L / sqrt((s + b) * L)).
# NOTE(review): presumably the factor separating the unselected sample from a
# 5 sigma significance at that luminosity -- confirm the intended meaning.
for signal_cs in (sig350Gm_cs, sig1Tm_cs):
    print((5 / signal_cs) * np.sqrt((signal_cs + bg_cs) / lumis[1]))

0.6896677944247264
17.489522795914546


In [71]:
# Print model.ams(signal, background) for the four models that have a signal
# cross-section, with both arguments being cross-section * luminosity at
# 3000e15 barn^-1 (the 3000 fb^-1 value also stored in `lumis`).
# NOTE(review): `ams` is presumably the approximate median significance --
# confirm against the sigbg_model definition.
ams_lumi = 3000e15
ams_background = bg_cs * ams_lumi
for model, cross_sec in zip(
    (log_reg_init_350Gm, log_reg_init_1Tm, log_reg_f350Gm, log_reg_f1Tm),
    (sig350Gm_cs, sig1Tm_cs, sig350Gm_cs, sig1Tm_cs),
):
    print(model.ams(cross_sec * ams_lumi, ams_background))

14.54679482490263
1.4123474451544829
8.425626480420016
1.5187189196145958
