# Logistic Regression

In [17]:
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [18]:
DA = DataAccess()
PI = PhysicsInfo()
lumi = 3000

In [19]:
print(DA.getnpy(0).size/1000000)

47.0


# Ad hoc helper function for model evaluation

In [877]:
def results(model, mass, lumi=3000):
    print('TPR: {}; FPR: {}'.format(round(model.tpr(),3), round(model.fpr(),7)))
    print('Signal Significance @ L = {} fb^-1: {}'.format(
        lumi, round(model.metric(PI.sig_cs(mass) * lumi * PI.conv, PI.bg_cs() * lumi * PI.conv),3)))
    print('Mass sensitivity maximum @ L = {} fb^-1: {} GeV'.format(
        lumi, 
        np.round(PI.Zp_mass(model.req_sig_cs(lumi, PI.bg_cs(), model.tpr(), model.fpr())),2)))
    threshold, significance, tpr, fpr, _, _ = model.best_threshold(
        PI.sig_cs(mass) * lumi * PI.conv, PI.bg_cs() * lumi * PI.conv)
    print(fpr)
    print('Maximal significance of {} @ threshold = {}'.format(round(significance,3), threshold,4))
    print('(sig. maximized with tpr = {}, fpr = {})'.format(round(tpr,3), round(fpr,7)))
    print('(new mass sensitivity of {} GeV)'.format(
        np.round(PI.Zp_mass(model.req_sig_cs(lumi, PI.bg_cs(), tpr, fpr)),2)))

In [889]:
log_reg_350Gm = refresh_model(log_reg_350Gm)
log_reg_500Gm = refresh_model(log_reg_500Gm)
log_reg_1Tm = refresh_model(log_reg_1Tm)
log_reg_2Tm = refresh_model(log_reg_2Tm)
log_reg_4Tm = refresh_model(log_reg_4Tm)
logreg_models = [log_reg_350Gm, log_reg_500Gm, log_reg_1Tm, log_reg_2Tm, log_reg_4Tm]

# Unedited 47-dim data 

(referred to as 50 dim)

## $m_{Z'} = 350$ GeV

In [20]:
data50_350Gm_train, data50_350Gm_test = DA.gettraintest(0)
log_reg_350Gm = sigbg_model(
    make_pipeline(StandardScaler(), LogisticRegression()), data50_350Gm_train, data50_350Gm_test);
log_reg_350Gm.fit()

In [885]:
results(log_reg_350Gm, 350)

TPR: 0.863; FPR: 0.1496614
Signal Significance @ L = 3000 fb^-1: 16.098
3000 5.6977 0.8627650652929196 0.14966144476942184
Mass sensitivity maximum @ L = 3000 fb^-1: 536.39 GeV
0.06685363996955006
Maximal significance of 17.624 @ threshold = 0.762392142482093
(sig. maximized with tpr = 0.633, fpr = 0.0668536)
3000 5.6977 0.6333293398825925 0.06685363996955006
(new mass sensitivity of 553.37 GeV)


In [248]:
indices = np.argsort(-np.abs(log_reg_350Gm.model[1].coef_[0]))
coef_sort = [[DA.cols[i], log_reg_350Gm.model[1].coef_[0,i]] for i in indices]
max_col = max([len(name) for name in DA.cols])
for name, val in coef_sort:
    sign = '+' if val >= 0 else '-'
    print('{0:{1}} {3}{2:.4f}'.format(name + ':', max_col + 3, np.abs(val), sign))

M j1 j2:       -4.0312
pT b1:         +1.3697
M b1 b2:       -0.5617
MT b1 l MET:   +0.5535
pT j2:         -0.5340
M b1 b4:       +0.4948
pT b3:         +0.4510
dR b1 b2:      +0.4217
pT l:          -0.4095
MET:           -0.3725
pT b2:         +0.3644
pT b4:         +0.3605
MT b2 l MET:   +0.3421
MT b4 l MET:   -0.2827
M b2 b4:       +0.2605
dR b1 b4:      -0.2076
MT l MET:      -0.2063
dR j1 j2:      -0.1503
M b2 b3:       +0.1411
dR b1 b3:      +0.1409
dR b3 b4:      +0.1281
dR b2 b4:      -0.1124
dR b2 l:       -0.0760
dR b3 l:       -0.0549
dR b2 b3:      +0.0499
M b3 b4:       +0.0488
M b1 b3:       +0.0436
dR b4 l:       +0.0248
pT j1:         -0.0237
MT b3 l MET:   -0.0130
dR b1 l:       +0.0027
sdPhi b2 l:    -0.0019
sdPhi b2 b4:   -0.0016
sdPhi b1 b2:   +0.0015
sdEta b2 b3:   +0.0014
sdPhi b2 b3:   -0.0013
sdEta b1 b3:   +0.0012
sdEta b2 b4:   +0.0011
sdEta b1 b4:   +0.0008
sdPhi b3 l:    -0.0006
sdPhi b4 l:    -0.0004
sdEta b1 b2:   -0.0004
sdPhi b1 b3:   +0.0003
sdPhi b1 l:

## $m_{Z'} = 500$ GeV

In [21]:
data50_500Gm_train, data50_500Gm_test = DA.gettraintest(1)
log_reg_500Gm = sigbg_model(
    make_pipeline(StandardScaler(), LogisticRegression()), data50_500Gm_train, data50_500Gm_test);
log_reg_500Gm.fit()

In [883]:
results(log_reg_500Gm, 500)

TPR: 0.927; FPR: 0.0919067
Signal Significance @ L = 3000 fb^-1: 8.407
3000 5.6977 0.9269118645421508 0.09190672703233302
Mass sensitivity maximum @ L = 3000 fb^-1: 595.31 GeV
0.037593653591890694
Maximal significance of 10.144 @ threshold = 0.8502001374392226
(sig. maximized with tpr = 0.717, fpr = 0.0375937)
3000 5.6977 0.7174753404416756 0.037593653591890694
(new mass sensitivity of 632.74 GeV)


In [249]:
indices = np.argsort(-np.abs(log_reg_500Gm.model[1].coef_[0]))
coef_sort = [[DA.cols[i], log_reg_500Gm.model[1].coef_[0,i]] for i in indices]
max_col = max([len(name) for name in DA.cols])
for name, val in coef_sort:
    sign = '+' if val >= 0 else '-'
    print('{0:{1}} {3}{2:.4f}'.format(name + ':', max_col + 3, np.abs(val), sign))

M j1 j2:       -4.4908
pT b1:         +1.4705
M b1 b4:       +0.6756
pT b2:         +0.6446
M b1 b2:       +0.6063
pT j2:         -0.5356
M b1 b3:       +0.4427
MT b1 l MET:   +0.4011
MT b4 l MET:   -0.3388
pT l:          -0.3344
MET:           -0.3143
M b2 b4:       +0.2825
MT b2 l MET:   +0.2739
dR b1 b4:      -0.2739
MT l MET:      -0.2179
pT b4:         +0.1906
M b2 b3:       +0.1831
dR b1 b2:      +0.1721
M b3 b4:       +0.1378
dR b2 b3:      +0.1294
dR b2 b4:      -0.0976
dR b3 b4:      +0.0891
dR j1 j2:      -0.0831
MT b3 l MET:   -0.0782
pT j1:         -0.0704
dR b3 l:       -0.0509
dR b1 l:       +0.0425
dR b1 b3:      +0.0421
pT b3:         +0.0262
dR b2 l:       -0.0210
dR b4 l:       -0.0184
sdEta b2 b3:   +0.0028
sdPhi b1 b3:   +0.0020
sdPhi b1 b2:   +0.0019
sdEta b1 b2:   -0.0018
sdEta b3 b4:   -0.0018
sdPhi b1 b4:   +0.0017
sdPhi b3 l:    -0.0014
sdPhi b2 l:    -0.0014
sdEta b1 b3:   +0.0013
sdPhi b4 l:    -0.0010
sdEta b1 b4:   -0.0007
sdPhi b1 l:    +0.0007
sdEta b2 b4

## $m_{Z'} = 1$ TeV

In [22]:
data50_1Tm_train, data50_1Tm_test = DA.gettraintest(2)
log_reg_1Tm = sigbg_model(
    make_pipeline(StandardScaler(), LogisticRegression()), data50_1Tm_train, data50_1Tm_test);
log_reg_1Tm.fit()

In [881]:
results(log_reg_1Tm, 1000)

TPR: 0.987; FPR: 0.0189631
Signal Significance @ L = 3000 fb^-1: 2.044
3000 5.6977 0.9872888462920809 0.01896309948315237
Mass sensitivity maximum @ L = 3000 fb^-1: 774.91 GeV
0.0036700188308826476
Maximal significance of 3.576 @ threshold = 0.9871384593618832
(sig. maximized with tpr = 0.764, fpr = 0.00367)
3000 5.6977 0.763871251148117 0.0036700188308826476
(new mass sensitivity of 910.65 GeV)


In [250]:
indices = np.argsort(-np.abs(log_reg_1Tm.model[1].coef_[0]))
coef_sort = [[DA.cols[i], log_reg_1Tm.model[1].coef_[0,i]] for i in indices]
max_col = max([len(name) for name in DA.cols])
for name, val in coef_sort:
    sign = '+' if val >= 0 else '-'
    print('{0:{1}} {3}{2:.4f}'.format(name + ':', max_col + 3, np.abs(val), sign))

M j1 j2:       -5.1142
M b1 b2:       +4.2566
M b1 b3:       +1.9642
M b1 b4:       +1.5152
pT b3:         -1.1155
dR b1 b2:      -0.8560
pT b1:         +0.7047
M b2 b3:       +0.6890
dR b1 b4:      -0.5530
M b2 b4:       +0.4489
pT j2:         -0.4179
pT b4:         -0.4131
MT b4 l MET:   -0.3616
M b3 b4:       +0.3613
dR b1 b3:      -0.3395
MT l MET:      -0.2765
dR j1 j2:      +0.1919
MT b3 l MET:   -0.1690
dR b4 l:       -0.1568
MT b1 l MET:   -0.1125
dR b2 b4:      -0.1065
dR b3 l:       -0.0967
dR b2 b3:      +0.0835
pT j1:         -0.0793
MET:           -0.0740
dR b1 l:       +0.0244
pT l:          -0.0169
dR b3 b4:      +0.0152
MT b2 l MET:   +0.0088
sdPhi b2 b4:   -0.0045
sdPhi b3 b4:   -0.0044
sdPhi b1 b3:   +0.0044
sdPhi b1 b2:   +0.0042
sdPhi b2 l:    -0.0042
sdPhi b3 l:    -0.0041
sdEta b1 b2:   -0.0025
dR b2 l:       +0.0024
sdEta b2 b3:   +0.0013
sdEta b1 b4:   -0.0012
sdEta b1 b3:   -0.0010
sdEta b2 b4:   +0.0008
sdPhi b1 l:    +0.0006
sdPhi b1 b4:   +0.0003
sdPhi b4 l:

## $m_{Z'} = 2$ TeV

In [23]:
data50_2Tm_train, data50_2Tm_test = DA.gettraintest(3)
log_reg_2Tm = sigbg_model(
    make_pipeline(StandardScaler(), LogisticRegression()), data50_2Tm_train, data50_2Tm_test);
log_reg_2Tm.fit()

In [880]:
results(log_reg_2Tm, 2000)

TPR: 0.996; FPR: 0.0027364
Signal Significance @ L = 3000 fb^-1: 0.281
3000 5.6977 0.9955381425744759 0.002736378205128205
Mass sensitivity maximum @ L = 3000 fb^-1: 1019.74 GeV
0.0
Maximal significance of 2.839 @ threshold = 0.9999978157577915
(sig. maximized with tpr = 0.132, fpr = 0.0)
3000 5.6977 0.132089899332298 0.0
(new mass sensitivity of 1576.37 GeV)


In [251]:
indices = np.argsort(-np.abs(log_reg_2Tm.model[1].coef_[0]))
coef_sort = [[DA.cols[i], log_reg_2Tm.model[1].coef_[0,i]] for i in indices]
max_col = max([len(name) for name in DA.cols])
for name, val in coef_sort:
    sign = '+' if val >= 0 else '-'
    print('{0:{1}} {3}{2:.4f}'.format(name + ':', max_col + 3, np.abs(val), sign))

M b1 b2:       +5.8411
M j1 j2:       -3.6355
M b1 b3:       +2.3340
M b1 b4:       +1.6473
pT b3:         -1.4506
pT b1:         +1.2551
M b2 b3:       +1.0994
dR b1 b2:      -0.8530
M b2 b4:       +0.6779
dR b1 b4:      -0.5674
pT b2:         -0.5453
pT b4:         -0.4746
pT j2:         -0.4196
dR b1 b3:      -0.4094
M b3 b4:       +0.2987
MT b4 l MET:   -0.2783
dR b2 b4:      -0.2769
MT b1 l MET:   -0.2403
MT l MET:      -0.2299
dR j1 j2:      +0.2085
dR b4 l:       -0.1826
dR b2 b3:      -0.1806
MT b3 l MET:   -0.1726
pT j1:         -0.1542
dR b3 l:       -0.1305
MET:           -0.0690
pT l:          -0.0352
dR b3 b4:      -0.0197
sdPhi b2 b4:   -0.0141
sdEta b3 b4:   +0.0131
dR b2 l:       -0.0124
sdEta b2 b4:   +0.0120
sdPhi b2 b3:   -0.0118
sdPhi b2 l:    -0.0117
sdPhi b1 b2:   +0.0102
sdEta b1 b4:   +0.0102
dR b1 l:       +0.0032
sdEta b1 b3:   -0.0029
sdPhi b3 b4:   -0.0025
sdPhi b4 l:    +0.0025
sdEta b1 b2:   -0.0025
sdPhi b1 b4:   -0.0021
MT b2 l MET:   -0.0011
sdEta b2 b3

## $m_{Z'} = 4$ TeV

In [24]:
data50_4Tm_train, data50_4Tm_test = DA.gettraintest(4)
log_reg_4Tm = sigbg_model(
    make_pipeline(StandardScaler(), LogisticRegression(max_iter=5000)), data50_4Tm_train, data50_4Tm_test);
log_reg_4Tm.fit()

In [879]:
results(log_reg_4Tm, 4000)

TPR: 0.992; FPR: 0.0020994
Signal Significance @ L = 3000 fb^-1: 0.006
3000 5.6977 0.9919132622499102 0.002099443086662126
Mass sensitivity maximum @ L = 3000 fb^-1: 1055.26 GeV
2.8045995432509316e-05
Maximal significance of 0.048 @ threshold = 0.9990697002706163
(sig. maximized with tpr = 0.977, fpr = 2.8e-05)
3000 5.6977 0.9773012259893774 2.8045995432509316e-05
(new mass sensitivity of 1724.76 GeV)


In [252]:
indices = np.argsort(-np.abs(log_reg_4Tm.model[1].coef_[0]))
coef_sort = [[DA.cols[i], log_reg_4Tm.model[1].coef_[0,i]] for i in indices]
max_col = max([len(name) for name in DA.cols])
for name, val in coef_sort:
    sign = '+' if val >= 0 else '-'
    print('{0:{1}} {3}{2:.4f}'.format(name + ':', max_col + 3, np.abs(val), sign))

pT b1:         +5.5237
M b1 b2:       +4.9441
M j1 j2:       -2.8888
M b1 b3:       +1.4205
pT b2:         +1.2497
M b1 b4:       +1.0606
M b2 b3:       +0.8097
pT j2:         -0.5625
MT b1 l MET:   +0.5530
MT b2 l MET:   +0.5484
pT b3:         -0.5288
pT l:          -0.4691
M b2 b4:       +0.4610
MET:           -0.4015
dR b1 b2:      -0.3401
pT j1:         -0.2870
MT b4 l MET:   -0.2643
dR b1 b4:      -0.2638
MT l MET:      -0.1739
dR b2 b4:      -0.1570
M b3 b4:       +0.1453
pT b4:         +0.1407
dR b1 b3:      -0.1382
MT b3 l MET:   -0.1376
dR j1 j2:      +0.0937
dR b2 b3:      -0.0893
dR b3 l:       -0.0663
dR b4 l:       -0.0556
dR b2 l:       -0.0361
dR b1 l:       +0.0125
sdEta b1 b3:   -0.0114
sdPhi b2 b4:   -0.0103
sdPhi b2 l:    -0.0091
sdPhi b1 b2:   +0.0089
sdEta b2 b3:   -0.0076
dR b3 b4:      +0.0073
sdEta b3 b4:   +0.0071
sdPhi b3 b4:   -0.0061
sdPhi b1 b3:   +0.0057
sdPhi b3 l:    -0.0050
sdPhi b2 b3:   -0.0043
sdEta b1 b2:   -0.0032
sdEta b1 b4:   -0.0032
sdPhi b1 l:

# Noised Data

In [1121]:
perturb = 0.1

In [1122]:
data50_350Gm_train, data50_350Gm_test = DA.gettraintest(0)

data50_350Gm_train_noise = []
data50_350Gm_test_noise = []

for i, row in enumerate(data50_350Gm_train):
    if i % 75000 == 0:
        print('{}/{}'.format(i, len(data50_350Gm_train)))
    data50_350Gm_train_noise.append(np.random.multivariate_normal(row[:-1], np.diag(np.abs(perturb * row[:-1]))))
    
for i, row in enumerate(data50_350Gm_test):
    if i % 75000 == 0:
        print('{}/{}'.format(i, len(data50_350Gm_test)))
    data50_350Gm_test_noise.append(np.random.multivariate_normal(row[:-1], np.diag(np.abs(perturb * row[:-1]))))

0/1499999
75000/1499999
150000/1499999
225000/1499999
300000/1499999
375000/1499999
450000/1499999
525000/1499999
600000/1499999
675000/1499999
750000/1499999
825000/1499999
900000/1499999
975000/1499999
1050000/1499999
1125000/1499999
1200000/1499999
1275000/1499999
1350000/1499999
1425000/1499999
0/500000
75000/500000
150000/500000
225000/500000
300000/500000
375000/500000
450000/500000


In [1123]:
data50_350Gm_train_noise = np.array(data50_350Gm_train_noise)
data50_350Gm_test_noise = np.array(data50_350Gm_test_noise)
data50_350Gm_train_noise = np.append(data50_350Gm_train_noise, np.reshape(data50_350Gm_train[:,-1], (-1, 1)), axis=1)
data50_350Gm_test_noise = np.append(data50_350Gm_test_noise, np.reshape(data50_350Gm_test[:,-1], (-1, 1)), axis=1)

In [1124]:
log_reg_350Gm_noise = sigbg_model(
    make_pipeline(StandardScaler(), LogisticRegression()), data50_350Gm_train_noise, data50_350Gm_test_noise);
log_reg_350Gm_noise.fit()

In [1125]:
results(log_reg_350Gm_noise, 350)

TPR: 0.858; FPR: 0.1495372
Signal Significance @ L = 3000 fb^-1: 16.024
Mass sensitivity maximum @ L = 3000 fb^-1: 535.55 GeV
0.06783925638046397
Maximal significance of 17.496 @ threshold = 0.7582240268212531
(sig. maximized with tpr = 0.633, fpr = 0.0678393)
(new mass sensitivity of 552.01 GeV)


In [None]:
get_sig_bg_probs(model)

In [1127]:
data50_500Gm_train, data50_500Gm_test = DA.gettraintest(1)
data50_500Gm_train_noise = []
data50_500Gm_test_noise = []
for i, row in enumerate(data50_500Gm_train):
    data50_500Gm_train_noise.append(np.random.multivariate_normal(row[:-1], np.diag(np.abs(perturb * row[:-1]))))
for i, row in enumerate(data50_500Gm_test):
    data50_500Gm_test_noise.append(np.random.multivariate_normal(row[:-1], np.diag(np.abs(perturb * row[:-1]))))
data50_500Gm_train_noise = np.array(data50_500Gm_train_noise)
data50_500Gm_test_noise = np.array(data50_500Gm_test_noise)
data50_500Gm_train_noise = np.append(data50_500Gm_train_noise, np.reshape(data50_500Gm_train[:,-1], (-1, 1)), axis=1)
data50_500Gm_test_noise = np.append(data50_500Gm_test_noise, np.reshape(data50_500Gm_test[:,-1], (-1, 1)), axis=1)

In [1128]:
data50_1Tm_train, data50_1Tm_test = DA.gettraintest(2)
data50_1Tm_train_noise = []
data50_1Tm_test_noise = []
for i, row in enumerate(data50_1Tm_train):
    data50_1Tm_train_noise.append(np.random.multivariate_normal(row[:-1], np.diag(np.abs(perturb * row[:-1]))))
for i, row in enumerate(data50_1Tm_test):
    data50_1Tm_test_noise.append(np.random.multivariate_normal(row[:-1], np.diag(np.abs(perturb * row[:-1]))))
data50_1Tm_train_noise = np.array(data50_1Tm_train_noise)
data50_1Tm_test_noise = np.array(data50_1Tm_test_noise)
data50_1Tm_train_noise = np.append(data50_1Tm_train_noise, np.reshape(data50_1Tm_train[:,-1], (-1, 1)), axis=1)
data50_1Tm_test_noise = np.append(data50_1Tm_test_noise, np.reshape(data50_1Tm_test[:,-1], (-1, 1)), axis=1)

In [1130]:
data50_2Tm_train, data50_2Tm_test = DA.gettraintest(3)
data50_2Tm_train_noise = []
data50_2Tm_test_noise = []
for i, row in enumerate(data50_2Tm_train):
    data50_2Tm_train_noise.append(np.random.multivariate_normal(row[:-1], np.diag(np.abs(perturb * row[:-1]))))
for i, row in enumerate(data50_2Tm_test):
    data50_2Tm_test_noise.append(np.random.multivariate_normal(row[:-1], np.diag(np.abs(perturb * row[:-1]))))
data50_2Tm_train_noise = np.array(data50_2Tm_train_noise)
data50_2Tm_test_noise = np.array(data50_2Tm_test_noise)
data50_2Tm_train_noise = np.append(data50_2Tm_train_noise, np.reshape(data50_2Tm_train[:,-1], (-1, 1)), axis=1)
data50_2Tm_test_noise = np.append(data50_2Tm_test_noise, np.reshape(data50_2Tm_test[:,-1], (-1, 1)), axis=1)

In [1133]:
data50_4Tm_train, data50_4Tm_test = DA.gettraintest(4)
data50_4Tm_train_noise = []
data50_4Tm_test_noise = []
for i, row in enumerate(data50_4Tm_train):
    data50_4Tm_train_noise.append(np.random.multivariate_normal(row[:-1], np.diag(np.abs(perturb * row[:-1]))))
for i, row in enumerate(data50_4Tm_test):
    data50_4Tm_test_noise.append(np.random.multivariate_normal(row[:-1], np.diag(np.abs(perturb * row[:-1]))))
data50_4Tm_train_noise = np.array(data50_4Tm_train_noise)
data50_4Tm_test_noise = np.array(data50_4Tm_test_noise)
data50_4Tm_train_noise = np.append(data50_4Tm_train_noise, np.reshape(data50_4Tm_train[:,-1], (-1, 1)), axis=1)
data50_4Tm_test_noise = np.append(data50_4Tm_test_noise, np.reshape(data50_4Tm_test[:,-1], (-1, 1)), axis=1)

In [1134]:
log_reg_350Gm_noise = sigbg_model(
    make_pipeline(StandardScaler(), LogisticRegression()), data50_350Gm_train_noise, data50_350Gm_test_noise);
log_reg_350Gm_noise.fit()

In [1135]:
results(log_reg_350Gm_noise, 350)

TPR: 0.858; FPR: 0.1495372
Signal Significance @ L = 3000 fb^-1: 16.024
Mass sensitivity maximum @ L = 3000 fb^-1: 535.55 GeV
0.06783925638046397
Maximal significance of 17.496 @ threshold = 0.7582240268212531
(sig. maximized with tpr = 0.633, fpr = 0.0678393)
(new mass sensitivity of 552.01 GeV)


In [1136]:
log_reg_500Gm_noise = sigbg_model(
    make_pipeline(StandardScaler(), LogisticRegression()), data50_500Gm_train_noise, data50_500Gm_test_noise);
log_reg_500Gm_noise.fit()

In [1137]:
results(log_reg_500Gm_noise, 500)

TPR: 0.926; FPR: 0.0918707
Signal Significance @ L = 3000 fb^-1: 8.402
Mass sensitivity maximum @ L = 3000 fb^-1: 595.2 GeV
0.03793020553708081
Maximal significance of 10.122 @ threshold = 0.8479885793697
(sig. maximized with tpr = 0.719, fpr = 0.0379302)
(new mass sensitivity of 632.28 GeV)


In [1146]:
log_reg_1Tm_noise = sigbg_model(
    make_pipeline(StandardScaler(), LogisticRegression()), data50_1Tm_train_noise, data50_1Tm_test_noise);
log_reg_1Tm_noise.fit()

In [1147]:
results(log_reg_1Tm_noise, 1000)

TPR: 0.987; FPR: 0.0201651
Signal Significance @ L = 3000 fb^-1: 1.981
Mass sensitivity maximum @ L = 3000 fb^-1: 767.79 GeV
0.003453664008974718
Maximal significance of 3.584 @ threshold = 0.988283599227492
(sig. maximized with tpr = 0.743, fpr = 0.0034537)
(new mass sensitivity of 911.25 GeV)


In [1148]:
log_reg_2Tm_noise = sigbg_model(
    make_pipeline(StandardScaler(), LogisticRegression()), data50_2Tm_train_noise, data50_2Tm_test_noise);
log_reg_2Tm_noise.fit()

In [1149]:
results(log_reg_2Tm_noise, 2000)

TPR: 0.996; FPR: 0.0030128
Signal Significance @ L = 3000 fb^-1: 0.268
Mass sensitivity maximum @ L = 3000 fb^-1: 1006.67 GeV
2.4038461538461542e-05
Maximal significance of 1.92 @ threshold = 0.9998221005410745
(sig. maximized with tpr = 0.669, fpr = 2.4e-05)
(new mass sensitivity of 1611.63 GeV)


In [1150]:
log_reg_4Tm_noise = sigbg_model(
    make_pipeline(StandardScaler(), LogisticRegression()), data50_4Tm_train_noise, data50_4Tm_test_noise);
log_reg_4Tm_noise.fit()

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [1151]:
results(log_reg_4Tm_noise, 4000)

TPR: 0.992; FPR: 0.0021515
Signal Significance @ L = 3000 fb^-1: 0.006
Mass sensitivity maximum @ L = 3000 fb^-1: 1051.84 GeV
9.215112784967345e-05
Maximal significance of 0.027 @ threshold = 0.9894230646509026
(sig. maximized with tpr = 0.984, fpr = 9.22e-05)
(new mass sensitivity of 1531.91 GeV)
