In [1]:
import pandas as pd
import numpy as np
import itertools, csv

massWidth = 30 #GeV

In [2]:
# *** 0. Import Dataset
qcd_raw = pd.read_csv('../higgsReconstruction/EventPlotting/qcd_outputDataForLearning.csv')
hh_raw = pd.read_csv('../higgsReconstruction/EventPlotting/dihiggs_outputDataForLearning.csv')

hh_raw.head()
print(len(hh_raw), "rows of dihiggs data")
#print(hh_raw.columns)
qcd_raw.head()
print(len(qcd_raw), "rows of qcd data")

#variableNames = ['hh_mass', 'h1_mass', 'h2_mass']
#variableNames = ['deltaR(h1, h2)', 'deltaR(h1 jets)', 'deltaR(h2 jets)']
variableNames = ['hh_mass', 'h1_mass', 'h2_mass', 'deltaR(h1, h2)', 'deltaR(h1 jets)', 'deltaR(h2 jets)']

hh_reducedData  = hh_raw[variableNames]
qcd_reducedData = qcd_raw[variableNames]
print(hh_reducedData.columns, variableNames[0])
hh_reducedData.hist(column=variableNames[2], bins=100)
qcd_reducedData.hist(column=variableNames[2], bins=100)


4605 rows of dihiggs data
1703 rows of qcd data
Index(['hh_mass', 'h1_mass', 'h2_mass', 'deltaR(h1, h2)', 'deltaR(h1 jets)',
       'deltaR(h2 jets)'],
      dtype='object') hh_mass


array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7fa46aae6b00>]],
      dtype=object)

In [64]:
def returnCutValueByConstantEfficiency( _variable, _signal, _background, _eff, _inequality = '>'):
    """return a cut value based on keeping some constant efficiency of signal"""

    _bestCutValue = -1
    _nTotalSignal =len(_signal) 
    _nTotalBackground =len(_background) 
    _cuts = []
    
    _minVal = int(min(min(_background), min(_signal))) if 'mass' not in _variable else int(min(min(_background), min(_signal))) - int(min(min(_background), min(_signal)))%5
    _maxVal = int(max(max(_background), max(_signal))) if 'mass' not in _variable else int(max(max(_background), max(_signal))) - int(max(max(_background), max(_signal)))%5
    if 'mass' in _variable:
        #_cuts = list(range(_minVal, _maxVal, _stepSize))
        _cuts = list(range(0, _maxVal, 5))
        #print(_maxVal, max(_background), max(_signal))
    elif 'deltaR' in _variable:
        #_cuts = np.linspace(_minVal, _maxVal, 100)
        _cuts = np.linspace(0, 5, 101)


    for iCutValue in _cuts:
        if _inequality == '<':
            _nSignal = sum( value < iCutValue for value in _signal)
            _nBackground = sum( value < iCutValue for value in _background)
        elif _inequality == '>':
            _nSignal = sum( value > iCutValue for value in _signal)
            _nBackground = sum( value > iCutValue for value in _background)
        else:
            print("Unknown inequality operator {0}. EXITING".format(_inequality))
            return _bestCutValue, _nTotalSignal, _nTotalBackground
        
        # safety check to avoid division by 0
        if _nBackground == 0:
            continue
        
        if _inequality == '<':
            if _nSignal / _nTotalSignal >= _eff:
                _bestCutValue = iCutValue
                return _bestCutValue, _nSignal, _nBackground
        elif _inequality== '>':
            if _nSignal / _nTotalSignal < _eff: # passed threshold so return previous cut
                _nSignalBest = sum( value > _bestCutValue for value in _signal)
                _nBackgroundBest = sum( value > _bestCutValue for value in _background)
                return _bestCutValue, _nSignalBest, _nBackgroundBest
            else:
                _bestCutValue = iCutValue
            
    return _bestCutValue, -1, -1
    
    
def returnBestCutValue( _variable, _signal, _background, _method='S/B'):
    """find best cut according to user-specified significance metric"""
    
    _bestSignificance = -1
    _bestCutValue = -1
    _massWidth = massWidth #GeV
    _nTotalSignal =len(_signal) 
    _nTotalBackground =len(_background) 
    _cuts = []
    
    _minVal = int(min(min(sortedBackground), min(sortedSignal))) if 'mass' not in _variable else int(min(min(sortedBackground), min(sortedSignal))) - int(min(min(sortedBackground), min(sortedSignal)))%5
    _maxVal = int(max(max(sortedBackground), max(sortedSignal))) if 'mass' not in _variable else int(max(max(sortedBackground), max(sortedSignal))) - int(max(max(sortedBackground), max(sortedSignal)))%5
    if 'mass' in _variable:
        _stepSize = 0.05 if 'mass' not in _variable else 5
        _cuts = list(range(_minVal, _maxVal, _stepSize))
    else:
        _cuts = np.linspace(_minVal, _maxVal, 100)
    
    for iCutValue in _cuts:
        if 'mass' in _variable:
            _nSignal = sum( (value > iCutValue and value < (iCutValue+_massWidth)) for value in _signal) 
            _nBackground = sum( (value > iCutValue and value < (iCutValue+_massWidth)) for value in _background)
            #_nSignal = sum( (value > iCutValue ) for value in _signal)
            #_nBackground = sum( (value > iCutValue) for value in _background)
        else:
            _nSignal = sum( value < iCutValue for value in _signal)
            _nBackground = sum( value < iCutValue for value in _background)

        # temporary fix since samples with different number of events
        #_nSignal = _nSignal / _nTotalSignal
        #_nBackground = _nBackground / _nTotalBackground
        _nSignal = _nSignal * (_nTotalBackground / _nTotalSignal )
        #_nBackground = _nBackground / _nTotalBackground
        
        # safety check to avoid division by 0
        if _nBackground == 0:
            continue
        
        #if _method == 'S/sqrt(B)':
        #    print(_nSignal, _nBackground, iCutValue, (_nSignal / np.sqrt(_nBackground)), (_nSignal / np.sqrt(_nSignal + _nBackground)))
        
        if _method == 'S/B' and (_nSignal / _nBackground) > _bestSignificance:
            _bestSignificance = (_nSignal / _nBackground)
            _bestCutValue = iCutValue
        elif _method == 'S/sqrt(B)' and (_nSignal / np.sqrt(_nBackground)) > _bestSignificance:
            _bestSignificance = (_nSignal / np.sqrt(_nBackground))
            _bestCutValue = iCutValue
        elif _method == 'S/sqrt(S+B)' and (_nSignal / np.sqrt(_nSignal + _nBackground)) > _bestSignificance:
            _bestSignificance = (_nSignal / np.sqrt(_nSignal + _nBackground))
            _bestCutValue = iCutValue
        
    return _bestSignificance, _bestCutValue


def returnSignificanceOrderedCutDict( _method, _varNames, _signalDataFrame, _backgroundDataFrame):
    """function to return list of cuts ordered by descending significance"""
    
    _orderedVariableAndCutDict = {}
    _unprocessedVariables = _varNames
    _signalAfterCuts = _signalDataFrame
    _backgroundAfterCuts = _backgroundDataFrame
    
    while len(_unprocessedVariables)>0:
        _iBestCut = -1
        _iBestSignificance = -1
        _iBestVariable = ''
        print('iteration {0}, signal has {1} rows'.format(len(_unprocessedVariables), len(_signalAfterCuts)))
        print('iteration {0}, background has {1} rows'.format(len(_unprocessedVariables), len(_backgroundAfterCuts)))
        
        for iVariable in _unprocessedVariables:
            _sortedSignal = np.sort(_signalDataFrame[iVariable].values)
            _sortedBackground = np.sort(_backgroundDataFrame[iVariable].values)
            #print(_sortedSignal)
            _tempSignificance, _tempCut = returnBestCutValue( iVariable, _sortedSignal, _sortedBackground, _method)
            #print ( iVariable, _tempSignificance, _tempCut )
                
            # most significant 1D variable so far in this iteration
            if _tempSignificance > _iBestSignificance:
                _iBestSignificance = _tempSignificance
                _iBestCut = _tempCut
                _iBestVariable = iVariable
        
        print('Iteration {0} chose variable {1} with significance {2} at cut {3}'.format(int(len(_varNames)-len(_unprocessedVariables)), _iBestVariable, _iBestSignificance, _iBestCut))
        _unprocessedVariables.remove(_iBestVariable)
        _orderedVariableAndCutDict[_iBestVariable] = [_iBestCut, _iBestSignificance]
        if 'mass' in _iBestVariable:
            _signalAfterCuts = _signalAfterCuts[ (_signalAfterCuts[_iBestVariable] > _iBestCut) & (_signalAfterCuts[_iBestVariable]< (_iBestCut + massWidth))]
            _backgroundAfterCuts = _backgroundAfterCuts[ (_backgroundAfterCuts[_iBestVariable] > _iBestCut) & (_backgroundAfterCuts[_iBestVariable]< (_iBestCut + massWidth))]
        else:
            _signalAfterCuts = _signalAfterCuts[ _signalAfterCuts[_iBestVariable] < _iBestCut ]
            _backgroundAfterCuts = _backgroundAfterCuts[ _backgroundAfterCuts[_iBestVariable] < _iBestCut ]
            
    return _orderedVariableAndCutDict

In [65]:
for iColumn in range(0, len(hh_reducedData.columns) ):
    varName = variableNames[iColumn]
    sortedSignal = np.sort(hh_reducedData[varName].values)
    sortedBackground = np.sort(qcd_reducedData[varName].values)
    
    bestCut, significance = returnBestCutValue( varName, sortedSignal, sortedBackground, 'S/B')
    print ( varName, bestCut, significance )
    bestCut, significance = returnBestCutValue( varName, sortedSignal, sortedBackground, 'S/sqrt(B)')
    print ( varName, bestCut, significance )
    bestCut, significance = returnBestCutValue( varName, sortedSignal, sortedBackground, 'S/sqrt(S+B)')
    print ( varName, bestCut, significance )
    
print("=====================================")
    

hh_mass 1.0007666936512138 115
1703.0 1703 90 41.26742056392669 29.180472922829747
1701.8905537459282 1703 95 41.240536250855726 29.166213427263237
1701.5207383279044 1702 100 41.243685718056696 29.16574310072618
1701.1509229098806 1701 105 41.24684059671171 29.16527376617926
1699.671661237785 1699 110 41.235222662205366 29.154824285399837
1699.301845819761 1698 115 41.23838854461273 29.15435660551655
1697.8225841476656 1698 120 41.20249005621699 29.135321182853577
1695.2338762214983 1696 125 41.16391743407614 29.110572829050472
1692.2753528773071 1695 130 41.10419790629386 29.076744359865977
1690.7960912052117 1694 135 41.080387559652515 29.061965291012037
1687.8375678610205 1692 140 41.03273541431246 29.032386354801968
1686.358306188925 1692 145 40.996773332441904 29.013291522682465
1682.660152008686 1688 150 40.9553073017843 28.982705685846394
1678.5921824104234 1684 155 40.90478860608117 28.94730231057491
1674.5242128121606 1681 160 40.84205397102831 28.907547180550136
1667.1279044

49.18545059717698 167 745 3.8060844374755582 3.345210283886906
47.706188925081435 163 750 3.736637100466979 3.286518472953881
46.966558089033654 162 755 3.6900413015275104 3.2490047004473896
45.857111834962 159 760 3.6367055257170886 3.203916402440219
45.48729641693811 156 765 3.641898398414527 3.2045443769363113
43.63821932681867 151 770 3.5512279249489587 3.1279007151417004
42.8985884907709 149 775 3.514389303685025 3.0967568898271867
41.78914223669924 147 780 3.4467103599374096 3.0414098651080983
40.30988056460369 143 785 3.370881554648995 2.9772724186140116
38.83061889250814 141 790 3.2701280296527595 2.895626170080047
38.09098805646037 140 795 3.219276062273059 2.8543108371474304
36.24191096634093 135 800 3.1192070579415643 2.769529887929503
34.02301845819761 132 805 2.9613236508702405 2.6405152985213696
32.91357220412595 128 810 2.909176262326315 2.594649397585162
32.173941368078175 126 815 2.8662825090833666 2.5582135670250197
29.585233441910965 124 820 2.6568323939335303 2.3872

1.1094462540716612 9 1400 0.36981541802388707 0.3489334337980057
1.1094462540716612 8 1405 0.3922484848080424 0.3675871146078433
1.1094462540716612 7 1410 0.4193312687522566 0.3895925722130611
1.1094462540716612 7 1415 0.4193312687522566 0.3895925722130611
1.1094462540716612 7 1420 0.4193312687522566 0.3895925722130611
1.1094462540716612 6 1425 0.45292953658629237 0.41609105585550243
1.1094462540716612 6 1430 0.45292953658629237 0.41609105585550243
1.1094462540716612 5 1435 0.4961594482973474 0.4488542522485153
1.1094462540716612 5 1440 0.4961594482973474 0.4488542522485153
1.1094462540716612 5 1445 0.4961594482973474 0.4488542522485153
1.1094462540716612 5 1450 0.4961594482973474 0.4488542522485153
1.1094462540716612 5 1455 0.4961594482973474 0.4488542522485153
1.1094462540716612 5 1460 0.4961594482973474 0.4488542522485153
1.1094462540716612 4 1465 0.5547231270358306 0.49081672193464354
1.1094462540716612 4 1470 0.5547231270358306 0.49081672193464354
1.1094462540716612 4 1475 0.55472

0.36981541802388707 1 2085 0.36981541802388707 0.3159759209817217
0.36981541802388707 1 2090 0.36981541802388707 0.3159759209817217
0.36981541802388707 1 2095 0.36981541802388707 0.3159759209817217
0.36981541802388707 1 2100 0.36981541802388707 0.3159759209817217
0.36981541802388707 1 2105 0.36981541802388707 0.3159759209817217
0.36981541802388707 1 2110 0.36981541802388707 0.3159759209817217
0.36981541802388707 1 2115 0.36981541802388707 0.3159759209817217
0.36981541802388707 1 2120 0.36981541802388707 0.3159759209817217
hh_mass 41.26742056392669 90
hh_mass 29.180472922829747 90
h1_mass 1.1708045178729216 70
1703.0 1703 15 41.26742056392669 29.180472922829747
1702.630184581976 1703 20 41.25845912623637 29.175720187786155
1701.5207383279044 1701 25 41.25580730118926 29.170028688193128
1695.6036916395221 1696 30 41.17289734625635 29.11533582679039
1687.4677524429967 1667 35 41.330216693826074 29.13558024208863
1678.5921824104234 1625 40 41.640747165759414 29.20464834947562
1668.60716612

0.36981541802388707 4 660 0.18490770901194353 0.17691044181318238
0.0 4 665 0.0 0.0
0.0 4 670 0.0 0.0
0.0 3 675 0.0 0.0
0.0 3 680 0.0 0.0
0.0 3 685 0.0 0.0
0.0 3 690 0.0 0.0
0.0 2 695 0.0 0.0
0.0 2 700 0.0 0.0
0.0 2 705 0.0 0.0
0.0 2 710 0.0 0.0
0.0 2 715 0.0 0.0
h1_mass 43.05719386272135 60
h1_mass 29.491188088811576 55
h2_mass 1.2279099481440434 70
1703.0 1703 10 41.26742056392669 29.180472922829747
1702.630184581976 1703 15 41.25845912623637 29.175720187786155
1700.4112920738328 1701 20 41.228907187756626 29.15576262985425
1696.713137893594 1690 25 41.27290808980849 29.155414250414218
1693.014983713355 1664 30 41.50344441711235 29.220265785345635
1677.4827361563516 1615 35 41.741859888325465 29.234542786992904
1664.5391965255158 1565 40 42.07623339209835 29.290295045927806
1650.116395222584 1511 45 42.45045477234379 29.349069437298624
1630.5161780673182 1447 50 42.86381745175235 29.391715821675312
1603.1498371335504 1376 55 43.21802319968784 29.371622777135723
1571.3457111834962 132

211.9042345276873 86 1.6969696969696968 22.850221262672328 12.2772555899484
229.28555917480998 91 1.7676767676767675 24.03565749622594 12.811737268569857
249.62540716612378 103 1.8383838383838382 24.596322225446453 13.293274901600329
271.07470141150924 113 1.909090909090909 25.50056285086593 13.831877603689707
290.30510314875136 118 1.9797979797979797 26.724750940509512 14.366873446534544
311.7543973941368 127 2.0505050505050506 27.663729386678522 14.883389483499851
333.94332247557 143 2.121212121212121 27.925743521541676 15.291122846138517
361.67947882736155 152 2.191919191919192 29.336079524604067 15.957974254540034
394.5930510314875 166 2.2626262626262625 30.626360399815486 16.665777440118287
428.985884907709 180 2.333333333333333 31.974720001384615 17.383586754978218
464.48816503800214 197 2.404040404040404 33.09341151325496 18.059828157472435
505.5376764386536 216 2.4747474747474745 34.39748202852564 18.82019076644591
551.0249728555917 251 2.5454545454545454 34.780387883356845 19.

1005.528121606949 665 1.8181818181818181 38.99270807755795 24.601825036337452
1029.9359391965254 675 1.8686868686868687 39.64225278287893 24.943430377156933
1059.1513572204126 696 1.9191919191919191 40.14702858813675 25.281377872613536
1087.257328990228 705 1.9696969696969697 40.9484799888137 25.68219631140838
1111.2953311617807 724 2.0202020202020203 41.30096572021789 25.940390518712302
1132.0049945711182 749 2.0707070707070705 41.36256228834661 26.10077294478208
1153.8241042345276 760 2.121212121212121 41.85359922133041 26.37476491718152
1175.273398479913 779 2.1717171717171717 42.10853939621784 26.585586525714604
1197.0925081433224 794 2.2222222222222223 42.48322343427268 26.827610583452262
1220.0210640608034 818 2.2727272727272725 42.657036165393805 27.024831499257473
1244.798697068404 844 2.323232323232323 42.84773819597594 27.236470704685257
1266.2479913137893 866 2.3737373737373737 43.028858347625345 27.42204962635363
1285.8482084690554 895 2.4242424242424243 42.98116535958797 2

1231.11552660152 1045 2.474747474747475 38.08383398936496 25.80486895383249
1266.6178067318133 1063 2.525252525252525 38.848920633205594 26.242381854222113
1298.7917480998913 1088 2.5757575757575757 39.375408552179834 26.58473017968886
1328.0071661237785 1125 2.6262626262626263 39.59352397412833 26.81334585919204
1351.3055374592834 1150 2.676767676767677 39.847826361188275 27.019056791834778
1376.0831704668838 1173 2.727272727272727 40.178681177688475 27.255407562033216
1400.860803474484 1201 2.7777777777777777 40.422528886228974 27.463314762258474
1431.555483170467 1230 2.8282828282828283 40.81836727290243 27.748558227129564
1457.072747014115 1257 2.878787878787879 41.09732893483191 27.96858488965187
1482.9598262757872 1293 2.929292929292929 41.24108890458046 28.146396265816353
1511.0657980456026 1320 2.9797979797979797 41.59071725924889 28.399349612593298
1536.5830618892508 1356 3.0303030303030303 41.72786808583561 28.570189578116878
1559.5116178067317 1380 3.080808080808081 41.98064

In [9]:
orderedCuts_SoverB = returnSignificanceOrderedCutDict( 'S/B', variableNames.copy(), hh_reducedData.copy(), qcd_reducedData.copy())
print (orderedCuts_SoverB)
orderedCuts_SoverSqrtB = returnSignificanceOrderedCutDict( 'S/sqrt(B)', variableNames.copy(), hh_reducedData.copy(), qcd_reducedData.copy())
print (orderedCuts_SoverSqrtB)
orderedCuts_SoverSqrtSB = returnSignificanceOrderedCutDict( 'S/sqrt(S+B)', variableNames.copy(), hh_reducedData.copy(), qcd_reducedData.copy())
print (orderedCuts_SoverSqrtSB)

iteration 3, signal has 4605 rows
iteration 3, background has 1703 rows
Iteration 0 chose variable deltaR(h1, h2) with significance 3.205066956207021 at cut 0.45454545454545453
iteration 2, signal has 26 rows
iteration 2, background has 3 rows
Iteration 0 chose variable deltaR(h1 jets) with significance 1.5422089772911034 at cut 1.9696969696969697
iteration 1, signal has 17 rows
iteration 1, background has 2 rows
Iteration 0 chose variable deltaR(h2 jets) with significance 1.1937424155329883 at cut 2.5757575757575757
{'deltaR(h1, h2)': [0.45454545454545453, 3.205066956207021], 'deltaR(h1 jets)': [1.9696969696969697, 1.5422089772911034], 'deltaR(h2 jets)': [2.5757575757575757, 1.1937424155329883]}
iteration 3, signal has 4605 rows
iteration 3, background has 1703 rows
Iteration 0 chose variable deltaR(h1, h2) with significance 44.33513291962043 at cut 3.282828282828283
iteration 2, signal has 3881 rows
iteration 2, background has 1048 rows
Iteration 0 chose variable deltaR(h1 jets) with

In [45]:
def getCutsForSpecifiedEfficiency( _signal, _background, _eff, _inequality = '<'):
    """get cuts for all variables given user-specified efficency"""
    variablesAndCuts_ = {}
    
    print("========== Efficiency {0}% , Using {1} =========".format(_eff, _inequality) )
    for iColumn in range(0, len(_signal.columns) ):
        varName = variableNames[iColumn]
        sortedSignal = np.sort(_signal[varName].values)
        sortedBackground = np.sort(_background[varName].values)
    
        cutVal, nSig, nBkg = returnCutValueByConstantEfficiency( varName, sortedSignal, sortedBackground, _eff, _inequality)
        print('Cut of {4} {0} on {1} yields nSig = {2} and nBkg = {3}'.format(round(cutVal,2), varName, nSig, nBkg, _inequality))    
        variablesAndCuts_[varName] = round(cutVal,2)
    
    return variablesAndCuts_


def returnNumberSignalAndBackgroundAfterCuts( _signal, _background, _cuts, _useVariableString = '', _inequality = '>', _significanceMetric = 'S/sqrt(B)'):
    """return number of signal and background (and maybe some significance score?) for a passed set of cuts"""
    
    _unprocessedVariables = [x for x in _cuts.keys() if _useVariableString in x] #list(_cuts.keys())
    _signalAfterCuts = _signal
    _backgroundAfterCuts = _background
    _nTotalSignal = len(_signalAfterCuts)
    _nTotalBackground = len(_backgroundAfterCuts)
    
    while len(_unprocessedVariables)>0:
        print('iteration {0}, signal has {1} rows'.format(len(_unprocessedVariables), len(_signalAfterCuts)))
        print('iteration {0}, background has {1} rows'.format(len(_unprocessedVariables), len(_backgroundAfterCuts)))
        
        for iVariable in _unprocessedVariables:
            _cutValue = _cuts[iVariable]
            _sortedSignal = np.sort(_signalAfterCuts[iVariable].values)
            _sortedBackground = np.sort(_backgroundAfterCuts[iVariable].values)

            _signalAfterCuts = _signalAfterCuts[ (_signalAfterCuts[iVariable] > _cutValue)]
            _backgroundAfterCuts = _backgroundAfterCuts[ (_backgroundAfterCuts[iVariable] > _cutValue)]

            _nSignal = len(_signalAfterCuts) * (_nTotalBackground / _nTotalSignal )
            _nBackground = len(_backgroundAfterCuts) 
            print('Iteration {0} chose variable {1} with N_signal = {2} ({4}) and N_background = {3} ({5})'.format(int(len(_unprocessedVariables)), iVariable, len(_signalAfterCuts), len(_backgroundAfterCuts), round(_nSignal,1), _nBackground))
            _unprocessedVariables.remove(iVariable)
    
    _nSignal = len(_signalAfterCuts) * (_nTotalBackground / _nTotalSignal )
    _nBackground = len(_backgroundAfterCuts) 
    print('{0} = {1}'.format(_significanceMetric, round( _nSignal / np.sqrt(_nBackground), 2)))
    
    return 

In [40]:
#getCutsForSpecifiedEfficiency(hh_reducedData, qcd_reducedData, 0.80, '<')
#getCutsForSpecifiedEfficiency(hh_reducedData, qcd_reducedData, 0.85, '<')
#getCutsForSpecifiedEfficiency(hh_reducedData, qcd_reducedData, 0.90, '<')

cutsFor80PercentEfficiency = getCutsForSpecifiedEfficiency(hh_reducedData.copy(), qcd_reducedData.copy(), 0.80, '>')
cutsFor85PercentEfficiency = getCutsForSpecifiedEfficiency(hh_reducedData.copy(), qcd_reducedData.copy(), 0.85, '>')
cutsFor90PercentEfficiency = getCutsForSpecifiedEfficiency(hh_reducedData.copy(), qcd_reducedData.copy(), 0.90, '>')

Cut of > 270 on hh_mass yields nSig = 3733 and nBkg = 1525
Cut of > 85 on h1_mass yields nSig = 3775 and nBkg = 1242
Cut of > 75 on h2_mass yields nSig = 3810 and nBkg = 1161
Cut of > 2.1 on deltaR(h1, h2) yields nSig = 3722 and nBkg = 1567
Cut of > 0.95 on deltaR(h1 jets) yields nSig = 3733 and nBkg = 1329
Cut of > 1.1 on deltaR(h2 jets) yields nSig = 3709 and nBkg = 1229
Cut of > 250 on hh_mass yields nSig = 3974 and nBkg = 1581
Cut of > 80 on h1_mass yields nSig = 3963 and nBkg = 1275
Cut of > 70 on h2_mass yields nSig = 4001 and nBkg = 1205
Cut of > 1.85 on deltaR(h1, h2) yields nSig = 3916 and nBkg = 1598
Cut of > 0.85 on deltaR(h1 jets) yields nSig = 3923 and nBkg = 1374
Cut of > 1.0 on deltaR(h2 jets) yields nSig = 3919 and nBkg = 1284
Cut of > 230 on hh_mass yields nSig = 4165 and nBkg = 1617
Cut of > 70 on h1_mass yields nSig = 4217 and nBkg = 1332
Cut of > 60 on h2_mass yields nSig = 4249 and nBkg = 1321
Cut of > 1.5 on deltaR(h1, h2) yields nSig = 4163 and nBkg = 1637
Cut of

In [50]:
returnNumberSignalAndBackgroundAfterCuts( hh_reducedData.copy(), qcd_reducedData.copy(), cutsFor90PercentEfficiency, 'mass')
returnNumberSignalAndBackgroundAfterCuts( hh_reducedData.copy(), qcd_reducedData.copy(), cutsFor90PercentEfficiency, 'elta')
returnNumberSignalAndBackgroundAfterCuts( hh_reducedData.copy(), qcd_reducedData.copy(), cutsFor90PercentEfficiency)

iteration 3, signal has 4605 rows
iteration 3, background has 1703 rows
Iteration 3 chose variable hh_mass with N_signal = 4165 (1540.3) and N_background = 1617 (1617)
Iteration 2 chose variable h2_mass with N_signal = 3959 (1464.1) and N_background = 1271 (1271)
iteration 1, signal has 3959 rows
iteration 1, background has 1271 rows
Iteration 1 chose variable h1_mass with N_signal = 3798 (1404.6) and N_background = 1123 (1123)
S/sqrt(B) = 41.91
iteration 3, signal has 4605 rows
iteration 3, background has 1703 rows
Iteration 3 chose variable deltaR(h1, h2) with N_signal = 4163 (1539.5) and N_background = 1637 (1637)
Iteration 2 chose variable deltaR(h2 jets) with N_signal = 3787 (1400.5) and N_background = 1304 (1304)
iteration 1, signal has 3787 rows
iteration 1, background has 1304 rows
Iteration 1 chose variable deltaR(h1 jets) with N_signal = 3531 (1305.8) and N_background = 1182 (1182)
S/sqrt(B) = 37.98
iteration 6, signal has 4605 rows
iteration 6, background has 1703 rows
Itera

In [55]:
4165*1703/4605

1540.2812160694896