In [1]:
import ROOT
from OutFunc import OutFunc

In [2]:
# Obtain the TMVA Tools singleton up front, before the classifier is booked;
# the TMVA tutorials make this call first to load the TMVA library.
ROOT.TMVA.Tools.Instance()

<cppyy.gbl.TMVA.Tools object at 0x5e7a3c3f7160>

In [3]:
# Book the histograms of the classifier output for signal and background.
# Both use the same binning so the two distributions can be compared directly.
# They are written to an output file only in the final "Save Histograms" cell.
nBins, tMin, tMax = 100, -1.0, 1.0
hSig = ROOT.TH1D("hSig", "BDT, signal", nBins, tMin, tMax)
hBkg = ROOT.TH1D("hBkg", "BDT, background", nBins, tMin, tMax)

In [4]:
# Set up the OutFunc object that evaluates the trained classifier.
# Arguments, in order:
#   1st: classifier name (must be one of the classifiers)
#   2nd: directory holding the weight files
#   3rd: prefix of the weight-file name
#   4th: offset for contour
#   5th: bool array indicating which variables were used in training
# The directory is kept in `weightsDir` rather than `dir` so that the Python
# builtin dir() is not shadowed for the rest of the notebook session.
weightsDir = "dataset/weights/"
prefix = "tmvaTest"
name = "BDTfinal"
tCut = 0.0
useVar = [False, True, True, True, True, True, True]

testStat = OutFunc(name, weightsDir, prefix, tCut, useVar)

                         : Booking "BDTfinal" of type "BDT" from dataset/weights//tmvaTest_BDTfinal.weights.xml.
                         : Reading weight file: dataset/weights//tmvaTest_BDTfinal.weights.xml
<HEADER> DataSetInfo              : [Default] : Added class "Signal"
<HEADER> DataSetInfo              : [Default] : Added class "Background"
                         : Booked classifier "BDTfinal" of type: "BDT"


In [5]:
# Open the input file (TFile's default mode is read-only) and fetch the
# signal and background trees.  Fail early with a clear message if the file
# or one of the trees is missing, instead of crashing later in the event loop
# on a null tree.
inputPath = "../modelInputData/testData.root"
inputFile = ROOT.TFile(inputPath)
if inputFile.IsZombie() or not inputFile.IsOpen():
    raise OSError(f"Could not open input file: {inputPath}")
inputFile.ls()

for treeName in ("sig", "bkg"):
    if not inputFile.GetListOfKeys().Contains(treeName):
        raise KeyError(f"Tree '{treeName}' not found in {inputPath}")
sig = inputFile.Get("sig")
bkg = inputFile.Get("bkg")

# Order matters downstream: index 0 is signal, index 1 is background.
treeVec = [sig, bkg]

TFile**		../modelInputData/testData.root	
 TFile*		../modelInputData/testData.root	
  KEY: TTree	bkg;7	bkg [current cycle]
  KEY: TTree	bkg;6	bkg [backup cycle]
  KEY: TTree	sig;1	sig


In [6]:
# Event counters: totals per class and the numbers passing the cut t > tc.
nSig = 0
nBkg = 0
nSigAcc = 0
nBkgAcc = 0
tc = 0.0

# Loop over the trees: index 0 is signal, every other index is background.
for idx, tree in enumerate(treeVec):
    isSignal = (idx == 0)
    label = 'sig' if isSignal else 'bkg'
    hist = hSig if isSignal else hBkg

    nEntries = tree.GetEntries()
    print(f"Tree: {label}, Entries: {nEntries}")

    # Evaluate the test statistic for each event, histogram it, and count
    # how many events lie above the cut.
    nAccepted = 0
    for entry in range(nEntries):
        tree.GetEntry(entry)
        t = testStat.val(tree)
        hist.Fill(t)
        if t > tc:
            nAccepted += 1

    # Store the per-tree results in the class-specific counters.
    if isSignal:
        nSig = nEntries
        nSigAcc += nAccepted
    else:
        nBkg = nEntries
        nBkgAcc += nAccepted

Tree: sig, Entries: 419649
Tree: bkg, Entries: 6261950
                         : Rebuilding Dataset Default


In [7]:
# Signal efficiency, i.e. the power of the test (true positive rate):
# the fraction of signal events counted in nSigAcc.
# Falls back to 0 when nSig is zero to avoid dividing by zero.
if nSig:
    epsSig = float(nSigAcc) / nSig
else:
    epsSig = 0
print(f"nSigAcc, nSig = {nSigAcc}, {nSig}")
print(f"Signal efficiency (power of test) = {epsSig}")

nSigAcc, nSig = 395156, 419649
Signal efficiency (power of test) = 0.941634556498407


### Question 1(a)

In [8]:
#Size of the test (false positive rate): the fraction of background events
#that pass the cut t > tc.  This is the background *efficiency*; the
#background rejection would be 1 - epsBkg, so the label says "efficiency".
#Falls back to 0 when nBkg is zero to avoid dividing by zero.
epsBkg = float(nBkgAcc) / nBkg if nBkg else 0
print(f"nBkgAcc, nBkg = {nBkgAcc}, {nBkg}")
print(f"Background efficiency (size of test) = {epsBkg}")

nBkgAcc, nBkg = 185036, 6261950
Background rejection (size of test) = 0.029549261811416573


### Question 1(b)

The signal purity of the selected sample (events with $t > t_c$) is given by Bayes' theorem:
$$
P(s | t > t_c) = \frac{P(t > t_c | s) P(s)}{P(t > t_c | s) P(s) + P(t > t_c | b) P(b)}
$$
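As instructed, take the prior probabilities to be equal, i.e., $P(s) = P(b) = 0.5$. The priors then cancel, so the purity depends only on the two efficiencies: $P(s | t > t_c) = \varepsilon_s / (\varepsilon_s + \varepsilon_b)$, where $\varepsilon_s = P(t > t_c | s)$ and $\varepsilon_b = P(t > t_c | b)$.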

In [9]:
# Signal purity P(s | t > tc) from Bayes' theorem, using equal priors.
priorSig = 0.5
priorBkg = 0.5
accSig = epsSig * priorSig   # P(t > tc | s) P(s)
accBkg = epsBkg * priorBkg   # P(t > tc | b) P(b)
# Guard against 0/0 when no event of either class passes the cut.
purity = accSig / (accSig + accBkg) if (accSig + accBkg) else float("nan")
print(f"The signal purity is equal to = {purity}")

The signal purity is equal to = 0.9695739763633604


In [10]:
# Save the histograms: open the output file (recreated on every run),
# write both histograms, then close the output file and the input file.
histFile = ROOT.TFile("analysisfinal.root", "RECREATE")
for hist in (hSig, hBkg):
    hist.Write()
histFile.Close()

inputFile.Close()

In [None]:
#ROC curves can be generated