In [1]:
import ROOT as R
import uproot

import import_ipynb
import setPath
from os import listdir
from os.path import isfile, join
from Input.OpenDataPandaFramework13TeV import *

import time

import pandas as pd
import numpy as np

%jsroot on

R.gInterpreter.ProcessLine('#include "Analysis/AnalysisSelector.cpp"')


Welcome to JupyROOT 6.24/02
importing Jupyter notebook from setPath.ipynb
importing Jupyter notebook from /storage/galaxy/jobs_directory/005/5166/working/jupyter/Input/OpenDataPandaFramework13TeV.ipynb
This library contains handy functions to ease the access and use of the 13TeV ATLAS OpenData release

getBkgCategories()
	 Dumps the name of the various background cataegories available 
	 as well as the number of samples contained in each category.
	 Returns a vector with the name of the categories

getSamplesInCategory(cat)
	 Dumps the name of the samples contained in a given category (cat)
	 Returns dictionary with keys being DSIDs and values physics process name from filename.

getMCCategory()
	 Returns dictionary with keys DSID and values MC category

initialize(indir)
	 Collects all the root files available in a certain directory (indir)

getSkims(indir)
	 Prints all available skims in the directory



Setting luminosity to 10064 pb^-1

###############################
#### Backgrou

0

In [2]:
# Base directory of the ATLAS Open Data samples (absolute path on the shared storage).
opendatadir = "/storage/shared/data/fys5555/ATLAS_opendata/"
# Analysis name: used as the sample sub-directory and passed to the AnalysisSelector.
analysis = "2lep"

In [3]:
# Collect the simulated (MC) and the recorded (Data) samples of the chosen
# analysis, then select the background categories to work with.
mcfiles = initialize(f"{opendatadir}/{analysis}/MC")
datafiles = initialize(f"{opendatadir}/{analysis}/Data")
allfiles = z = {**mcfiles, **datafiles}
Backgrounds = getBkgCategories()

# Take 'Zjets' and 'Wjets' out of the list of background categories.
for unused_category in ('Zjets', 'Wjets'):
    Backgrounds.remove(unused_category)

####################################################################################################
BACKGROIUND SAMPLES
####################################################################################################

###############################
#### Background categories ####
###############################
Category             N(samples)
-------------------------------
Diboson                      10
Higgs                        20
Wjets                        42
Wjetsincl                     6
Zjets                        42
Zjetsincl                     3
singleTop                     6
topX                          3
ttbar                         1


In [4]:
# Map every integer dataset ID (DSID) to the category it is listed under.
MCcat = {}
for cat, info in allfiles.items():
    for dsid in info["dsid"]:
        try:
            MCcat[int(dsid)] = cat
        except (TypeError, ValueError):
            # IDs that cannot be converted to an integer are skipped on purpose.
            # Only conversion errors are caught, so interrupts and genuine bugs
            # are no longer silently swallowed.
            continue

In [5]:
# Flat list of input files: first the files of every selected background
# category (in the order of `Backgrounds`), then the data files.
files = []
for Category in Backgrounds:
    files.extend(mcfiles[Category]['files'])

files.extend(datafiles['data']['files'])

In [6]:
# Chain the 'mini' tree of every collected file into a single TChain.
chain = R.TChain('mini')

for root_file in files:
    chain.Add(root_file)

In [7]:
# Create the output folders for the histograms. os.makedirs also creates the
# parent './Histograms' directory when needed, and exist_ok=True makes the cell
# safe to re-run without a separate (race-prone) existence check.
for output_dir in ('./Histograms/MC', './Histograms/Data'):
    os.makedirs(output_dir, exist_ok=True)

In [8]:
# Build the C++ AnalysisSelector (included from Analysis/AnalysisSelector.cpp in
# the first cell) on top of the chain, passing the analysis name along.
selection = R.AnalysisSelector(chain, analysis)

Info in <TUnixSystem::ACLiC>: creating shared library /storage/galaxy/jobs_directory/005/5166/working/jupyter/project3/./HWWAnalysis_C.so


Process the chains for both the Monte Carlo and the data at the same time. Once the chain has been processed, the relevant histograms and the features for machine learning can be acquired from the selector, which is reached through the `Selector()` function of the SelectorProxy class:

- R.SelectorProxy().Selector()."function"

The relevant functions are

- .GetHistogram((string) Category)

In [9]:
%%time

# Run the selector over the whole chain (MC and data together). The histograms
# filled here are fetched afterwards with selection.Selector().GetHistogram(...).
selection.Process()

CPU times: user 4min 29s, sys: 6.72 s, total: 4min 35s
Wall time: 4min 41s
-------------------------------------------
Processing MC and Data
Number of events to process: 83344318
-------------------------------------------
Total number of processed events: 83344318
Number of events in eu channel: 297671




## Set up the plots

In [10]:
# ROOT colour assigned to each background category; used below as the fill and
# line colour of the corresponding histograms.
colours = {
    "Diboson": R.kGreen,
    "Zjets": R.kYellow,
    "ttbar": R.kRed,
    "singleTop": R.kBlue - 7,
    "Wjets": R.kBlue + 3,
    "topX": R.kOrange + 1,
    "Higgs": R.kMagenta,
    "Wjetsincl": R.kBlue - 10,
    "Zjetsincl": R.kYellow - 9,
}

### Get histograms from C++ class

In [118]:
# Move four categories to the end of the list. The order of `Backgrounds` is the
# order in which the histograms are later added to the stacks and the legend.
for category in ('Higgs', 'Wjetsincl', 'Zjetsincl', 'Diboson'):
    Backgrounds.remove(category)

Backgrounds.extend(['Wjetsincl', 'Zjetsincl', 'Diboson', 'Higgs'])

In [120]:
# Histograms per background category, keyed by the category name.
H_mll = {}
H_mt = {}

for bkg in Backgrounds:
    # The 'Higgs' histograms are built separately in the following cell.
    if bkg == 'Higgs':
        continue
    print(bkg)

    # Invariant mass: 30 bins from 10 to 55.
    mll_hist = R.TH1D()
    mll_hist.SetNameTitle('H_mll', 'Invariant mass')
    mll_hist.SetBins(30, 10, 55)
    mll_hist.Reset()
    # Acquire the histogram from the Selector pointer of the SelectorProxy class.
    mll_hist.Add(selection.Selector().GetHistogram('mll', bkg))
    H_mll[bkg] = mll_hist

    # Transverse mass: 15 bins from 50 to 200.
    mt_hist = R.TH1D()
    mt_hist.SetNameTitle('H_mt', 'Transverse mass')
    mt_hist.SetBins(15, 50, 200)
    mt_hist.Reset()
    # Acquire the histogram from the Selector pointer of the SelectorProxy class.
    mt_hist.Add(selection.Selector().GetHistogram('mt', bkg))
    H_mt[bkg] = mt_hist
    

singleTop
topX
ttbar
Wjetsincl
Zjetsincl
Diboson


In [121]:
# Higgs transverse-mass histogram: 15 bins from 50 to 200.
higgs_mt = R.TH1D()
higgs_mt.SetNameTitle('H_mt', 'Transverse mass')
higgs_mt.SetBins(15, 50, 200)
higgs_mt.Reset()
H_mt['Higgs'] = higgs_mt

# NOTE(review): the 0.07 factor is applied to the 'mt' histogram only, while the
# 'mll' histogram below is added unscaled -- confirm that this is intended.
higgs_mt.Add(selection.Selector().GetHistogram('mt', 'Higgs'), 0.07)

# Higgs invariant-mass histogram: 30 bins from 10 to 55.
higgs_mll = R.TH1D()
higgs_mll.SetNameTitle('H_mll', 'Invariant mass')
higgs_mll.SetBins(30, 10, 55)
higgs_mll.Reset()
H_mll['Higgs'] = higgs_mll

higgs_mll.Add(selection.Selector().GetHistogram('mll', 'Higgs'))

True

In [122]:
# Data invariant-mass histogram: 30 bins from 10 to 55.
H_mll_d = R.TH1D()
H_mll_d.SetNameTitle('H_mll', 'Invariant mass')
H_mll_d.SetBins(30, 10, 55)
H_mll_d.Reset()
H_mll_d.Add(selection.Selector().GetHistogram('mll', 'data'))

# Data transverse-mass histogram: 15 bins from 50 to 200.
H_mt_d = R.TH1D()
H_mt_d.SetNameTitle('H_mt', 'Transverse mass')
H_mt_d.SetBins(15, 50, 200)
H_mt_d.Reset()
H_mt_d.Add(selection.Selector().GetHistogram('mt', 'data'))


True

### Create the plots

In [123]:
# Give both histograms of every category the fill and line colour of that category.
for bkg in Backgrounds:
    category_colour = colours[bkg]
    for hist in (H_mll[bkg], H_mt[bkg]):
        hist.SetFillColor(category_colour)
        hist.SetLineColor(category_colour)


In [124]:
# One stack per observable, filled in the order of `Backgrounds`.
stack_mll = R.THStack("Invariant mass", "")
stack_mt = R.THStack("Transverse mass", "")

for bkg in Backgrounds:
    for stack, hist in ((stack_mll, H_mll[bkg]), (stack_mt, H_mt[bkg])):
        stack.RecursiveRemove(hist)  # Remove previously stacked histograms
        stack.Add(hist)
    

In [125]:
# Data is shown with black lines and black filled-circle markers.
H_mll_d.SetLineColor(R.kBlack)
H_mll_d.SetMarkerStyle(R.kFullCircle)
H_mll_d.SetMarkerColor(R.kBlack)

In [126]:
R.gStyle.SetLegendBorderSize(0)  # Remove the (default) border around the legend
leg = R.TLegend(0.65, 0.60, 0.9, 0.85)
leg.Clear()

# One "f" entry per background histogram, followed by the data entry.
for bkg in Backgrounds:
    leg.AddEntry(H_mll[bkg], bkg, "f")
leg.AddEntry(H_mll_d, "Data", "lep")

<cppyy.gbl.TLegendEntry object at 0x1172a2b0>

In [127]:
C = R.TCanvas("c", "c", 600, 600)

R.gPad.SetLogy(0)  # 0 keeps the y-axis linear; use 1 for a logarithmic y-axis

# The stack is drawn first; its range and axes are configured afterwards.
stack_mll.Draw("hist")
stack_mll.SetMaximum(2e3)
stack_mll.SetMinimum(1)

y_axis = stack_mll.GetYaxis()
y_axis.SetTitle("# events")
y_axis.SetTitleOffset(1.3)

x_axis = stack_mll.GetXaxis()
x_axis.SetTitle("m_{ll} (GeV)")
x_axis.SetTitleOffset(1.3)

# Overlay the data points and the legend, then display the canvas.
H_mll_d.Draw("same E")
leg.Draw()
C.Draw()



In [128]:
# Data is shown with black lines and black filled-circle markers.
H_mt_d.SetLineColor(R.kBlack)
H_mt_d.SetMarkerStyle(R.kFullCircle)
H_mt_d.SetMarkerColor(R.kBlack)

In [129]:
R.gStyle.SetLegendBorderSize(0)  # Remove the (default) border around the legend
leg = R.TLegend(0.65, 0.60, 0.9, 0.85)
leg.Clear()

# One "f" entry per background histogram, followed by the data entry.
for bkg in Backgrounds:
    leg.AddEntry(H_mt[bkg], bkg, "f")
leg.AddEntry(H_mt_d, "Data", "lep")

<cppyy.gbl.TLegendEntry object at 0x13153c60>

In [130]:
C = R.TCanvas("c", "c", 600, 600)

R.gPad.SetLogy(0)  # 0 keeps the y-axis linear; use 1 for a logarithmic y-axis

# The stack is drawn first; its range and axes are configured afterwards.
stack_mt.Draw("hist")
stack_mt.SetMaximum(900)
stack_mt.SetMinimum(1)

y_axis = stack_mt.GetYaxis()
y_axis.SetTitle("# events")
y_axis.SetTitleOffset(1.3)

x_axis = stack_mt.GetXaxis()
x_axis.SetTitle("m_{T} (GeV)")
x_axis.SetTitleOffset(1.3)

# Overlay the data points and the legend, then display the canvas.
H_mt_d.Draw("same E")
leg.Draw()
C.Draw()

