In [1]:
import ROOT as rt
import sys
import collections
from collections import OrderedDict
import uproot

import os
import scipy
import awkward as ak
import time

import subprocess
GIT_REPO = subprocess.Popen(['git', 'rev-parse', '--show-toplevel'], stdout=subprocess.PIPE).communicate()[0].rstrip().decode('utf-8')
sys.path.append(GIT_REPO + '/lib/')
from histo_utilities import create_TH1D, create_TH2D, std_color_list, create_TGraph, make_ratio_plot
from helper_functions import *

import numpy as np
from scipy.stats import norm
import math
import CMS_lumi, tdrstyle
style = tdrstyle.setTDRStyle()
CMS_lumi.writeExtraText = 0


print(sys.version)

Welcome to JupyROOT 6.24/06
3.6.8 (default, Nov  2 2021, 13:01:57) 
[GCC 8.4.1 20200928 (Red Hat 8.4.1-1)]


# Load ntuples

In [2]:
%%time

fpath =OrderedDict()
tree = OrderedDict()

start_t = time.time()

# for data_year in ['2022','2023','all']:
for data_year in ['all']:
    if data_year == 'all':path = f'/storage/af/group/phys_exotica/delayedjets/displacedJetMuonAnalyzer/Run3/V1p19//Data_all/v14/normalized/'
    else: path = f'/storage/af/group/phys_exotica/delayedjets/displacedJetMuonAnalyzer/Run3/V1p19//Data{data_year}/v14/normalized/'
    if data_year == '2022':fpath['data'+data_year] = path + "DisplacedJet-EXOCSCCluster_Run2022-PromptReco_goodLumi.root"
    elif data_year == '2023':fpath['data'+data_year] = path + "Muon-EXOCSCCluster_Run2023-PromptReco_goodLumi.root"
    elif data_year == 'all':fpath['data'+data_year] = path + "EXOCSCCluster_Run2022_2023_goodLumi.root"
path_sig = "/storage/af/group/phys_exotica/delayedjets/displacedJetMuonAnalyzer/Run3/V1p19/MC_all/v14/normalized/"
mass = [15,23,30,40,55]
ctau = [1,10,100,1000,10000,100000]
decay = ['B']
for d in decay:
    for m in mass:
        for ct in ctau:
            fpath[f'{m}_{ct}'] = f'{path_sig}/ggH_Hto2Sto4{d}_MH-125-MS-{m}-ctauS-{ct}_TuneCP5_13p6TeV_powheg-pythia8_50000pb_weighted.root'
NEvents = {}

for k,v in fpath.items():
    print (k, v)
    root_dir = uproot.open(v) 

    tree[k] = root_dir['MuonSystem']
    NEvents[k] = root_dir['NEvents'].values()[0]
    # NEvents[k] = root_dir['NEvents'].counts()
    print("NEvents",NEvents[k])


dataall /storage/af/group/phys_exotica/delayedjets/displacedJetMuonAnalyzer/Run3/V1p19//Data_all/v14/normalized/EXOCSCCluster_Run2022_2023_goodLumi.root
NEvents 34804216.0
15_1 /storage/af/group/phys_exotica/delayedjets/displacedJetMuonAnalyzer/Run3/V1p19/MC_all/v14/normalized//ggH_Hto2Sto4B_MH-125-MS-15-ctauS-1_TuneCP5_13p6TeV_powheg-pythia8_50000pb_weighted.root
NEvents 57327670.0
15_10 /storage/af/group/phys_exotica/delayedjets/displacedJetMuonAnalyzer/Run3/V1p19/MC_all/v14/normalized//ggH_Hto2Sto4B_MH-125-MS-15-ctauS-10_TuneCP5_13p6TeV_powheg-pythia8_50000pb_weighted.root
NEvents 56095530.0
15_100 /storage/af/group/phys_exotica/delayedjets/displacedJetMuonAnalyzer/Run3/V1p19/MC_all/v14/normalized//ggH_Hto2Sto4B_MH-125-MS-15-ctauS-100_TuneCP5_13p6TeV_powheg-pythia8_50000pb_weighted.root
NEvents 57049570.0
15_1000 /storage/af/group/phys_exotica/delayedjets/displacedJetMuonAnalyzer/Run3/V1p19/MC_all/v14/normalized//ggH_Hto2Sto4B_MH-125-MS-15-ctauS-1000_TuneCP5_13p6TeV_powheg-pythia8_5

# apply selections

In [3]:
%%time

cscRechitClusterPhi = {}
dtRechitClusterPhi = {}
deltaPhi_cluster  = {}
cscRechitClusterDNN  = {}
dtRechitClusterSize  = {}
dtRechitClusterBX  = {}
weight = {}
sel_ev = {}
sel = {}
gLLP_ctau = {}

categories = ['lowMET','highMET']
categories = ['lowMET']
region_list = ['oot', 'signal']
for region in region_list:
    for tree_k in tree.keys():
        if region == 'oot' and (not tree_k == 'dataall'):continue
        T = tree[tree_k]
        
        for met_cat in categories:
            # if not (met_cat == 'highMET'):continue
            k = f'{region}_{tree_k}_{met_cat}'
            ########### SELECTION: CLUSTERS ############
            me1 = (T['cscRechitClusterNRechitChamberPlus11'].array()+ T['cscRechitClusterNRechitChamberPlus12'].array()+\
            T['cscRechitClusterNRechitChamberMinus11'].array()+ T['cscRechitClusterNRechitChamberMinus12'].array())
            cscRechitClusterR = (T['cscRechitClusterX'].array()**2+T['cscRechitClusterY'].array()**2)**0.5
            L1 = L1_trg(cscRechitClusterR, np.abs(T['cscRechitClusterZ'].array()), np.abs(T['cscRechitClusterSize'].array()))

            #one cluster to pass trigger
            csccluster = np.logical_and(T['cscRechitClusterTimeWeighted'].array() < 12.5, T['cscRechitClusterTimeWeighted'].array() > -5)
            # csccluster = csccluster & L1
            csccluster = csccluster & (me1 ==0)
            csccluster = csccluster & (T['cscRechitClusterSize'].array()>160)
            csccluster = csccluster & (T['cscRechitClusterTimeSpreadWeightedAll'].array()<20)
            csccluster = csccluster & (T['cscRechitClusterJetVetoPt'].array()<30)
            csccluster = csccluster & np.logical_not((T['cscRechitClusterMuonVetoPt'].array() > 30) & T['cscRechitClusterMuonVetoGlobal'].array())    


            dtcluster = (T['dtRechitClusterNHitStation1'].array()==0)
            dtcluster = dtcluster & (T['dtRechitCluster_match_RPChits_dPhi0p5'].array() >= 1)
            dtcluster = dtcluster & (T['dtRechitClusterJetVetoPt'].array()<30)
            dtcluster = dtcluster & np.logical_not((T['dtRechitClusterMuonVetoPt'].array() > 30) & T['dtRechitClusterMuonVetoLooseId'].array())
            # if 'oot' in region: dtcluster = dtcluster & (T['dtRechitCluster_match_RPCBx_dPhi0p5'].array() < 0)
            # else:dtcluster = dtcluster & (T['dtRechitCluster_match_RPCBx_dPhi0p5'].array() == 0)


        ########### SELECTION: EVENTS ############
            sel_ev[k] = T['HLT_CSCDT'].array()
            
            sel_ev[k] = sel_ev[k] & (T['nCscRechitClusters'].array()==1) & (T['nDtRechitClusters'].array()==1)
            
            if met_cat == 'lowMET':
                sel_ev[k] = np.logical_and(sel_ev[k] ,T['met'].array() < 200)
                
#             else: 
#                 sel_ev = sel_ev & (T['met'].array() > 150) & (T['met'].array() < 200)
#                 csccluster = np.logical_and(T['cscRechitClusterTimeWeighted'].array() < 12.5, T['cscRechitClusterTimeWeighted'].array() > -5)
#                 csccluster = csccluster & L1
#                 csccluster = csccluster & (me1 ==0)
#                 csccluster = csccluster & (T['cscRechitClusterTimeSpreadWeightedAll'].array()<20)
            
#                 dtcluster = (T['dtRechitClusterNHitStation1'].array()==0)
#                 dtcluster = dtcluster & (T['dtRechitCluster_match_RPChits_dPhi0p5'].array() >= 1)
                
            sel_ev[k]  = sel_ev[k] & (np.sum(dtcluster, axis = 1) == 1)
            sel_ev[k]  = sel_ev[k] & (np.sum(csccluster, axis = 1) == 1)
        ########### BRANCHES ############

           ##### event variables ##### 
            # make sure cluster0 and cluster1 index are different

            cscRechitClusterPhi[k] = T['cscRechitClusterPhi'].array()[csccluster][sel_ev[k]][:,0]
            dtRechitClusterPhi[k] = T['dtRechitClusterPhi'].array()[dtcluster][sel_ev[k]][:,0]
            deltaPhi_cluster[k] = deltaPhi(np.array(cscRechitClusterPhi[k]), np.array(dtRechitClusterPhi[k]))

            cscRechitClusterDNN[k] = T['cscRechitClusterDNN_bkgMC_plusBeamHalo'].array()[csccluster][sel_ev[k]][:,0]
            dtRechitClusterBX[k] = T['dtRechitCluster_match_RPCBx_dPhi0p5'].array()[dtcluster][sel_ev[k]][:,0]
            dtRechitClusterSize[k] = T['dtRechitClusterSize'].array()[dtcluster][sel_ev[k]][:,0]
            # cscRechitClusterSize1[k] = T['cscRechitClusterSize'].array()[sel_ev][cluster1][:,0]

            sel[k] = (np.abs(deltaPhi_cluster[k])>0.4)
            
            if 'oot' in region:sel[k] = sel[k] & (cscRechitClusterDNN[k] > 0.96) & (dtRechitClusterBX[k]<0)
            elif 'invertDNN' in region: sel[k] = sel[k] & (cscRechitClusterDNN[k] < 0.96) & (dtRechitClusterBX[k] == 0)
            elif 'signal' in region:sel[k] = sel[k] & (cscRechitClusterDNN[k] > 0.96) & (dtRechitClusterBX[k] == 0)
            else: assert(False)


            print(k, np.count_nonzero(sel[k]))
            deltaPhi_cluster[k] = deltaPhi_cluster[k][sel[k]]
            dtRechitClusterSize[k] = dtRechitClusterSize[k][sel[k]]
            HMTEff = T['cscRechitClusterHMTEfficiency'].array()[csccluster][sel_ev[k]][sel[k]][:,0]
            if 'data' in k:weight[k] = (T['weight'].array())[sel_ev[k]][sel[k]]* 0.0 + 1
            else: weight[k] = (T['pileupWeight'].array()*T['weight'].array())[sel_ev[k]][sel[k]]*HMTEff
            
            gLLP_ctau[k] = T['gLLP_ctau'].array()[sel_ev[k]][sel[k]]

            if np.count_nonzero(sel)<=1:continue
            print("correlation between two variables:", k, scipy.stats.pearsonr(deltaPhi_cluster[k], dtRechitClusterSize[k]))
            print("events in this region:", k, len(deltaPhi_cluster[k]))

oot_dataall_lowMET 263
signal_dataall_lowMET 363
signal_15_1_lowMET 0
signal_15_10_lowMET 0
signal_15_100_lowMET 0
signal_15_1000_lowMET 2390
signal_15_10000_lowMET 366
signal_15_100000_lowMET 10
signal_23_1_lowMET 0
signal_23_10_lowMET 0
signal_23_100_lowMET 0
signal_23_1000_lowMET 1061
signal_23_10000_lowMET 802
signal_23_100000_lowMET 12
signal_30_1_lowMET 0
signal_30_10_lowMET 0
signal_30_100_lowMET 0
signal_30_1000_lowMET 418
signal_30_10000_lowMET 1155
signal_30_100000_lowMET 30
signal_40_1_lowMET 0
signal_40_10_lowMET 0
signal_40_100_lowMET 0
signal_40_1000_lowMET 55
signal_40_10000_lowMET 1179
signal_40_100000_lowMET 30
signal_55_1_lowMET 0
signal_55_10_lowMET 0
signal_55_100_lowMET 0
signal_55_1000_lowMET 1
signal_55_10000_lowMET 118
signal_55_100000_lowMET 1
CPU times: user 36.9 s, sys: 3.16 s, total: 40 s
Wall time: 40.1 s


## Create datacard with OOT data

In [4]:
# dummy 20% signal uncertainty
sig_unc = {'ggH':[[0.2]*4]}
sig_unc_name = ['dtcsc_test']

In [5]:
data_key = 'oot_dataall_lowMET'
datacard_version = 'v5'
target_ctau = [1,3,5,10,30,50,100,300,500,1000,1500,2000,3000,5000,8000,10000,30000,50000,100000]

In [6]:
def weight_calc(llp_ct, new_ctau, old_ctau, nLLP = 2, flag = False):
    source = np.exp(-1.0*llp_ct/old_ctau)/old_ctau**nLLP
    weight = 1.0/new_ctau**nLLP * np.exp(-1.0*llp_ct/new_ctau)/source
    return weight


In [None]:

a, b, c, d, pred, unc_pred = {}, {}, {}, {}, {}, {}

outDataCardsDir=f"/storage/af/user/christiw/login-1/christiw/LLP/Run3/CMSSW_14_1_0_pre4/src/HiggsAnalysis/MuonSystemLimit/combine/datacards_run3HMT/dtcsc/{datacard_version}/"
outputTreeDir = outDataCardsDir.replace("datacards","limitTrees")
os.system(f"mkdir -p {outputTreeDir}")
os.system(f"mkdir -p {outDataCardsDir}")


signal_region = 'a'
prefix = 'dtcsc_'
bkg_pred = 'OOT'

NHIT_CUT = 90
DPHI_CUT = 2.8
####### background prediction ######
if bkg_pred == 'OOT':
    k = 'oot_dataall_lowMET'
    TF = len(dtRechitClusterSize['signal_dataall_lowMET'])/len(dtRechitClusterSize['oot_dataall_lowMET'])
    a[k] = np.count_nonzero((dtRechitClusterSize[k]>=NHIT_CUT) &  (np.abs(deltaPhi_cluster[k])>=DPHI_CUT)) * TF
    b[k] = np.count_nonzero((dtRechitClusterSize[k]>=NHIT_CUT) &  (np.abs(deltaPhi_cluster[k])<DPHI_CUT)) * TF
    c[k] = np.count_nonzero((dtRechitClusterSize[k]<NHIT_CUT) &  (np.abs(deltaPhi_cluster[k])<DPHI_CUT)) * TF
    d[k] = np.count_nonzero((dtRechitClusterSize[k]<NHIT_CUT) &  (np.abs(deltaPhi_cluster[k])>=DPHI_CUT)) * TF
    if c[k] == 0: pred[k] = 999
    else: pred[k] = b[k]*d[k]/c[k]
    unc_pred[k] = (TF/c[k] + TF/b[k] + TF/d[k])**0.5*pred[k]
    print(a[k],b[k],c[k],d[k], pred[k],unc_pred[k])
    bkg_rate = [pred[k],b[k],c[k],d[k]]
    observation = [pred[k],b[k],c[k],d[k]]

####### signal prediction ######
mass = [15,23,30,40,55]
ctau = [1,10,100,1000,100000]
decay = ['B']
for dec in decay:
    for m in mass:
        for ct in target_ctau:
            signal_rate = {}
            mc_stat_unc = {}
            gmn = {}
            OLD_CTAU = ctau.copy()
            if '.' in str(ct):ctf = float(ct)
            else: ctf = int(ct)
            ct_list = 10**int(math.log10(ctf))

            if ctf < OLD_CTAU[0]: ct_list = [OLD_CTAU[0]]
            elif ctf>OLD_CTAU[-1]: ct_list = [OLD_CTAU[-1]]
            elif ctf in OLD_CTAU: ct_list = [ctf]
            elif len(OLD_CTAU) == 2:ct_list = OLD_CTAU.copy()
            else:ct_list = [ct_list,ct_list*10]
            print(ct, ct_list)# determine which ctaus are used
            
            p = 'ggH'
            signal_rate[p] = np.zeros((4,))
            gmn[p] = np.zeros((4,))
            mc_stat_unc[p] = np.zeros((4,))
            
            
            # sig_unc[p] = []
            
            for i, ct0 in enumerate(ct_list):
                k = f'signal_{m}_{ct0}_lowMET'

                if np.count_nonzero(gLLP_ctau[k])==0:continue
                                
                ######################

                gLLP_ctau_sum = np.sum(gLLP_ctau[k], axis = 1)
                
                weight_ctau = weight_calc(gLLP_ctau_sum, ctf/10., int(ct0)/10.) # convert everything to cm
                
                if len(ct_list) == 1:weight_cond = gLLP_ctau_sum >= 0
                else:
                    if i == 0 : weight_cond = gLLP_ctau_sum<int(ct_list[0]/2)
                    else: weight_cond = gLLP_ctau_sum>=int(ct_list[0]/2)
                            
                            
                w = weight[k]*weight_ctau
                a = weight_cond & (dtRechitClusterSize[k]>=NHIT_CUT) &  (np.abs(deltaPhi_cluster[k])>=DPHI_CUT)
                b = weight_cond & (dtRechitClusterSize[k]>=NHIT_CUT) &  (np.abs(deltaPhi_cluster[k])<DPHI_CUT)
                c = weight_cond & (dtRechitClusterSize[k]<NHIT_CUT) &  (np.abs(deltaPhi_cluster[k])<DPHI_CUT)
                d = weight_cond & (dtRechitClusterSize[k]<NHIT_CUT) &  (np.abs(deltaPhi_cluster[k])>=DPHI_CUT)
                
                signal_rate[p][0]+=np.sum(w[a])
                signal_rate[p][1]+=np.sum(w[b])
                signal_rate[p][2]+=np.sum(w[c])
                signal_rate[p][3]+=np.sum(w[d])
                

                card_name = f'ggH_HToSS_STo4{dec}_ms{m}_ctau{ct}_nhits{NHIT_CUT}_dphi'+str(round(DPHI_CUT,1)).replace(".","p")
                print(card_name)
#                 ## create datacards ##
                make_datacard_2tag(
                outDataCardsDir,
                card_name,
                signal_rate,
                signal_rate['ggH'][0],
                bkg_rate,
                observation,
                [], 
                [], 
                sig_unc, 
                sig_unc_name,
                signal_region, 
                prefix)

            
