## Analyzing Elisa's 3P1F and 2P2F event lists.

In [1]:
import os
import sys
import numpy as np
import ROOT as rt
from pprint import pprint
from collections import Counter
from sidequestsdatacjlst_fw import CjlstFlag

sys.path.append("/blue/avery/rosedj1/HiggsMassMeasurement/")
from Utils_Python.Utils_Files import check_overwrite

Welcome to JupyROOT 6.20/04


ModuleNotFoundError: No module named 'sidequestsdatacjlst_fw'

In [26]:
def write_tree_info_to_txt(infile, outtxt, keep_2P2F=True, keep_3P1F=True):
    """
    Write info from TFile `infile` from TTree 'passedEvents' to `outtxt`.

    Info which gets written:
    Run : LumiSect : Event
    """
    tfile = rt.TFile.Open(infile)
    tree = tfile.Get("passedEvents")

    with open(outtxt, "w") as f:
        f.write("# Run : LumiSect : Event\n")
        for evt in tree:
            keep_evt = True if (keep_2P2F and evt.is2P2F) or (keep_3P1F and evt.is3P1F) else False
            if keep_evt:
                f.write(f"{evt.Run} : {evt.LumiSect} : {evt.Event}\n")
    print(f"TTree info written to:\n{outtxt}")

def get_list_of_lines(evt_ls_txt):
    """
    Return a list of the lines from `evt_ls_txt`.
    The lines must start with a digit.
    Trailing newlines ('\\n') are stripped.
    """
    with open(evt_ls_txt, "r") as f:
        return [line.rstrip('\n') for line in f.readlines() if line[0].isdigit()]

def get_list_of_tuples(evt_ls):
    """
    Return a list of 3-tuples from a list of strings `evt_ls`:

    [
        (Run1, LumiSect1, Event1),
        (Run2, LumiSect2, Event2),
        ...
    ]
    """
    new_evt_ls = []
    for line in evt_ls:
        tup = tuple([int(num) for num in line.split(":")])
        new_evt_ls.append(tup)
    return new_evt_ls

In [34]:
class FileComparer:

    def __init__(self, txt_file1, txt_file2, control_reg="", verbose=False):
        """
        Feed in two txt files to be compared.

        NOTE:
        - Each txt file is converted to a list of 3-tuples and stored.
        - Only lines which begin with a digit are read and stored.

        Parameters
        ----------
        control_reg : str
            Used for printing and writing files.
        """
        self.file1 = txt_file1
        self.file2 = txt_file2
        self.cr = control_reg
        self.verbose = verbose

        self.check_cr(txt_file1, txt_file2)
        if control_reg in "":
            self.cr = "all"
        self.ls_of_tup_file1_nodup = None
        self.ls_of_tup_file2_nodup = None

        # Check for duplicates.
        self.ls_of_tup_file1 = get_list_of_tuples(get_list_of_lines(txt_file1))
        if self.check_for_dups(txt_file1, self.ls_of_tup_file1):
            # Remove duplicates by turning to a set and then back to list.
            self.ls_of_tup_file1_nodup = list(set(self.ls_of_tup_file1))
        else:
            self.ls_of_tup_file1_nodup = self.ls_of_tup_file1

        self.ls_of_tup_file2 = get_list_of_tuples(get_list_of_lines(txt_file2))
        if self.check_for_dups(txt_file2, self.ls_of_tup_file2):
            self.ls_of_tup_file2_nodup = list(set(self.ls_of_tup_file2))
        else:
            self.ls_of_tup_file2_nodup = self.ls_of_tup_file2

        self.compare_files()

    def check_for_dups(self, txt_file, ls_of_tup):
        """Return True and print info if duplicates within a file are found."""
        len_ls = len(ls_of_tup)
        len_set = len(set(ls_of_tup))
        if len_ls != len_set:
            n_dups = len_ls - len_set
            print(f"[WARNING] Duplicates ({n_dups}) found in file: {txt_file}")
            print(f"[WARNING] len(ls)={len_ls} != len(set)={len_set}")
            if self.verbose:
                # There's some counting error here...
                # I know there are 120 duplicates, but counter only finds 118.
                counter = Counter(ls_of_tup)
                print(f"Printing duplicates in file:\n{txt_file}")
                dup_key_ls = [k for k,v in counter.items() if v > 1]
                # pprint(dup_key_ls)
                # assert n_dups == len(dup_key_ls)
                pprint(dup_key_ls)
            return True
        return False

    def check_cr(self, path1, path2):
        """Make sure that the control region is the one requested."""
        cr_low = self.cr.lower()
        assert cr_low in ("2p2f", "3p1f", "")
        # Make sure that the two files have the requested CR.
        msg = f"The `control_reg` ({self.cr}) not found in names of txt files."
        assert all(cr_low in f.lower() for f in (path1, path2)), msg

    def compare_files(self):
        """Store unique and common info about files. Called when instantiated."""
        self.set_common_to_both = set(self.ls_of_tup_file1_nodup) & set(self.ls_of_tup_file2_nodup)
        self.set_unique_to_file1 = set(self.ls_of_tup_file1_nodup) - set(self.ls_of_tup_file2_nodup)
        self.set_unique_to_file2 = set(self.ls_of_tup_file2_nodup) - set(self.ls_of_tup_file1_nodup)

    def print_results(self, whose="all", show_n_evts=25, save_to_file=None):
        """Print info describing differences between two files.
        
        Parameters
        ----------
        whose : str
            "file1", "file2", "all"
        """
        print(f"Comparing {self.cr.upper()}:")
        print(f"file1: {self.file1}")
        print(f"file2: {self.file2}")

        print(f"{'n_evts total file1 (no dup): ':<25}{len(self.ls_of_tup_file1_nodup)}")
        print(f"{'n_evts total file2 (no dup): ':<25}{len(self.ls_of_tup_file2_nodup)}")
        print(f"{'n_evts in common: ':<25}{len(self.set_common_to_both)}")
        print(f"{'n_evts unique to file1: ':<25}{len(self.set_unique_to_file1)}")
        print(f"{'n_evts unique to file2: ':<25}{len(self.set_unique_to_file2)}")

        header = "#-- Run -- LumiSect -- Event --#"
        if show_n_evts == -1:
            show_n_evts = None
        if whose in ("file1", "all"):
            print(f"  file1's unique events:")
            print(header)
            pprint(list(self.set_unique_to_file1)[:show_n_evts])
            print()
        if whose in ("file2", "all"):
            print(f"  file2's unique events:")
            print(header)
            pprint(list(self.set_unique_to_file2)[:show_n_evts])
            print()

    def save_events_to_txt(self, kind, outtxt, no_dup=True, overwrite=False):
        """
        Write the events to `outtxt` in the format:

        Run : LumiSect : Event

        Parameters
        ----------
        kind : str
            Choose which events to write to `outtxt`.
            "file1", "file2", "common", "file1_unique", "file2_unique"
        """
        check_overwrite(outtxt, overwrite=overwrite)
        assert kind in ("file1", "file2", "common", "file1_unique", "file2_unique")

        if kind in "file1":
            iter_ls_of_tup = self.ls_of_tup_file1_nodup if no_dup else self.ls_of_tup_file1
        elif kind in "file2":
            iter_ls_of_tup = self.ls_of_tup_file2_nodup if no_dup else self.ls_of_tup_file2
        elif kind in "common":
            iter_ls_of_tup = self.set_common_to_both
        elif kind in "file1_unique":
            iter_ls_of_tup = self.set_unique_to_file1
        elif kind in "file2_unique":
            iter_ls_of_tup = self.set_unique_to_file2

        with open(outtxt, "w") as f:
            f.write("# Run : LumiSect : Event\n")
            for tup in iter_ls_of_tup:
                f.write(f"{tup[0]} : {tup[1]} : {tup[2]}\n")
            print(f"Wrote '{self.cr} {kind}' events to file:\n{outtxt}")

In [28]:
# infile_jake_tree = "/blue/avery/rosedj1/ZplusXpython/data/ZLL_CR_FRapplied/Data_2018_NoDuplicates_RunEventLumi.root"
infile_jake_tree = "/blue/avery/rosedj1/ZplusXpython/data/ZLL_CR_FRapplied/new_data2018/cr_ZLL.root"

infile_elisa       = "/blue/avery/rosedj1/ZplusXpython/sidequests/findmissingevents_comparetoelisa/CRLLos_listOfEvents.txt"
infile_elisa_2p2f  = "/blue/avery/rosedj1/ZplusXpython/sidequests/findmissingevents_comparetoelisa/CRLLos_2P2F_listOfEvents.txt"
infile_elisa_3p1f  = "/blue/avery/rosedj1/ZplusXpython/sidequests/findmissingevents_comparetoelisa/CRLLos_3P1F_listOfEvents.txt"

outdir = "/blue/avery/rosedj1/ZplusXpython/sidequests/findmissingevents_comparetoelisa/jakes_new2018data/"

infile_jake      = os.path.join(outdir, "CRLLos_listOfEvents_jake.txt")
infile_jake_2p2f = os.path.join(outdir, "CRLLos_listOfEvents_jake_2P2F.txt")
infile_jake_3p1f = os.path.join(outdir, "CRLLos_listOfEvents_jake_3P1F.txt")

outfile_elisa_2p2f_unique  = os.path.join(outdir, "CRLLos_2P2F_listOfEvents_unique.txt")
outfile_elisa_3p1f_unique  = os.path.join(outdir, "CRLLos_3P1F_listOfEvents_unique.txt")
outfile_jake_2p2f_unique = os.path.join(outdir, "CRLLos_listOfEvents_jake_2P2F_unique.txt")
outfile_jake_3p1f_unique = os.path.join(outdir, "CRLLos_listOfEvents_jake_3P1F_unique.txt")
outfile_LLR_data2018 = "/blue/avery/rosedj1/ZplusXpython/sidequests/findmissingevents_comparetoelisa/"

outfile_2p2f_common = os.path.join(outdir, "CRLLos_listOfEvents_2P2F_common.txt")
outfile_3p1f_common = os.path.join(outdir, "CRLLos_listOfEvents_3P1F_common.txt")
# write_tree_info_to_txt(infile_jake_tree, infile_jake)

## Make txt files of events.

In [9]:
write_tree_info_to_txt(infile_jake_tree, infile_jake_2p2f, keep_2P2F=True, keep_3P1F=False)
write_tree_info_to_txt(infile_jake_tree, infile_jake_3p1f, keep_2P2F=False, keep_3P1F=True)

TTree info written to:
/blue/avery/rosedj1/ZplusXpython/sidequests/findmissingevents_comparetoelisa/jakes_new2018data/CRLLos_listOfEvents_jake_2P2F.txt
TTree info written to:
/blue/avery/rosedj1/ZplusXpython/sidequests/findmissingevents_comparetoelisa/jakes_new2018data/CRLLos_listOfEvents_jake_3P1F.txt


## Compare files.

In [5]:
fc_elisa_2p2fvs3p1f = FileComparer(infile_elisa_2p2f, infile_elisa_3p1f)



In [6]:
fc_elisa_2p2fvs3p1f.set_common_to_both

{(315357, 120, 90263024),
 (315357, 582, 474271871),
 (315420, 147, 111359074),
 (315420, 1017, 671153361),
 (315512, 947, 703286863),
 (315645, 174, 181433236),
 (315764, 252, 345190884),
 (315840, 794, 913785809),
 (315840, 973, 1072588622),
 (316060, 480, 497131326),
 (316114, 416, 428493934),
 (316114, 893, 891656605),
 (316114, 1293, 1312567745),
 (316199, 860, 1188017841),
 (316239, 543, 672514410),
 (316457, 1310, 1472332465),
 (316569, 1369, 1879123903),
 (316590, 436, 591724376),
 (316701, 284, 408489512),
 (316766, 1884, 2594040013),
 (317087, 718, 952431980),
 (317182, 530, 685103194),
 (317182, 866, 1209428434),
 (317297, 524, 829605824),
 (317320, 1263, 1875457729),
 (317392, 1137, 1604619521),
 (317392, 1222, 1734755160),
 (317527, 1391, 1992720836),
 (317626, 234, 322598287),
 (317626, 306, 427271140),
 (317648, 58, 48360780),
 (317683, 199, 255555243),
 (319526, 35, 61882137),
 (319579, 238, 295829669),
 (319579, 2200, 3454641909),
 (319639, 100, 117898040),
 (319849, 4

In [35]:
fc_jakevselisa_3p1f = FileComparer(infile_jake_3p1f, infile_elisa_3p1f, control_reg="3p1f", verbose=True)
fc_jakevselisa_2p2f = FileComparer(infile_jake_2p2f, infile_elisa_2p2f, control_reg="2p2f", verbose=True)
fc_jakevselisa_all  = FileComparer(infile_jake, infile_elisa, control_reg="", verbose=True)

# fc_jakevselisa_3p1f.print_results(whose="file1", show_n_evts=5)
# fc_jakevselisa_3p1f.print_results(whose="file2", show_n_evts=5)
# fc_jakevselisa_2p2f.print_results(whose="file1", show_n_evts=5)
# fc_jakevselisa_2p2f.print_results(whose="file2", show_n_evts=5)
# # fc_jakevselisa_all.print_results(whose="all", show_n_evts=10)

# # Write the events to txt.
# overwrite = 0
# fc_jakevselisa_3p1f.save_events_to_txt(kind="file1_unique", outtxt=outfile_jake_3p1f_unique, no_dup=True, overwrite=overwrite)
# fc_jakevselisa_3p1f.save_events_to_txt(kind="file2_unique", outtxt=outfile_elisa_3p1f_unique, no_dup=True, overwrite=overwrite)
# fc_jakevselisa_3p1f.save_events_to_txt(kind="common", outtxt=outfile_3p1f_common, no_dup=True, overwrite=overwrite)

# fc_jakevselisa_2p2f.save_events_to_txt(kind="file1_unique", outtxt=outfile_jake_2p2f_unique, no_dup=True, overwrite=overwrite)
# fc_jakevselisa_2p2f.save_events_to_txt(kind="file2_unique", outtxt=outfile_elisa_2p2f_unique, no_dup=True, overwrite=overwrite)
# fc_jakevselisa_2p2f.save_events_to_txt(kind="common", outtxt=outfile_2p2f_common, no_dup=True, overwrite=overwrite)

Printing duplicates in file:
/blue/avery/rosedj1/ZplusXpython/sidequests/findmissingevents_comparetoelisa/CRLLos_3P1F_listOfEvents.txt
[(315512, 947, 703286863)]
Printing duplicates in file:
/blue/avery/rosedj1/ZplusXpython/sidequests/findmissingevents_comparetoelisa/CRLLos_2P2F_listOfEvents.txt
[(315512, 947, 703286863)]
Printing duplicates in file:
/blue/avery/rosedj1/ZplusXpython/sidequests/findmissingevents_comparetoelisa/CRLLos_listOfEvents.txt
[(316114, 416, 428493934),
 (315645, 174, 181433236),
 (315764, 252, 345190884),
 (316199, 860, 1188017841),
 (316239, 543, 672514410),
 (316766, 1884, 2594040013),
 (316590, 436, 591724376),
 (316060, 480, 497131326),
 (315840, 973, 1072588622),
 (316569, 1369, 1879123903),
 (315420, 1017, 671153361),
 (317626, 234, 322598287),
 (317626, 306, 427271140),
 (317392, 1137, 1604619521),
 (317182, 866, 1209428434),
 (317320, 1263, 1875457729),
 (317392, 1222, 1734755160),
 (319993, 134, 221669019),
 (319849, 465, 778725049),
 (324201, 102, 1628

In [39]:
len(fc_jakevselisa_all.ls_of_tup_file2) - len(fc_jakevselisa_all.ls_of_tup_file2_nodup)

120

In [36]:
# Make a manual counter.
# Print out events which appear more than once in fc_jakevselisa_all.ls_of_tup_file2.
for f in fc_jakevselisa_all.ls_of_tup_file2:
    ct = 0
    if f in fc_jakevselisa_all.ls_of_tup_file2_nodup:
        if ct == 5: break
        print(f)
# Identify the 2 events from 120 which didn't appear in 118.
# Why did counter not find them?

## LLR Group's (diff. xs) RedBkg Files

Vukasin pointed me to their root files:

- `/afs/cern.ch/user/v/vmilosev/public/forJake/new_ZX_LLR/AllData*`

Let's have a look.

In [2]:
infile_llr_2018 = "/blue/avery/rosedj1/ZplusXpython/sidequests/data/LLR_redbkg/AllData_ZX_redTree_2018.root"

f = rt.TFile(infile_llr_2018)
t = f.Get("SelectedTree")
t

<ROOT.TTree object ("SelectedTree") at 0x560336c69f00>

In [3]:
list(t.GetListOfBranches())

[<ROOT.TBranch object ("dbkg_kin") at 0x56033867aa10>,
 <ROOT.TBranch object ("dbkg") at 0x560338683e60>,
 <ROOT.TBranch object ("d_2j") at 0x560338684400>,
 <ROOT.TBranch object ("d_2j_JESup") at 0x5603386849a0>,
 <ROOT.TBranch object ("d_2j_JESdn") at 0x560338684f40>,
 <ROOT.TBranch object ("d_2j_JERup") at 0x5603386854e0>,
 <ROOT.TBranch object ("d_2j_JERdn") at 0x560338685a80>,
 <ROOT.TBranch object ("d_2j_qq") at 0x560338686020>,
 <ROOT.TBranch object ("d_2j_other") at 0x560338686580>,
 <ROOT.TBranch object ("d_2j_qqgen") at 0x560338686b20>,
 <ROOT.TBranch object ("d_2jgen") at 0x5603386870c0>,
 <ROOT.TBranch object ("dphi") at 0x560338687660>,
 <ROOT.TBranch object ("dphigen") at 0x560338687c00>,
 <ROOT.TBranch object ("mjjgen") at 0x5603386881a0>,
 <ROOT.TBranch object ("mjj") at 0x560338688740>,
 <ROOT.TBranch object ("gene") at 0x560338688ce0>,
 <ROOT.TBranch object ("genpt") at 0x560338689280>,
 <ROOT.TBranch object ("ngenjet") at 0x560338689820>,
 <ROOT.TBranch object ("vbfM

In [4]:
t.Show(0)

 dbkg_kin        = 0.808485
 dbkg            = 0.000100291
 d_2j            = 0.5
 d_2j_JESup      = 0.5
 d_2j_JESdn      = 0.5
 d_2j_JERup      = 0.5
 d_2j_JERdn      = 0.5
 d_2j_qq         = 0.5
 d_2j_other      = -nan
 d_2j_qqgen      = -nan
 d_2jgen         = -nan
 dphi            = 0
 dphigen         = 0
 mjjgen          = 0
 mjj             = 0
 gene            = 0
 genpt           = 1.4013e-45
 ngenjet         = 0
 vbfMela         = 0.502897
 vbfMela_gen     = -nan
 vbfMela_qg      = 4.59135e-41
 weight          = 0.0134422
 weight_up       = (vector<float>*)0x560338954410
 weight_dn       = (vector<float>*)0x5603389543f0
 weight_name     = (vector<TString>*)0x56033897b5f0
 nmatch          = 0
 drj             = (vector<float>*)0x56033895d920
 dptj            = (vector<float>*)0x56033895d970
 dphij           = (vector<float>*)0x56033896f750
 GenHPt          = 176
 GenHEta         = 1.11152e-33
 GenHPhi         = 0
 GenHMass        = 8.96831e-44
 ZZPt            = 85.9009
 nClean

In [None]:
t.Scan("htxs_stage1_red_cat:htxs_stage1_red_catName:htxs_stage1_red_prod_cat:htxs_stage1_red_prod_catName")

In [17]:
# htxs_stage1_red_catName_ls = [str(evt.htxs_stage1_red_catName) for evt in t if evt.htxs_stage1_red_catName == 'ZX']
# count_ZX_str = Counter(htxs_stage1_red_catName_ls)
htxs_stage1_red_cat_ls = [str(evt.htxs_stage1_red_cat) for evt in t if evt.htxs_stage1_red_cat == -2]
count_ZX_cat = Counter(htxs_stage1_red_cat_ls)
# dup_key_ls = [k for k,v in counter.items() if v > 1]
print(count_ZX_cat)

Counter({'-2': 12331})


In [18]:
t.GetEntries()

12331

In [35]:
elisa_evtid_2p2f_tup = get_list_of_tuples(get_list_of_lines(infile_elisa_2p2f))
n_tot_tup = len(elisa_evtid_2p2f_tup)
start_at = 1000
for num_tup, tup in enumerate(elisa_evtid_2p2f_tup[start_at:], start_at):
    if (num_tup % 1000) == 0:
        print(f"Checking tuple #{num_tup}/{n_tot_tup}")
    for evt_num, evt in enumerate(t):
        if evt.RunNumber != elisa_evtid_2p2f_tup_example[0]:
            continue
        if evt.LumiNumber != elisa_evtid_2p2f_tup_example[1]:
            continue
        if evt.EventNumber != elisa_evtid_2p2f_tup_example[2]:
            continue
        print(f"Event {evt_num}, {evt.RunNumber}:{evt.LumiNumber}:{evt.EventNumber}")
        break

Checking tuple #1000/46067
Checking tuple #2000/46067
Checking tuple #3000/46067
Checking tuple #4000/46067
Checking tuple #5000/46067
Checking tuple #6000/46067
Checking tuple #7000/46067
Checking tuple #8000/46067
Checking tuple #9000/46067
Checking tuple #10000/46067
Checking tuple #11000/46067
Checking tuple #12000/46067
Checking tuple #13000/46067
Checking tuple #14000/46067
Checking tuple #15000/46067
Checking tuple #16000/46067
Checking tuple #17000/46067
Checking tuple #18000/46067
Checking tuple #19000/46067
Checking tuple #20000/46067
Checking tuple #21000/46067
Checking tuple #22000/46067
Checking tuple #23000/46067
Checking tuple #24000/46067
Checking tuple #25000/46067
Checking tuple #26000/46067
Checking tuple #27000/46067
Checking tuple #28000/46067
Checking tuple #29000/46067
Checking tuple #30000/46067
Checking tuple #31000/46067
Checking tuple #32000/46067
Checking tuple #33000/46067
Checking tuple #34000/46067


KeyboardInterrupt: 

In [None]:
evt_ls_llr = [(evt.RunNumber, evt.LumiNumber, evt.EventNumber, ) for evt in t]

pprint(evt_ls_llr[:20])

# evt_set_llr = set()
# evt_set_elisa = set()

with open("", "w") as f:
    f.write("# Run : LumiSect : Event\n")
    for evt in tree:
        keep_evt = True if (keep_2P2F and evt.is2P2F) or (keep_3P1F and evt.is3P1F) else False
        if keep_evt:
            f.write(f"{evt.Run} : {evt.LumiSect} : {evt.Event}\n")
print(f"TTree info written to:\n{outtxt}")