In [18]:
import ROOT as rt
from pprint import pprint
import numpy as np
from collections import Counter

In [19]:
def write_tree_info_to_txt(infile, outtxt, keep_2P2F=True, keep_3P1F=True):
    """
    Write info from TFile `infile` from TTree 'passedEvents' to `outtxt`.

    Info which gets written:
    Run : LumiSect : Event
    """
    tfile = rt.TFile.Open(infile)
    tree = tfile.Get("passedEvents")

    with open(outtxt, "w") as f:
        f.write("# Run : LumiSect : Event\n")
        for evt in tree:
            keep_evt = True if (keep_2P2F and evt.is2P2F) or (keep_3P1F and evt.is3P1F) else False
            if keep_evt:
                f.write(f"{evt.Run} : {evt.LumiSect} : {evt.Event}\n")

def get_list_of_lines(evt_ls_txt):
    """
    Return a list of the lines from `evt_ls_txt`.
    The lines must start with a digit.
    Trailing newlines ('\\n') are stripped.
    """
    with open(evt_ls_txt, "r") as f:
        return [line.rstrip('\n') for line in f.readlines() if line[0].isdigit()]

def get_list_of_tuples(evt_ls):
    """
    Return a list of 3-tuples from a list of strings `evt_ls`:

    [
        (Run1, LumiSect1, Event1),
        (Run2, LumiSect2, Event2),
        ...
    ]
    """
    new_evt_ls = []
    for line in evt_ls:
        tup = tuple([int(num) for num in line.split(":")])
        new_evt_ls.append(tup)
    return new_evt_ls

In [23]:
class FileComparer:

    def __init__(self, txt_file1, txt_file2, control_reg=""):
        """
        Feed in two txt files to be compared.

        NOTE:
        - Each txt file is converted to a list of 3-tuples and stored.
        - Only lines which begin with a digit are read and stored.
        """
        self.file1 = txt_file1
        self.file2 = txt_file2
        self.cr = control_reg

        self.check_cr(txt_file1, txt_file2)
        if control_reg in "":
            self.cr = "all"
        self.ls_of_tup_file1_nodup = None
        self.ls_of_tup_file2_nodup = None

        self.ls_of_tup_file1 = get_list_of_tuples(get_list_of_lines(txt_file1))
        if self.check_for_dups(txt_file1, self.ls_of_tup_file1):
            # Remove duplicates by turning to a set and then back to list.
            self.ls_of_tup_file1_nodup = list(set(self.ls_of_tup_file1))

        self.ls_of_tup_file2 = get_list_of_tuples(get_list_of_lines(txt_file2))
        if self.check_for_dups(txt_file2, self.ls_of_tup_file2):
            self.ls_of_tup_file2_nodup = list(set(self.ls_of_tup_file2))

        self.compare_files()

    def check_for_dups(self, txt_file, ls_of_tup):
        """Return True and print info if duplicates within a file are found."""
        len_ls = len(ls_of_tup)
        len_set = len(set(ls_of_tup))
        if len_ls != len_set:
            print(f"[WARNING] Duplicates ({len_ls-len_set}) found in file: {txt_file}")
            print(f"[WARNING] len(ls)={len_ls} != len(set)={len_set}")
            return True
        return False

    def check_cr(self, path1, path2):
        """Make sure that the control region is the one requested."""
        cr_low = self.cr.lower()
        assert cr_low in ("2p2f", "3p1f", "")
        # Make sure that the two files have the requested CR.
        msg = f"The `control_reg` ({self.cr}) not found in names of txt files."
        assert all(cr_low in f.lower() for f in (path1, path2)), msg

    def compare_files(self):
        """
        """
        self.set_common_to_both = set(self.ls_of_tup_file1_nodup) & set(self.ls_of_tup_file2_nodup)
        self.set_unique_to_file1 = set(self.ls_of_tup_file1_nodup) - set(self.ls_of_tup_file2_nodup)
        self.set_unique_to_file2 = set(self.ls_of_tup_file2_nodup) - set(self.ls_of_tup_file1_nodup)

    def print_results(self, whose="all", show_n_evts=25, save_to_file=None):
        """Print info describing differences between two files.
        
        Parameters
        ----------
        whose : str
            "file1", "file2", "all"
        """
        print(f"Comparing {self.cr.upper()}:")
        print(f"file1: {self.file1}")
        print(f"file2: {self.file2}")

        print(f"{'n_evts total file1 (no dup): ':<25}{len(self.ls_of_tup_file1_nodup)}")
        print(f"{'n_evts total file2 (no dup): ':<25}{len(self.ls_of_tup_file2_nodup)}")
        print(f"{'n_evts in common: ':<25}{len(self.set_common_to_both)}")
        print(f"{'n_evts unique to file1: ':<25}{len(self.set_unique_to_file1)}")
        print(f"{'n_evts unique to file2: ':<25}{len(self.set_unique_to_file2)}")

        header = "#-- Run -- LumiSect -- Event --#"
        if show_n_evts == -1:
            show_n_evts = None
        if whose in ("file1", "all"):
            print(f"  file1's unique events:")
            print(header)
            pprint(list(self.set_unique_to_file1)[:show_n_evts])
            print()
        if whose in ("file2", "all"):
            print(f"  file2's unique events:")
            print(header)
            pprint(list(self.set_unique_to_file2)[:show_n_evts])
            print()

    def save_events_to_txt(self, kind, outtxt, no_dup=True):
        """
        Write the events to `outtxt` in the format:

        Run : LumiSect : Event

        Parameters
        ----------
        kind : str
            Choose which events to write to `outtxt`.
            "file1", "file2", "common", "file1_unique", "file2_unique"
        """
        assert kind in ("file1", "file2", "common", "file1_unique", "file2_unique")

        if kind in "file1":
            iter_ls_of_tup = self.ls_of_tup_file1_nodup if no_dup else self.ls_of_tup_file1
        elif kind in "file2":
            iter_ls_of_tup = self.ls_of_tup_file2_nodup if no_dup else self.ls_of_tup_file2
        elif kind in "common":
            iter_ls_of_tup = self.set_common_to_both
        elif kind in "file1_unique":
            iter_ls_of_tup = self.set_unique_to_file1
        elif kind in "file2_unique":
            iter_ls_of_tup = self.set_unique_to_file2

        with open(outtxt, "w") as f:
            f.write("# Run : LumiSect : Event\n")
            for tup in iter_ls_of_tup:
                f.write(f"{tup[0]} : {tup[1]} : {tup[2]}\n")
            print(f"Wrote '{self.cr} {kind}' events to file:\n{outtxt}")

In [28]:
infile_jake_tree = "/blue/avery/rosedj1/ZplusXpython/data/ZLL_CR_FRapplied/Data_2018_NoDuplicates_RunEventLumi.root"

infile_elisa       = "/blue/avery/rosedj1/ZplusXpython/data/elisa/CRLLos_listOfEvents.txt"
infile_elisa_2p2f  = "/blue/avery/rosedj1/ZplusXpython/data/elisa/CRLLos_2P2F_listOfEvents.txt"
infile_elisa_3p1f  = "/blue/avery/rosedj1/ZplusXpython/data/elisa/CRLLos_3P1F_listOfEvents.txt"

infile_jake      = "/blue/avery/rosedj1/ZplusXpython/data/elisa/CRLLos_listOfEvents_jake.txt"
infile_jake_2p2f = "/blue/avery/rosedj1/ZplusXpython/data/elisa/CRLLos_listOfEvents_jake_2P2F.txt"
infile_jake_3p1f = "/blue/avery/rosedj1/ZplusXpython/data/elisa/CRLLos_listOfEvents_jake_3P1F.txt"

outfile_elisa_2p2f_unique  = "/blue/avery/rosedj1/ZplusXpython/data/elisa/CRLLos_2P2F_listOfEvents_unique.txt"
outfile_elisa_3p1f_unique  = "/blue/avery/rosedj1/ZplusXpython/data/elisa/CRLLos_3P1F_listOfEvents_unique.txt"
outfile_jake_2p2f_unique = "/blue/avery/rosedj1/ZplusXpython/data/elisa/CRLLos_listOfEvents_jake_2P2F_unique.txt"
outfile_jake_3p1f_unique = "/blue/avery/rosedj1/ZplusXpython/data/elisa/CRLLos_listOfEvents_jake_3P1F_unique.txt"

outfile_2p2f_common = "/blue/avery/rosedj1/ZplusXpython/data/elisa/CRLLos_listOfEvents_2P2F_common.txt"
outfile_3p1f_common = "/blue/avery/rosedj1/ZplusXpython/data/elisa/CRLLos_listOfEvents_3P1F_common.txt"
# write_tree_info_to_txt(tree, infile_jake)

## Make txt files of events.

In [22]:
# write_tree_info_to_txt(infile_jake_tree, infile_jake_2p2f, keep_2P2F=True, keep_3P1F=False)
# write_tree_info_to_txt(infile_jake_tree, infile_jake_3p1f, keep_2P2F=False, keep_3P1F=True)

## Compare files.

In [29]:
# fc_jakevselisa_3p1f = FileComparer(infile_jake_3p1f, infile_elisa_3p1f, control_reg="3p1f")
# fc_jakevselisa_2p2f = FileComparer(infile_jake_2p2f, infile_elisa_2p2f, control_reg="2p2f")
# fc_jakevselisa_all = FileComparer(infile_jake, infile_elisa, control_reg="")

# fc_jakevselisa_3p1f.print_results(whose="file1", show_n_evts=-1)
# fc_jakevselisa_2p2f.print_results(whose="file2", show_n_evts=10)
# fc_jakevselisa_all.print_results(whose="all", show_n_evts=10)

fc_jakevselisa_3p1f.save_events_to_txt(kind="file1_unique", outtxt=outfile_jake_3p1f_unique, no_dup=True)
fc_jakevselisa_3p1f.save_events_to_txt(kind="file2_unique", outtxt=outfile_elisa_3p1f_unique, no_dup=True)
fc_jakevselisa_3p1f.save_events_to_txt(kind="common", outtxt=outfile_3p1f_common, no_dup=True)

fc_jakevselisa_2p2f.save_events_to_txt(kind="file1_unique", outtxt=outfile_jake_2p2f_unique, no_dup=True)
fc_jakevselisa_2p2f.save_events_to_txt(kind="file2_unique", outtxt=outfile_elisa_2p2f_unique, no_dup=True)
fc_jakevselisa_2p2f.save_events_to_txt(kind="common", outtxt=outfile_2p2f_common, no_dup=True)

Wrote 'file1_unique' events to file:
/blue/avery/rosedj1/ZplusXpython/data/elisa/CRLLos_listOfEvents_jake_3P1F_unique.txt
Wrote 'file2_unique' events to file:
/blue/avery/rosedj1/ZplusXpython/data/elisa/CRLLos_3P1F_listOfEvents_unique.txt
Wrote 'common' events to file:
/blue/avery/rosedj1/ZplusXpython/data/elisa/CRLLos_listOfEvents_3P1F_common.txt
Wrote 'file1_unique' events to file:
/blue/avery/rosedj1/ZplusXpython/data/elisa/CRLLos_listOfEvents_jake_2P2F_unique.txt
Wrote 'file2_unique' events to file:
/blue/avery/rosedj1/ZplusXpython/data/elisa/CRLLos_2P2F_listOfEvents_unique.txt
Wrote 'common' events to file:
/blue/avery/rosedj1/ZplusXpython/data/elisa/CRLLos_listOfEvents_2P2F_common.txt


In [165]:
# # Finding Elisa's duplicate 3P1F event:
# counter = Counter(fc_jakevselisa_3p1f.ls_of_tup_elis)
# pprint([k for k,v in counter.items() if v > 1])

# # Finding Elisa's duplicate 2P2F event:
# counter = Counter(fc_jakevselisa_2p2f.ls_of_tup_elis)
# pprint([k for k,v in counter.items() if v > 1])

# Finding ALL of Elisa's duplicate events:
# counter = Counter(fc_jakevselisa_all.ls_of_tup_elis)
# pprint([k for k,v in counter.items() if v == 4])


# # Finding Jake's duplicate 3P1F event:
counter = Counter(fc_jakevselisa_3p1f.ls_of_tup_jake)
pprint([k for k,v in counter.items() if v > 1])

# # Finding Jake's duplicate 2P2F event:
# counter = Counter(fc_jakevselisa_2p2f.ls_of_tup_jake)
# pprint([k for k,v in counter.items() if v > 1])

# Finding ALL of Jake's duplicate events:
# counter = Counter(fc_jakevselisa_all.ls_of_tup_jake)
# pprint([k for k,v in counter.items() if v == 4])

# pprint(f"len of ls_of_tup_elis  = {len(fc_jakevselisa_all.ls_of_tup_elis)}")
# print(f"len of set_of_tup_elis = {len(set(fc_jakevselisa_all.ls_of_tup_elis))}")

[(316059, 264, 291781001),
 (315420, 224, 173693574),
 (315705, 476, 320288284),
 (315770, 329, 459813961),
 (316240, 278, 390335437),
 (316059, 2, 1543686),
 (316590, 117, 160436503),
 (316590, 156, 219203038),
 (316569, 20, 13446010),
 (316111, 7, 6163919),
 (316758, 626, 866786910),
 (315506, 77, 91921103),
 (316718, 173, 266503769),
 (315713, 38, 24367865),
 (316239, 367, 465753806),
 (315973, 600, 707371743),
 (316590, 279, 393597622),
 (316271, 106, 117834495),
 (316380, 66, 46970628),
 (316457, 389, 442516703),
 (316666, 839, 1192941203),
 (316995, 120, 161611689),
 (316719, 41, 57584524),
 (316876, 270, 382427314),
 (317292, 306, 446000825),
 (317320, 192, 262193931),
 (317320, 261, 349193238),
 (317435, 591, 845742889),
 (317527, 966, 1373701503),
 (319756, 569, 924886538),
 (319449, 613, 889108115),
 (319503, 156, 240050059),
 (319524, 799, 1172395462),
 (319579, 1329, 1989114863),
 (319910, 700, 1254838029),
 (319910, 973, 1765786641),
 (320024, 116, 191728372),
 (320040, 61

### Duplicate event belongs to these possible data sets:

* /DoubleMuon/Run2018A-12Nov2019_UL2018-v2/MINIAOD
* /DoubleMuon/Run2018A-17Sep2018-v2/MINIAOD
* /DoubleMuon/Run2018A-PromptReco-v1/MINIAOD
* /DoubleMuon/Run2018A-UL2018_MiniAODv2-v1/MINIAOD
* /EGamma/Run2018A-12Nov2019_UL2018-v2/MINIAOD
* /EGamma/Run2018A-17Sep2018-v2/MINIAOD
* /EGamma/Run2018A-22Jun2018-v1/MINIAOD
* /EGamma/Run2018A-PromptReco-v1/MINIAOD
* /EGamma/Run2018A-UL2018_MiniAODv2-v1/MINIAOD
* /MuonEG/Run2018A-12Nov2019_UL2018_rsb-v1/MINIAOD
* /MuonEG/Run2018A-17Sep2018-v1/MINIAOD
* /MuonEG/Run2018A-PromptReco-v1/MINIAOD
* /MuonEG/Run2018A-UL2018_MiniAODv2-v1/MINIAOD
* /SingleMuon/Run2018A-06Jun2018-v1/MINIAOD
* /SingleMuon/Run2018A-12Nov2019_UL2018-v3/MINIAOD
* /SingleMuon/Run2018A-17Sep2018-v2/MINIAOD
* /SingleMuon/Run2018A-PromptReco-v1/MINIAOD
* /SingleMuon/Run2018A-UL2018_MiniAODv2-v2/MINIAOD





## Elisa has a unique event (that Jake doesn't have):
(321834, 84, 126135620)

### Data sets which contain this run

* /DoubleMuon/Run2018D-PromptReco-v2/MINIAOD
* /EGamma/Run2018D-22Jan2019-v2/MINIAOD
* /EGamma/Run2018D-PromptReco-v2/MINIAOD
* /MuonEG/Run2018D-PromptReco-v2/MINIAOD
* /SingleMuon/Run2018D-22Jan2019-v2/MINIAOD
* /SingleMuon/Run2018D-PromptReco-v2/MINIAOD




In [175]:
f = rt.TFile(infile_jake_tree)
t = f.Get("passedEvents")

dup_tup = (315420, 224, 173693574)

for ct, evt in enumerate(t):
    if evt.Run == dup_tup[0] and evt.LumiSect == dup_tup[1] and evt.Event == dup_tup[2]:
        t.Show(ct)

 Run             = 315420
 Event           = 173693574
 LumiSect        = 224
 finalState      = 3
 passedZ1LSelection = 0
 passedZXCRSelection = 1
 eventWeight     = 0.928322
 k_qqZZ_qcd_M    = 1
 k_qqZZ_ewk      = 1
 met             = 87.0738
 mass4l          = 129.299
 lep_Hindex      = (vector<int>*)0x558b636262a0
 lep_pt          = (vector<float>*)0x558b6277a160
 lep_eta         = (vector<float>*)0x558b5fac7f20
 lep_phi         = (vector<float>*)0x558b64fff9d0
 lep_mass        = (vector<float>*)0x558b6051c390
 lep_RelIsoNoFSR = (vector<float>*)0x558b642c39b0
 lep_id          = (vector<int>*)0x558b63bba2a0
 lep_tightId     = (vector<int>*)0x558b64e173b0
 lep_matchedR03_PdgId = (vector<int>*)0x558b63be6570
 lep_matchedR03_MomId = (vector<int>*)0x558b63875da0
 lep_matchedR03_MomMomId = (vector<int>*)0x558b64873f30
 vtxLepFSR_BS_pt = (vector<double>*)0x558b6345a470
 vtxLepFSR_BS_eta = (vector<double>*)0x558b61ac9af0
 vtxLepFSR_BS_phi = (vector<double>*)0x558b63669d40
 vtxLepFSR_BS_mas

In [173]:
t.Show(26)

 Run             = 316059
 Event           = 291781001
 LumiSect        = 264
 finalState      = 3
 passedZ1LSelection = 0
 passedZXCRSelection = 1
 eventWeight     = 0.892403
 k_qqZZ_qcd_M    = 1
 k_qqZZ_ewk      = 1
 met             = 42.9264
 mass4l          = 116.51
 lep_Hindex      = (vector<int>*)0x558b644ff940
 lep_pt          = (vector<float>*)0x558b655eca00
 lep_eta         = (vector<float>*)0x558b655ecf00
 lep_phi         = (vector<float>*)0x558b64e16e60
 lep_mass        = (vector<float>*)0x558b64e17360
 lep_RelIsoNoFSR = (vector<float>*)0x558b64e173d0
 lep_id          = (vector<int>*)0x558b64fffb10
 lep_tightId     = (vector<int>*)0x558b65010870
 lep_matchedR03_PdgId = (vector<int>*)0x558b650215d0
 lep_matchedR03_MomId = (vector<int>*)0x558b650214b0
 lep_matchedR03_MomMomId = (vector<int>*)0x558b65044b60
 vtxLepFSR_BS_pt = (vector<double>*)0x558b65056690
 vtxLepFSR_BS_eta = (vector<double>*)0x558b65056870
 vtxLepFSR_BS_phi = (vector<double>*)0x558b65077a60
 vtxLepFSR_BS_mass

In [174]:
t.Show(20826)

 Run             = 316059
 Event           = 291781001
 LumiSect        = 264
 finalState      = 3
 passedZ1LSelection = 0
 passedZXCRSelection = 1
 eventWeight     = 0.892403
 k_qqZZ_qcd_M    = 1
 k_qqZZ_ewk      = 1
 met             = 42.9264
 mass4l          = 116.51
 lep_Hindex      = (vector<int>*)0x558b644ff940
 lep_pt          = (vector<float>*)0x558b655eca00
 lep_eta         = (vector<float>*)0x558b655ecf00
 lep_phi         = (vector<float>*)0x558b64e16e60
 lep_mass        = (vector<float>*)0x558b64e17360
 lep_RelIsoNoFSR = (vector<float>*)0x558b64e173d0
 lep_id          = (vector<int>*)0x558b64fffb10
 lep_tightId     = (vector<int>*)0x558b65010870
 lep_matchedR03_PdgId = (vector<int>*)0x558b650215d0
 lep_matchedR03_MomId = (vector<int>*)0x558b650214b0
 lep_matchedR03_MomMomId = (vector<int>*)0x558b65044b60
 vtxLepFSR_BS_pt = (vector<double>*)0x558b65056690
 vtxLepFSR_BS_eta = (vector<double>*)0x558b65056870
 vtxLepFSR_BS_phi = (vector<double>*)0x558b65077a60
 vtxLepFSR_BS_mass