In [31]:
%matplotlib inline
import pandas as pd
import uproot
import awkward
import matplotlib.pyplot as plt
import numpy as np
import math
import glob
import numba
from matplotlib.colors import LogNorm
import uproot_methods
from tqdm.auto import tqdm

from utils import set_plotting_style, get_chunking, get_chunking_dask


try:
    from yahist import Hist1D, Hist2D
except:
    !pip install git+git://github.com/aminnj/yahist.git#egg=yahist -U
    from yahist import Hist1D, Hist2D
    
import utils

In [32]:
from dask import delayed
from dask.distributed import Client
client = Client("uaf-10.t2.ucsd.edu:50123")
client

0,1
Client  Scheduler: tcp://uaf-10.t2.ucsd.edu:50123  Dashboard: http://uaf-10.t2.ucsd.edu:8787/status,Cluster  Workers: 23  Cores: 23  Memory: 115.00 GB


In [33]:
set_plotting_style()

In [34]:
df_data = pd.read_pickle("data/df_data_allrho.pkl")
df_mc = pd.read_pickle("data/df_mc_allrho.pkl")

In [52]:
tofind = df_data.query("dimuon_mass>13 and angle3dmumu>1.5 and DV_rho>0.4")[["run","luminosityBlock","event"]]
tofind

Unnamed: 0,run,luminosityBlock,event
19681,319337,53,18071059
222647,319337,953,764383341
28265,319337,988,809720068
31878,319337,989,810630904
147556,319337,1013,841280953
...,...,...,...
58237,319678,209,292576462
222135,319678,219,312103889
258958,319678,222,317221893
72136,319678,231,332154747


In [53]:
# /hadoop/cms/store/group/snt/run2_data2018/DoubleMuon_Run2018C-17Sep2018-v1_MINIAOD_CMS4_V10-02-04/
fnames = glob.glob(
    "/hadoop/cms/store/group/snt/run2_data2018/*_Run2018C-17Sep2018-v1_MINIAOD_CMS4_V10-02-04/*.root"
)
len(fnames)

1543

In [54]:
# uproot.open(fnames[0])["Events"].keys()

In [55]:
def findevents(fname):
    rle = []
    try:
        t = uproot.open(fname)["Events"]
        run, lumi, event = t.arrays([
            "uint_eventMaker_evtrun_CMS3.obj",
            "uint_eventMaker_evtlumiBlock_CMS3.obj",
            "ull_eventMaker_evtevent_CMS3.obj"],outputtype=tuple)
        good = tofind["run"].isin(run) & tofind["luminosityBlock"].isin(lumi) & tofind["event"].isin(event)
        rle = tofind[good].values.tolist()
    except:
        pass
    return dict(fname=fname,rle=rle)
findevents(fnames[0])#.compute()

{'fname': '/hadoop/cms/store/group/snt/run2_data2018/DoubleMuon_Run2018C-17Sep2018-v1_MINIAOD_CMS4_V10-02-04/merged_ntuple_1.root',
 'rle': []}

In [56]:
results = client.gather(client.map(findevents, fnames[:2000]))

In [73]:
dfrle = pd.DataFrame(results)
dfrle = dfrle[~dfrle["rle"].apply(lambda x:x==[])]
dfrle.style.applymap(lambda x:str(x))

Unnamed: 0,fname,rle
46,/hadoop/cms/store/group/snt/run2_data2018/DoubleMuon_Run2018C-17Sep2018-v1_MINIAOD_CMS4_V10-02-04/merged_ntuple_20.root,"[[319639, 670, 1023676703]]"
714,/hadoop/cms/store/group/snt/run2_data2018/JetHT_Run2018C-17Sep2018-v1_MINIAOD_CMS4_V10-02-04/merged_ntuple_119.root,"[[319678, 95, 105419509]]"
802,/hadoop/cms/store/group/snt/run2_data2018/JetHT_Run2018C-17Sep2018-v1_MINIAOD_CMS4_V10-02-04/merged_ntuple_199.root,"[[319639, 897, 1313985232]]"
961,/hadoop/cms/store/group/snt/run2_data2018/MET_Run2018C-17Sep2018-v1_MINIAOD_CMS4_V10-02-04/merged_ntuple_104.root,"[[319678, 209, 292576462]]"
1145,/hadoop/cms/store/group/snt/run2_data2018/SingleMuon_Run2018C-17Sep2018-v1_MINIAOD_CMS4_V10-02-04/merged_ntuple_113.root,"[[319639, 207, 296757079], [319658, 155, 249146307]]"
1189,/hadoop/cms/store/group/snt/run2_data2018/SingleMuon_Run2018C-17Sep2018-v1_MINIAOD_CMS4_V10-02-04/merged_ntuple_153.root,"[[319656, 305, 399023585]]"
1340,/hadoop/cms/store/group/snt/run2_data2018/SingleMuon_Run2018C-17Sep2018-v1_MINIAOD_CMS4_V10-02-04/merged_ntuple_29.root,"[[319678, 222, 317221893]]"
1430,/hadoop/cms/store/group/snt/run2_data2018/SingleMuon_Run2018C-17Sep2018-v1_MINIAOD_CMS4_V10-02-04/merged_ntuple_370.root,"[[319639, 1208, 1745449884]]"
1476,/hadoop/cms/store/group/snt/run2_data2018/SingleMuon_Run2018C-17Sep2018-v1_MINIAOD_CMS4_V10-02-04/merged_ntuple_411.root,"[[319639, 446, 698133909]]"
1511,/hadoop/cms/store/group/snt/run2_data2018/SingleMuon_Run2018C-17Sep2018-v1_MINIAOD_CMS4_V10-02-04/merged_ntuple_70.root,"[[319678, 110, 130628044]]"


In [92]:
print(
    "skim.py ",
    " ".join(dfrle.fname.values),
    " -t Events ",
    " -c '",
    "||".join(["(evt_run=={} && evt_lumiBlock=={} && evt_event=={})".format(*rle) for rle in dfrle.rle.sum()]),
    "'",
)

dfscout = pd.concat([df_data.query("run=={} and luminosityBlock=={} and event=={}".format(*rle))
           for rle in dfrle.rle.sum()])
dfscout

skim.py  /hadoop/cms/store/group/snt/run2_data2018/DoubleMuon_Run2018C-17Sep2018-v1_MINIAOD_CMS4_V10-02-04/merged_ntuple_20.root /hadoop/cms/store/group/snt/run2_data2018/JetHT_Run2018C-17Sep2018-v1_MINIAOD_CMS4_V10-02-04/merged_ntuple_119.root /hadoop/cms/store/group/snt/run2_data2018/JetHT_Run2018C-17Sep2018-v1_MINIAOD_CMS4_V10-02-04/merged_ntuple_199.root /hadoop/cms/store/group/snt/run2_data2018/MET_Run2018C-17Sep2018-v1_MINIAOD_CMS4_V10-02-04/merged_ntuple_104.root /hadoop/cms/store/group/snt/run2_data2018/SingleMuon_Run2018C-17Sep2018-v1_MINIAOD_CMS4_V10-02-04/merged_ntuple_113.root /hadoop/cms/store/group/snt/run2_data2018/SingleMuon_Run2018C-17Sep2018-v1_MINIAOD_CMS4_V10-02-04/merged_ntuple_153.root /hadoop/cms/store/group/snt/run2_data2018/SingleMuon_Run2018C-17Sep2018-v1_MINIAOD_CMS4_V10-02-04/merged_ntuple_29.root /hadoop/cms/store/group/snt/run2_data2018/SingleMuon_Run2018C-17Sep2018-v1_MINIAOD_CMS4_V10-02-04/merged_ntuple_370.root /hadoop/cms/store/group/snt/run2_data2018/

Unnamed: 0,nDV,nDV_good,nJet,nPV,nPVM,nMuon,nMuon_good,nGenPart,nGenMuon,pass_skim,...,DV_redchi2,DV_xyErrorMax,Muon1_redchi2,Muon2_redchi2,Muon1_adqpt,Muon2_adqpt,maxabsdxy,minabsdxy,maxdxy,mindxy
64129,1,1,1,16,6,2,2,0,0,True,...,3.712486,0.002083,0.587692,2.513063,0.013479,0.015098,0.001163,6.9e-05,0.001163,6.9e-05
254877,1,1,0,16,2,2,2,0,0,True,...,0.645704,0.003495,0.630884,0.801515,0.013683,0.01431,0.020882,0.001728,0.020882,-0.001728
246350,1,1,2,8,4,2,2,0,0,True,...,4.484245,0.002555,0.974408,0.586053,0.011273,0.011363,0.009989,0.00397,0.009989,0.00397
58237,1,1,0,21,5,2,2,0,0,True,...,7.841395,0.001577,2.60704,1.645813,0.018832,0.015696,0.023273,0.005591,0.023273,0.005591
58140,1,1,1,16,2,2,2,0,0,True,...,0.251404,0.003417,0.807644,0.805133,0.022473,0.044508,0.000667,0.000337,0.000667,0.000337
98743,1,1,1,15,5,2,2,0,0,True,...,9.623866,0.001389,0.797332,2.047328,0.022337,0.015394,0.017012,0.002313,0.017012,-0.002313
211905,1,1,0,13,4,2,2,0,0,True,...,1.492501,0.002611,0.901055,1.048795,0.018831,0.010127,0.018751,0.002034,0.002034,-0.018751
258958,1,1,1,10,3,2,2,0,0,True,...,4.373765,0.001683,0.690146,0.969874,0.013953,0.011488,0.002744,0.001794,0.001794,-0.002744
163362,1,1,0,9,2,2,2,0,0,True,...,3.783675,0.002431,0.653546,0.774016,0.024161,0.018996,0.016287,0.000617,0.016287,-0.000617
74474,1,1,0,17,2,2,2,0,0,True,...,0.000165,0.002107,0.802013,1.863577,0.011548,0.022198,0.006134,0.003356,0.006134,-0.003356


In [153]:
dfscout[[
    "run","luminosityBlock","event",
    "Muon1_pt","Muon1_eta","Muon1_phi",
    "Muon2_pt","Muon2_eta","Muon2_phi",
]].reset_index(drop=True)

Unnamed: 0,run,luminosityBlock,event,Muon1_pt,Muon1_eta,Muon1_phi,Muon2_pt,Muon2_eta,Muon2_phi
0,319639,670,1023676703,10.628433,-1.047891,-0.734558,4.416438,1.234232,-0.736498
1,319678,95,105419509,5.338997,-0.885285,-2.414602,3.274397,1.670738,-2.457687
2,319639,897,1313985232,11.957077,1.146923,1.525702,5.943577,-0.871864,1.513131
3,319678,209,292576462,11.276275,-1.495154,1.286327,7.625919,1.28885,1.277313
4,319639,207,296757079,31.300734,2.009207,-1.151984,7.736771,-2.296393,-1.151716
5,319658,155,249146307,40.329353,-1.617615,-0.661665,9.643002,1.094859,-0.674573
6,319656,305,399023585,25.639217,-1.052147,1.133269,6.264578,0.830886,1.162845
7,319678,222,317221893,32.132786,-1.197004,0.89086,7.341533,0.931144,0.896174
8,319639,1208,1745449884,41.391056,1.059134,-1.997069,6.703817,-2.018955,-2.021108
9,319639,446,698133909,34.833851,0.860745,-2.075229,3.924639,-2.334232,-2.087784


In [170]:
dfcms4 = pd.read_csv("skim2_info.csv")
dfcms4.columns = dfcms4.columns.str.strip()
dfcms4.columns

Index(['run', 'luminosityBlock', 'event', 'pt', 'eta', 'phi', 'istight',
       'ispfmuon', 'isglobalmuon', 'chi2', 'standalonehits', 'matchedstations',
       'validpixelhits', 'trackerlayers', 'dxyPV', 'dzPV', 'ecaltime',
       'hcaltime', 'trkkink', 'is medium'],
      dtype='object')

In [171]:
dfcms4good = []
for irow,row in dfcms4.iterrows():
    q = (
        "run==@row.run "
        " and luminosityBlock==@row.luminosityBlock "
        " and (   ((Muon1_eta-@row.eta)**2 + (Muon1_phi-@row.phi)**2)**0.5 < 0.05"
        "      or ((Muon2_eta-@row.eta)**2 + (Muon2_phi-@row.phi)**2)**0.5 < 0.05 )"
        )
    df = dfscout.query(q)
    if len(df) == 0: continue
    dfcms4good.append(row)
dfcms4good = pd.DataFrame(dfcms4good).sort_values(["event","pt"],ascending=False).reset_index(drop=True)
dfcms4good

Unnamed: 0,run,luminosityBlock,event,pt,eta,phi,istight,ispfmuon,isglobalmuon,chi2,standalonehits,matchedstations,validpixelhits,trackerlayers,dxyPV,dzPV,ecaltime,hcaltime,trkkink,is medium
0,319639.0,1208.0,1745000000.0,41.281005,1.059181,-1.997107,1.0,1.0,1.0,1.080289,20.0,4.0,4.0,10.0,-4.1e-05,-0.002152,0.0,1.202148,4.746208,1.0
1,319639.0,1208.0,1745000000.0,6.687177,-2.018142,-2.021076,0.0,1.0,1.0,1.052847,22.0,4.0,6.0,13.0,0.010327,3.471367,6.629883,0.066803,6.623426,1.0
2,319639.0,446.0,698133900.0,34.809536,0.860718,-2.075232,1.0,1.0,1.0,0.643148,35.0,3.0,4.0,13.0,6e-06,0.001783,0.0,0.107681,4.237413,1.0
3,319639.0,446.0,698133900.0,4.437724,-2.332167,-2.08578,0.0,1.0,1.0,1.756314,17.0,4.0,5.0,12.0,0.000938,4.530557,0.0,-0.749633,9.617173,1.0
4,319656.0,305.0,399023600.0,26.632646,-1.051928,1.132974,1.0,1.0,1.0,0.758996,29.0,4.0,5.0,12.0,0.000342,-0.002481,5.723633,0.702759,16.507167,1.0
5,319656.0,305.0,399023600.0,6.265715,0.830872,1.162901,0.0,1.0,1.0,1.439865,37.0,0.0,4.0,14.0,-0.019096,-1.530651,-14.97851,0.312927,4.803157,0.0
6,319678.0,222.0,317221900.0,32.137844,-1.197009,0.890849,1.0,1.0,1.0,2.249054,18.0,3.0,3.0,11.0,-0.000777,0.003989,0.0,0.232727,6.598489,1.0
7,319678.0,222.0,317221900.0,7.316002,0.931301,0.896043,0.0,1.0,1.0,1.103778,28.0,1.0,4.0,12.0,0.000608,-2.196708,0.0,-0.091873,6.219835,0.0
8,319639.0,207.0,296757100.0,31.009424,2.009292,-1.151901,1.0,1.0,1.0,0.963761,18.0,4.0,6.0,15.0,0.01266,-0.00208,0.0,-0.19699,4.265247,1.0
9,319639.0,207.0,296757100.0,7.931679,-2.293881,-1.150497,0.0,1.0,1.0,1.112121,24.0,4.0,6.0,13.0,0.000978,10.452304,0.0,0.0,12.333965,1.0


In [174]:
def docolor(which,threshold):
    def f(val):
        bad = False
        if which == "lt": bad = abs(val) < threshold
        else: bad = abs(val) > threshold
        return "background-color: #FF928B" if bad else ""
    return f

(dfcms4good.style
 .applymap(docolor("lt",0.5),subset=["isglobalmuon","is medium"])
 .applymap(docolor("lt",0.5),subset=["istight","ispfmuon","standalonehits","validpixelhits"])
 .applymap(docolor("gt",10),subset=["chi2"])
 .applymap(docolor("lt",2),subset=["matchedstations"])
 .applymap(docolor("lt",6),subset=["trackerlayers"])
 .applymap(docolor("gt",0.2),subset=["dxyPV"])
 .applymap(docolor("gt",0.5),subset=["dzPV"])
#  .applymap(docolor("ispfmuon"),subset="ispfmuon")
#  .applymap(docolor("isglobalmuon"),subset="isglobalmuon")
)
# dfs = dfcms4good
# for k in ["istight","ispfmuon","isglobalmuon","chi2","standalonehits","matchedstations","validpixelhits","trackerlayers","dxyPV","dzPV"]:
#     dfs = dfs.style.applymap(docolor("istight"),subset=["istight"])

Unnamed: 0,run,luminosityBlock,event,pt,eta,phi,istight,ispfmuon,isglobalmuon,chi2,standalonehits,matchedstations,validpixelhits,trackerlayers,dxyPV,dzPV,ecaltime,hcaltime,trkkink,is medium
0,319639,1208,1745000000.0,41.281,1.05918,-1.99711,1,1,1,1.08029,20,4,4,10,-4.15e-05,-0.002152,0.0,1.20215,4.74621,1
1,319639,1208,1745000000.0,6.68718,-2.01814,-2.02108,0,1,1,1.05285,22,4,6,13,0.0103268,3.47137,6.62988,0.0668029,6.62343,1
2,319639,446,698134000.0,34.8095,0.860718,-2.07523,1,1,1,0.643148,35,3,4,13,6.44e-06,0.001783,0.0,0.107681,4.23741,1
3,319639,446,698134000.0,4.43772,-2.33217,-2.08578,0,1,1,1.75631,17,4,5,12,0.0009376,4.53056,0.0,-0.749633,9.61717,1
4,319656,305,399024000.0,26.6326,-1.05193,1.13297,1,1,1,0.758996,29,4,5,12,0.0003425,-0.002481,5.72363,0.702759,16.5072,1
5,319656,305,399024000.0,6.26571,0.830872,1.1629,0,1,1,1.43986,37,0,4,14,-0.019096,-1.53065,-14.9785,0.312927,4.80316,0
6,319678,222,317222000.0,32.1378,-1.19701,0.890849,1,1,1,2.24905,18,3,3,11,-0.000777,0.0039889,0.0,0.232727,6.59849,1
7,319678,222,317222000.0,7.316,0.931301,0.896043,0,1,1,1.10378,28,1,4,12,0.0006085,-2.19671,0.0,-0.091873,6.21983,0
8,319639,207,296757000.0,31.0094,2.00929,-1.1519,1,1,1,0.963761,18,4,6,15,0.0126602,-0.00208,0.0,-0.19699,4.26525,1
9,319639,207,296757000.0,7.93168,-2.29388,-1.1505,0,1,1,1.11212,24,4,6,13,0.0009777,10.4523,0.0,0.0,12.334,1


distributed.client - ERROR - Failed to reconnect to scheduler after 10.00 seconds, closing client
distributed.client - ERROR - Failed to reconnect to scheduler after 10.00 seconds, closing client
distributed.client - ERROR - Failed to reconnect to scheduler after 10.00 seconds, closing client
distributed.client - ERROR - Failed to reconnect to scheduler after 10.00 seconds, closing client
distributed.client - ERROR - Failed to reconnect to scheduler after 10.00 seconds, closing client
distributed.client - ERROR - Failed to reconnect to scheduler after 10.00 seconds, closing client
distributed.client - ERROR - Failed to reconnect to scheduler after 10.00 seconds, closing client
distributed.client - ERROR - Failed to reconnect to scheduler after 10.00 seconds, closing client
distributed.client - ERROR - Failed to reconnect to scheduler after 10.00 seconds, closing client
distributed.client - ERROR - Failed to reconnect to scheduler after 10.00 seconds, closing client
distributed.utils - 

distributed.utils - ERROR - 
Traceback (most recent call last):
  File "/home/users/namin/miniconda3/envs/analysisenv/lib/python3.7/site-packages/distributed/utils.py", line 666, in log_errors
    yield
  File "/home/users/namin/miniconda3/envs/analysisenv/lib/python3.7/site-packages/distributed/client.py", line 998, in _reconnect
    await self._close()
  File "/home/users/namin/miniconda3/envs/analysisenv/lib/python3.7/site-packages/distributed/client.py", line 1268, in _close
    await gen.with_timeout(timedelta(seconds=2), list(coroutines))
concurrent.futures._base.CancelledError
distributed.utils - ERROR - 
Traceback (most recent call last):
  File "/home/users/namin/miniconda3/envs/analysisenv/lib/python3.7/site-packages/distributed/utils.py", line 666, in log_errors
    yield
  File "/home/users/namin/miniconda3/envs/analysisenv/lib/python3.7/site-packages/distributed/client.py", line 998, in _reconnect
    await self._close()
  File "/home/users/namin/miniconda3/envs/analysisen