In [77]:
%matplotlib inline
import pandas as pd
import uproot
import awkward
import matplotlib.pyplot as plt
import numpy as np
import math
import glob
import numba
from matplotlib.colors import LogNorm
import uproot_methods
from tqdm.auto import tqdm

from utils import set_plotting_style, get_chunking, get_chunking, smaller_dtypes, make_df, hist2d_dask
import utils

import scipy.stats
import concurrent.futures
from dask.distributed import Client
import dask.dataframe as dd
import dask.array as da
from dask.distributed import progress
from dask import delayed
from yahist import Hist1D, Hist2D

In [78]:
set_plotting_style()

In [79]:
def make_df_new(fname, entrystart=None, entrystop=None):
    t = uproot.open(fname)["Events"]
    branches = ["pass_*","lxy", "nDV_raw","nMuon_raw",
                     "/Muon(1|2)_n(Valid|Matched|Tracker|Expected).*/",
                     "/Muon(1|2)_(pt|eta|phi|m|trackIso|charge|dz.*|dxy.*|chi2|ndof|drjet|pass*)$/",
                     "/DV_(chi2|ndof|rho.*|inPixel.*|x|y|z|xError|yError|zError|pass*|distPixel)$/",
                     "run","luminosityBlock","event",
                     "PVM_x","PVM_y",
                     "BS_x","BS_y",
                     "dimuon_*","cosphi*","absdphi*","minabs*","logabs*",
                     "L1_*",
                    ]
    arrs = t.arrays(branches,outputtype=dict,namedecode="ascii",entrystart=entrystart,entrystop=entrystop)
    
    # flatten into dataframe and require `sel`
    df = pd.DataFrame()
    for k in arrs.keys():
        if any(k.startswith(y) for y in ["n","lxy","pass_","BS_","MET_","run","lumi","event","L1_",
                                        "dimuon","cosphi","absdphi","minabs","logabs",
                                         "Muon","DV","PVM"
                                        ]):
            df[k] = arrs[k]

    df["excesshits"] = ~df["pass_excesshits"]
    df["Muon1_excesshits"] = df["Muon1_nValidPixelHits"] - df["Muon1_nExpectedPixelHits"]
    df["Muon2_excesshits"] = df["Muon2_nValidPixelHits"] - df["Muon2_nExpectedPixelHits"]
    
    for name,dtype in smaller_dtypes:
        if name not in df.columns: continue
        df[name] = df[name].astype(dtype, copy=False)
    return df

dfsall = make_df_new("/home/users/namin/2019/scouting/repo/batch/output_synch_data2017_v5.root")

dfs = dfsall.query("pass_baseline_iso").rename({"luminosityBlock":"lumi","dimuon_mass":"mass"},axis=1).reset_index(drop=True)

In [80]:
# fname = "~/public_html/dump/scouting/synchronization/data_2017D_v3.csv"
# fname = "~/public_html/dump/scouting/synchronization/data_2018C_v1.csv"
# fname = "~/public_html/dump/scouting/synchronization/mc_2017_mphi2GeV_ctau0p5mm_v1.csv"

# fname = "~/public_html/dump/scouting/synchronization/data_2017D_v4.csv"
# fname = "~/public_html/dump/scouting/synchronization/mc_2017_mphi2GeV_ctau0p5mm_v4.csv"
# dfs.to_csv(fname,index=False)
# !head -n 10 {fname}

In [81]:
# dfr = pd.read_csv("/home/users/namin/2019/scouting/repo/analysis/synch/flat_dimuon_tree.csv",delimiter=";",skiprows=1)
# dfr = pd.read_csv("/home/users/namin/2019/scouting/repo/analysis/synch/flat_dimuon_tree_BScorr.csv",delimiter=";",skiprows=1)
# dfr = pd.read_csv("/home/users/namin/2019/scouting/repo/analysis/synch/flat_dimuon_tree_synchtest.csv",delimiter=";",skiprows=1)
# dfr = pd.read_csv("/home/users/namin/2019/scouting/repo/analysis/synch/flat_dimuon_tree_synchtest_allcuts.csv",delimiter=";",skiprows=1)
# dfr = pd.read_csv("/home/users/namin/2019/scouting/repo/analysis/synch/flat_dimuon_tree_synchtest_2018_bugfix.csv",delimiter=";",skiprows=1)
# dfr = pd.read_csv("/home/users/namin/2019/scouting/repo/analysis/synch/flat_dimuon_tree_synchtest_MC_ggphimumu_m2_ct0p5.csv",delimiter=";",skiprows=1)
# dfr = pd.read_csv("/home/users/namin/2019/scouting/repo/analysis/synch/flat_dimuon_tree_2017_v2.csv",delimiter=";",skiprows=1)
# dfr = pd.read_csv("/home/users/namin/2019/scouting/repo/analysis/synch/flat_dimuon_tree_2018_v2.csv",delimiter=";",skiprows=1)
dfr = pd.read_csv("/home/users/namin/2019/scouting/repo/analysis/synch/flat_dimuon_tree_2017_v5.csv",delimiter=";",skiprows=1)
# dfr = dfr.drop_duplicates(["lumi","event_num"]).reset_index(drop=True)

dfr["event"] = dfr["event_num"].astype(int)
dfr


Unnamed: 0,run,lumi,event_num,dimuon_pt,dimuon_mass,lxy,dphidimudv,detadimudv,ctau,muon1_dxy,...,DVyerr,DVzerr,DV_chi2overndof,distPixel,detamumu,dphimumu,dRmumu,dRmuon1jet,dRmuon2jet,event
0,302033,1,275917.0,7.365117,1.998552,0.096444,0.004872,-0.001451,0.026170,0.101965,...,0.019945,0.053009,0.000049,999.0,0.410277,-0.336688,0.530741,9999.000000,9999.0,275917
1,302033,1,687055.0,13.819388,2.843702,0.020191,-0.014000,0.157834,0.004154,-0.053929,...,0.003948,0.022427,0.056620,999.0,-0.425033,0.134935,0.445937,9999.000000,9999.0,687055
2,302033,1,480897.0,9.214244,0.640106,0.111449,-0.002915,-0.030687,0.007742,-0.079933,...,0.038242,0.099370,1.870853,999.0,0.129199,-0.026325,0.131854,9999.000000,9999.0,480897
3,302033,1,710535.0,12.948071,3.216195,0.333688,0.018529,-0.129182,0.082871,-0.113011,...,0.009622,0.045973,0.047818,999.0,0.286485,0.434426,0.520384,9999.000000,9999.0,710535
4,302033,1,201210.0,13.082446,4.552272,0.017233,0.007500,0.320138,0.005996,-0.010908,...,0.005389,0.023021,0.120651,999.0,0.544375,0.512283,0.747515,3.505961,9999.0,201210
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7014,302033,39,31613749.0,12.564462,1.213843,0.084975,0.014364,-0.065160,0.008209,-0.043006,...,0.026072,0.029989,0.075247,999.0,0.201262,-0.034781,0.204245,9999.000000,9999.0,31613749
7015,302033,39,31731494.0,11.358769,3.090746,0.150051,-0.011826,0.034342,0.040826,0.093456,...,0.012368,0.021971,0.034338,999.0,-0.386319,-0.442521,0.587425,9999.000000,9999.0,31731494
7016,302033,39,31645079.0,8.101517,1.039076,0.242937,0.015369,-0.684589,0.031155,-0.058353,...,0.021570,0.046906,0.316120,999.0,-0.252723,0.057302,0.259138,9999.000000,9999.0,31645079
7017,302033,39,31732675.0,8.177569,0.975317,0.477060,0.012982,-0.200139,0.056893,-0.026025,...,0.022654,0.047548,0.128291,999.0,-0.123281,-0.196771,0.232201,9999.000000,9999.0,31732675


In [82]:
print(len(dfr))
print(len(dfs))

7019
7019


In [83]:
dfs.columns
# dfr["muon1_excesspixelhits"]

Index(['pass_l1', 'pass_fiducialgen', 'pass_excesshits', 'pass_materialveto',
       'pass_fiducialgen_norho', 'pass_baseline', 'pass_baseline_iso',
       'pass_baseline_isohalf', 'pass_baseline_extra',
       'pass_baseline_extra_iso', 'pass_baseline_extra_isohalf', 'lxy',
       'nDV_raw', 'nMuon_raw', 'Muon1_nValidMuonHits', 'Muon1_nValidPixelHits',
       'Muon1_nMatchedStations', 'Muon1_nTrackerLayersWithMeasurement',
       'Muon1_nValidStripHits', 'Muon1_nExpectedPixelHits',
       'Muon2_nValidMuonHits', 'Muon2_nValidPixelHits',
       'Muon2_nMatchedStations', 'Muon2_nTrackerLayersWithMeasurement',
       'Muon2_nValidStripHits', 'Muon2_nExpectedPixelHits', 'Muon1_pt',
       'Muon1_eta', 'Muon1_phi', 'Muon1_m', 'Muon1_trackIso', 'Muon1_chi2',
       'Muon1_ndof', 'Muon1_charge', 'Muon1_dxy', 'Muon1_dz', 'Muon1_dxyError',
       'Muon1_dzError', 'Muon1_dxyCorr', 'Muon1_drjet', 'Muon2_pt',
       'Muon2_eta', 'Muon2_phi', 'Muon2_m', 'Muon2_trackIso', 'Muon2_chi2',
       'Muon

In [84]:
# dfr.columns
(dfr["muon1_excesspixelhits"] == dfs["Muon1_excesshits"]).mean()
(dfr["muon2_excesspixelhits"] == dfs["Muon2_excesshits"]).mean()


1.0

In [85]:
df = pd.concat([dfr[["event","distPixel","DVx"]].rename({"event":"event_r"},axis=1), dfs[["event","DV_distPixel","DV_x"]].rename({"event":"event_s"},axis=1)], axis=1)
df
# df.query("event_r != event_s")
df.query("DV_distPixel < 999.")

Unnamed: 0,event_r,distPixel,DVx,event_s,DV_distPixel,DV_x
537,1742718,0.009078,1.947917,1742718,0.009078,1.947917
1195,5642886,0.008563,-0.04908,5642886,0.008563,-0.04908
2039,9372885,0.137568,-2.355789,9372885,0.137568,-2.355789
2670,13072236,0.02789,2.667628,13072236,0.02789,2.667628
2798,13336957,0.012208,6.65697,13336957,0.012208,6.656971
4626,20883201,0.002037,0.644338,20883201,0.002037,0.644338
5653,25007278,0.014113,-4.25028,25007278,0.014113,-4.25028
5717,26296389,0.008205,0.866193,26296389,0.008205,0.866193
6340,28318000,0.014623,-0.714782,28318000,0.014623,-0.714782
6984,31760646,0.02089,2.069167,31760646,0.02089,2.069167


In [29]:
len(set(dfr["event_num"].values) & set(dfs["event"].values))

5279

In [32]:
df = dfr[["event_num","distPixel"]].merge(dfs[["event","DV_distPixel"]],on="event")
df
# df.query("distPixel<0.2")

Unnamed: 0,event,distPixel,DV_distPixel
0,275917,2.643746,999.0
1,687055,2.639595,999.0
2,480897,2.572551,999.0
3,710535,2.309263,999.0
4,201210,2.629519,999.0
...,...,...,...
5275,31869714,2.596141,999.0
5276,31607858,2.294939,999.0
5277,31652688,2.647378,999.0
5278,31700178,2.255802,999.0


## New flattened babies (considers multiple DVs, muons)

In [154]:
def dump_df(df, fname):
    df = df.query("pass_baseline_iso")[[
        "event","luminosityBlock","run",
        "dimuon_mass",
        "DV_x","DV_y","DV_rhoCorr","lxy",
        "Muon1_pt","Muon1_eta","Muon2_pt","Muon2_eta",
        "absdphimumu","absdphimudv",
        "logabsetaphi","minabsdxy","excesshits",
        "dimuon_pt",
        "Muon1_dxyCorr",
        "Muon2_dxyCorr",
        "BS_x",
        "BS_y",
    ]].rename({"luminosityBlock":"lumi","dimuon_mass":"mass"},axis=1).reset_index(drop=True)
    df.to_csv(fname,index=False)
    
# df_data = make_df_new("/home/users/namin/2019/scouting/repo/batch/output_synch_data2017_v4.root")
# df_mc = make_df_new("/home/users/namin/2019/scouting/repo/batch/output_synch_mc2017_v4.root")

# dump_df(df_data, "~/public_html/dump/scouting/synchronization/data_2017D_v4.csv")
# dump_df(df_mc, "~/public_html/dump/scouting/synchronization/mc_2017_mphi2GeV_ctau0p5mm_v4.csv")