In [None]:
import uproot as upr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yaml
#from ROOT import vector, RDataFrame, RDF, TFile, TH1F, TH2F, gInterpreter, TMath
from ROOT import TFile, TH1F
from src.general.utils import toVector, get_files, prep_filelist
from time import perf_counter

In [None]:
# Gen-level branches used in this notebook: GenPart_pdgId, GenPart_pt, and GenPart_statusFlags (a particle counts as hard-process when bit 7 or bit 8 is set).

In [None]:
# Set up metadata for the files: pick the sample list and the bDiscValue
# threshold that belong to the chosen era.
era = '2016'
deepflavour = False
deepcsv = True
extras = False
csv_v_flavor=False

# era -> (sample-list YAML file, bDiscValue)
era_settings = {
    "2016": ("samplesCR_2016_Apr2020.yml", 0.3093),
    "2017": ("samplesCR_2017_Apr2020.yml", 0.3033),
    "2018": ("samplesCR_2018_Apr2020.yml", 0.2770),
}
# Fail here with a clear message instead of a NameError on `fname` further down.
if era not in era_settings:
    raise ValueError("Unknown era {!r}; expected one of {}".format(era, sorted(era_settings)))
fname, bDiscValue = era_settings[era]

sampleDir = "samples"
# The output ROOT file is named after the YAML file (without the directory prefix).
outname = fname.replace('.yml','.root')
fname = "{}/{}".format(sampleDir, fname)
outname, fname

In [None]:
# Path templates for the skimmed NanoAOD inputs. The era is substituted here;
# the escaped '{{}}' leaves a single '{}' placeholder in the resulting string
# (sample_path is later handed to get_files together with a sample's fileglob).
sample_path ='/eos/cms/store/group/phys_exotica/bffZprime/nanoAODskimmed/crab_bffv2/{}/{{}}'.format(era)
eff_path ='/eos/cms/store/group/phys_exotica/bffZprime/nanoAODskimmed/crab_bff_eff/{}/{{}}'.format(era)

class sample_processor():
    '''Loads a sample-list YAML file and owns the ROOT output file for it.

    Constructing an instance parses the YAML config, (re)creates ``outname``
    as a ROOT file with one directory per configured sample, and writes the
    configured luminosity into a single-bin histogram called "lumi".
    '''
    def __init__(self,file_name,outname,bDiscValue,is_inclusive=0):
        # plain bookkeeping
        self.file_name = file_name
        self.outname = outname
        self.bDiscValue = bDiscValue
        self.is_inclusive = is_inclusive

        # load config
        # NOTE(review): FullLoader accepts more than plain data; if the YAML can
        # come from an untrusted source, yaml.safe_load is the safer call.
        with open(file_name,'r') as config_file:
            self.sample_dict = yaml.load(config_file, Loader=yaml.FullLoader)

        # set up the output file ('recreate' overwrites an existing file) and
        # one directory per sample, keyed by sample name
        self.out = TFile(outname, 'recreate')
        self.outdirs_dict = {}
        for entry in self.samples():
            sample_name = entry['name']
            self.outdirs_dict[sample_name] = self.out.mkdir(sample_name)

        # store the luminosity in the output file as a one-bin histogram
        self.lumi = self.sample_dict['lumi']
        lumi_hist = TH1F("lumi", "lumi", 1, 0, 1)
        lumi_hist.SetDirectory(self.out)
        lumi_hist.SetBinContent(1, self.lumi)
        lumi_hist.Write()

    def samples(self):
        '''Return the list stored under the 'samples' key of the config.'''
        return self.sample_dict['samples']

    def sample_names(self):
        '''Return the 'name' entry of every configured sample, in config order.'''
        names = []
        for entry in self.samples():
            names.append(entry['name'])
        return names

    def close(self):
        '''Close the ROOT output file.'''
        self.out.Close()

    def __repr__(self):
        return 'from {fn} to {on}\nlumi: {lumi}\nSamples {samples}'.format(
            fn=self.file_name,
            on=self.outname,
            lumi=self.lumi,
            samples=self.sample_names(),
        )

In [None]:
# Build the sample manager. Constructing it (re)creates the ROOT file `outname`
# and writes the per-sample directories and the lumi histogram into it.
sp = sample_processor(fname, outname, bDiscValue)

In [None]:
def check_bitwise(value, n):
    """Return True when bit ``n`` (0 = least significant) is set in ``value``."""
    mask = 1 << n
    return (value & mask) == mask
def hard_process(value):
    """Return a truthy value when bit 7 or bit 8 of a statusFlags word is set.

    In the NanoAOD GenPart_statusFlags convention these bits are documented as
    isHardProcess (7) and fromHardProcess (8).
    """
    bit7_set = check_bitwise(value, 7)
    return bit7_set or check_bitwise(value, 8)

def b_jet(value):
    """Return True when ``value`` is a PDG id of magnitude 5 (b or anti-b quark)."""
    pdg_magnitude = abs(value)
    return pdg_magnitude == 5

def s_jet(value):
    """Return True when ``value`` is a PDG id of magnitude 3 (s or anti-s quark)."""
    pdg_magnitude = abs(value)
    return pdg_magnitude == 3

def gluon(value):
    """Return True when ``value`` has magnitude 21 (the gluon PDG id)."""
    pdg_magnitude = abs(value)
    return pdg_magnitude == 21

def min_phi(phi1, phi2):
    """Return the signed difference ``phi1 - phi2`` wrapped into [-pi, pi).

    Uses the full-precision ``np.pi`` rather than a truncated 3.1415, which
    shifted every wrapped result by up to about 2e-4. Python's ``%`` with a
    positive modulus is never negative, so the wrapped value cannot fall below
    -pi and no further correction is needed.

    Parameters
    ----------
    phi1, phi2 : float
        Azimuthal angles in radians.
    """
    return (phi1 - phi2 + np.pi) % (2 * np.pi) - np.pi

def deltaR_row(row1, row2):
    """Return deltaR between two rows, read from their GenPart_eta / GenPart_phi fields."""
    eta_a, phi_a = row1.GenPart_eta, row1.GenPart_phi
    eta_b, phi_b = row2.GenPart_eta, row2.GenPart_phi
    return deltaR(eta_a, eta_b, phi_a, phi_b)

def deltaR(eta1, eta2, phi1, phi2):
    """Return sqrt(d_eta**2 + d_phi**2), with d_phi wrapped by ``min_phi``."""
    d_eta = eta1 - eta2
    d_phi = min_phi(phi1, phi2)
    return (d_eta**2 + d_phi**2)**.5


def is_in_list(arr, row, tdf):
    """Return True if ``row`` matches a particle already listed in ``arr``.

    ``arr`` holds index labels into ``tdf``. A listed particle matches when it
    has the same GenPart_pdgId as ``row`` and lies within deltaR <= 0.4 of it.
    """
    def _matches(label):
        # same flavour first; the distance is only evaluated for equal pdgId
        listed = tdf.loc[label]
        if listed.GenPart_pdgId != row.GenPart_pdgId:
            return False
        return deltaR_row(listed, row) <= .4

    return any(_matches(label) for label in arr)


def find_duplicates(_tdf):
    """Return a keep-mask (list of bool, one per row) that drops duplicates.

    Rows are visited in order; a row is kept (True) unless ``is_in_list``
    reports that an already-kept row has the same pdgId within deltaR <= 0.4.
    Despite the name, True marks the rows to *keep*, not the duplicates.
    """
    keep_mask = []
    kept_labels = []
    for label, candidate in _tdf.iterrows():
        is_new = not is_in_list(kept_labels, candidate, _tdf)
        if is_new:
            kept_labels.append(label)
        keep_mask.append(is_new)
    return keep_mask

def isKthBitSet(value, k):
    """Return True when bit ``k`` (0 = least significant) of ``value`` is set."""
    shifted = value >> k
    return bool(shifted & 1)

def which_bits(value):
    """Return the set bit positions of ``value`` among bits 0..21 as a comma-separated string.

    Returns an empty string when none of those bits is set.
    """
    set_positions = [str(bit) for bit in range(22) if isKthBitSet(value, bit)]
    return ",".join(set_positions)

def return_jet_multiplicty(nevents):
    """Classify one event of the notebook-global ``df`` by hard-process flavour content.

    ``nevents`` is the entry label of the event (it is passed to ``df.loc``).
    Only particles passing ``hard_process`` are used: those with
    ``GenPart_pt == 0`` are treated as the initial state, those with
    ``GenPart_pt > 0`` as outgoing. Outgoing particles are de-duplicated with
    ``find_duplicates`` before b (|pdgId| 5) and s (|pdgId| 3) quarks are counted.

    Returns a dict with the outgoing b/s counts ("GenNbJets", "GenNsJets"),
    the initial-state counts ("inbjet", "insjet", "ingluon", "inOtherQuarks"),
    "nGenPart" (number of hard-process particles in the event),
    "charge_modulo" (currently always 1) and "multiplicity", coded as
    0: 0j, 1: 1b, 2: 1s, 3: 1b+1s, 4: 2b, -1: other.
    """
    event = df.loc[nevents]
    ishardprocess_flag = event.GenPart_statusFlags.apply(hard_process)
    event = event[ishardprocess_flag]
    initial = event[event.GenPart_pt==0]

    outgoing = event[event.GenPart_pt>0]

    # `event` is already hard-process only, so this second filter keeps every row
    ishardprocess_flag = outgoing.GenPart_statusFlags.apply(hard_process)
    ishardprocess = outgoing[ishardprocess_flag]
    dedupe = find_duplicates(ishardprocess)
    ishardprocess = ishardprocess[dedupe]
    isbjet = ishardprocess.GenPart_pdgId.apply(b_jet)
    issjet = ishardprocess.GenPart_pdgId.apply(s_jet)
    b_and_s_jets = ishardprocess[isbjet|issjet]
    b_and_s_jets = b_and_s_jets.sort_values("GenPart_pt")

    highestpt_bplus = 0
    highestpt_bminus = 0

    highestpt_b = 0
    higestb_eta = 0
    higestb_phi = 0

    highestpt_s = 0
    # leading pt per flavour (and per sign for b), plus the direction of the leading b
    for i, part in b_and_s_jets.iterrows():
        pdgid = part.GenPart_pdgId
        pt = part.GenPart_pt
        if (pdgid==5) and (pt>highestpt_bplus):  highestpt_bplus=pt
        if (pdgid==-5) and (pt>highestpt_bminus):  highestpt_bminus=pt
        if (abs(pdgid)==5) and (pt>highestpt_b):
            highestpt_b=pt
            higestb_eta=part.GenPart_eta
            higestb_phi=part.GenPart_phi
        if (abs(pdgid)==3) and (pt>highestpt_s):  highestpt_s=pt

    # second highest b pt: a b of either sign, separated from the leading b by
    # deltaR >= 0.4 and strictly softer than it
    secondHighestpt_b = 0
    for i, part in b_and_s_jets.iterrows():
        pdgid = part.GenPart_pdgId
        pt = part.GenPart_pt
        eta=part.GenPart_eta
        phi=part.GenPart_phi
        if abs(pdgid)!=5: continue
        if deltaR(eta, higestb_eta, phi, higestb_phi) < 0.4: continue
        if (pt<highestpt_b)and (pt>secondHighestpt_b): secondHighestpt_b=pt
    nbjet = isbjet.sum()
    nsjet = issjet.sum()

    #initial :
    inbjet = initial.GenPart_pdgId.apply(b_jet).sum()
    insjet = initial.GenPart_pdgId.apply(s_jet).sum()
    inOtherQuarks = initial.GenPart_pdgId.apply(lambda x: abs(x) <=9).sum()-inbjet-insjet
    charge_modulo = 1

    #for i, quark in quarks.iterrows():
    #    charge_modulo *= quark.GenPart_pdgId

    #charge_modulo = charge_modulo < 0
    ingluon = initial.GenPart_pdgId.apply(gluon).sum()

    # 0j: 0
    # 1b: 1
    # 1s: 2
    # 1b+1s: 3
    # 2b: 4
    # other: -1
    multiplicity=-1
    if (inbjet==2) and (insjet==0) and (ingluon==0):
        # 5 5 > 1b+1s
        if (highestpt_b>0) and (highestpt_s>0): multiplicity=3
        # 5 5 > 0 b
        elif charge_modulo: multiplicity=0
    if (inbjet==1) and (insjet==1) and (ingluon==0):
        # 5 3 > 0 b
        multiplicity=0
        # 5 3 > 2b
        # if oss is easy
        if ((highestpt_bplus>0) and (highestpt_bminus>0)): multiplicity = 4
        # same sign
        if ((secondHighestpt_b>0) and (highestpt_b>0)): multiplicity = 4
    # 3 21 > 1b
    if (inbjet==0) and (insjet==1) and (ingluon==1): multiplicity=1
    #differentiate between 1b and 1s in 5 21 initial state:
    #
    if (inbjet==1) and (insjet==0) and (ingluon==1):
        if highestpt_b > highestpt_s: multiplicity=1
        if highestpt_s > highestpt_b: multiplicity=2
    if (inbjet==0) and (insjet==0) and (ingluon==2):
        if ((highestpt_s > highestpt_bplus) or (highestpt_s > highestpt_bminus)) and highestpt_b > 0: multiplicity=3
        if ((highestpt_bplus > highestpt_s) and (highestpt_bminus > highestpt_s)): multiplicity=4
    # 5 + x
    if (inbjet==1) and (insjet==0) and (ingluon==0) and (inOtherQuarks==1):
        multiplicity = 0
        if (highestpt_b>0): multiplicity = 3

    # 3 + x
    # Fixed: this branch used to repeat the "5 + x" condition (inbjet==1,
    # insjet==0), which forced that branch's result to 3 and left
    # s + other-quark initial states unclassified.
    if (inbjet==0) and (insjet==1) and (ingluon==0) and (inOtherQuarks==1):
        multiplicity = 3


    return {"GenNbJets": nbjet, "GenNsJets": nsjet,
            "inbjet":inbjet, "insjet":insjet, "ingluon": ingluon,
            "nGenPart": len(event), "multiplicity": multiplicity, "charge_modulo": charge_modulo, "inOtherQuarks": inOtherQuarks}

In [None]:

#def return_jet_multiplicty_2(df, nevents, doSR=True):
#    event = df.loc[nevents]
def return_jet_multiplicty_2(event, doSR=True):
    """Summarise the hard-process b/s-quark topology of a single event.

    Parameters
    ----------
    event : DataFrame
        Gen particles of one event, one row per particle, with the columns
        GenPart_statusFlags, GenPart_pt, GenPart_pdgId, GenPart_eta and
        GenPart_phi (plus the two SR*_jet_nom_muon_corrected_pt_ele_pt
        columns when ``doSR`` is true).
    doSR : bool
        When true, "sr1"/"sr2" are the event means of the SR1/SR2 columns;
        otherwise both are -1.

    Returns
    -------
    dict
        Initial-state counts (inBs, inSs, inOQs, inGs), leading and
        second-leading outgoing b/s kinematics, ``underlying_event_b`` (highest
        pt of a non-hard-process b), ``sDR`` (deltaR between the second-leading
        s and the leading s, -1000 if there is none), sr1/sr2, and
        ``multiplicity``, a category code:

        b b initial state: 2**11 (2b), 2**16 (2s), 2**7 (1b+1s), 2**0 (else)
        b s initial state: 2**12 (2b), 2**15 (2s), 2**30 (b and s), 2**1 (else)
        any other initial state: 0
    """
    multiplicity = 0

    #initial state: hard-process particles with pt == 0, counted by flavour
    inBs = 0
    inSs = 0
    inOQs = 0
    inGs = 0
    for i, part in event.iterrows():
        if not hard_process(int(part.GenPart_statusFlags)): continue
        if part.GenPart_pt != 0: continue
        if abs(part.GenPart_pdgId)==5: inBs += 1
        elif abs(part.GenPart_pdgId)==3: inSs += 1
        elif abs(part.GenPart_pdgId)<10: inOQs += 1
        elif abs(part.GenPart_pdgId)==21: inGs += 1

    #leading out stats
    leadOutB = 0
    leadOutB_id = 0
    leadOutB_eta = 0
    leadOutB_phi = 0
    leadOutBMinus = 0
    leadOutBPlus = 0
    leadOutS = 0
    leadOutS_id = 0
    leadOutS_eta = 0
    leadOutS_phi = 0
    underlying_event_b = 0
    for i, part in event.iterrows():
        if part.GenPart_pt  == 0: continue
        pdgId, pt, eta, phi = part.GenPart_pdgId, part.GenPart_pt, part.GenPart_eta, part.GenPart_phi

        if hard_process(int(part.GenPart_statusFlags)):
            #leading b pt
            if (abs(pdgId)==5) and (pt>leadOutB):
                leadOutB = pt
                leadOutB_eta = eta
                leadOutB_phi = phi
                leadOutB_id = pdgId
            if (pdgId==-5) and (pt>leadOutBMinus): leadOutBMinus = pt
            if (pdgId==5) and (pt>leadOutBPlus): leadOutBPlus = pt
            if (abs(pdgId)==3) and (pt>leadOutS):
                leadOutS = pt
                leadOutS_eta = eta
                leadOutS_phi = phi
                leadOutS_id = pdgId

        else:
            # b quarks outside the hard process are tracked separately
            if (abs(pdgId)==5) and (pt>underlying_event_b): underlying_event_b = pt

    #2nd leading out stats
    secLeadOutB = 0
    secLeadOutS = 0
    sDR = -1000
    for i, part in event.iterrows():
        if not hard_process(int(part.GenPart_statusFlags)): continue
        if part.GenPart_pt  == 0: continue
        pdgId, pt, eta, phi = part.GenPart_pdgId, part.GenPart_pt, part.GenPart_eta, part.GenPart_phi
        #print(pdgId, pt, eta, phi)
        if abs(pdgId) == 5:
            dr = deltaR(eta, leadOutB_eta, phi, leadOutB_phi)
            # same pdgId within dr < 0.4 of the leading b: treated as a copy of it
            if (dr < .4) and (pdgId==leadOutB_id): continue
            if (pt < leadOutB) and (pt > secLeadOutB): secLeadOutB = pt
        if abs(pdgId) == 3:

            dr = deltaR(eta, leadOutS_eta, phi, leadOutS_phi)
            # same pdgId within dr < 0.4 of the leading s: treated as a copy of it
            if (dr < .4) and (pdgId==leadOutS_id): continue
            if (pt < leadOutS) and (pt > secLeadOutS):
                secLeadOutS = pt
                sDR = dr

    # Legacy category codes (used by the commented-out scheme further down):
    # 0j: 0
    # 1b: 1
    # 1s: 2
    # 1b+1s: 3
    # 2b: 4
    # other: -1
    #5 5

    if (inBs == 2) and (inSs == 0) and (inOQs == 0) and (inGs == 0):
        #2b'
        if (leadOutB>0) and (secLeadOutB>0): multiplicity += 2**11
        #2s
        elif (leadOutS>0) and (secLeadOutS>secLeadOutB):

            multiplicity+=2**16
        #1b1s'
        elif (leadOutB>0) and (leadOutS>0): multiplicity+=2**7
        else: multiplicity+=2**0
    #5 3
    if (inBs == 1) and (inSs == 1) and (inOQs == 0) and (inGs == 0):
        #2b'
        if (leadOutB>0) and (secLeadOutB>0): multiplicity += 2**12
        #2s
        elif (leadOutS>0) and (secLeadOutS>secLeadOutB): multiplicity+=2**15
        #1b1s'
        elif (leadOutS>0) and (leadOutB>0): multiplicity+=2**30
        #anything else
        else: multiplicity+=2**1

        ##0b
        #multiplicity = 0
        ##2b'
        #if (leadOutB>0) and (secLeadOutB>0): multiplicity = 4.1
        #elif underlying_event_b > 20: multiplicity = 5
    ##5 21
    #if (inBs == 1) and (inSs == 0) and (inOQs == 0) and (inGs == 1):
    #    #1b
    #    if leadOutB > leadOutS: multiplicity = 1.1
    #    #1s
    #    if leadOutS > leadOutB: multiplicity = 2
    ##3 21
    #if (inBs == 0) and (inSs == 1) and (inOQs == 0) and (inGs == 1): multiplicity = 1
    ##21 21
    #if (inBs == 0) and (inSs == 0) and (inOQs == 0) and (inGs == 2):
    #    #2b
    #    if ((leadOutBPlus > leadOutS) and (leadOutBMinus > leadOutS)): multiplicity = 4
    #    #1b+1s
    #    elif (leadOutS > 0) and (leadOutB > 0): multiplicity = 3
    ## 5 + x
    #if (inBs == 1) and (inSs == 0) and (inOQs == 1) and (inGs ==0):
    #    #1b+1s'
    #    if (leadOutB>0) and (leadOutS>0): multiplicity = 3.2
    #    #1b
    #    elif (leadOutB>0) : multiplicity = 1.2
    #    #1s
    #    if (leadOutS>0): multiplicity = 2.1
    ## 3 + x
    #if (inBs == 0) and (inSs == 1) and (inOQs == 1) and (inGs ==0):
    #    #1b
    #    multiplicity = 3.3
    ##2q
    #if (inBs == 0) and (inSs == 0) and (inOQs == 2) and (inGs ==0):
    #    multiplicity = 4.2
    if doSR:
        sr1 = event.SR1_jet_nom_muon_corrected_pt_ele_pt.mean()
        sr2 = event.SR2_jet_nom_muon_corrected_pt_ele_pt.mean()
    else:
        sr1 = -1
        sr2 = -1
    # "inOQs" used to be listed twice in this literal; the duplicate is removed
    # (key order and values are unchanged).
    return {"inBs":inBs, "inSs":inSs, "inOQs": inOQs, "inGs": inGs,
           "leadOutB":leadOutB, "leadOutB_id": leadOutB_id, "leadOutB_eta": leadOutB_eta, "leadOutB_phi": leadOutB_phi,
            "leadOutBMinus": leadOutBMinus, "leadOutBPlus": leadOutBPlus,
            "leadOutS": leadOutS, "secLeadOutB": secLeadOutB, "secLeadOutS": secLeadOutS,
           "multiplicity":multiplicity, "sr1": sr1, "sr2": sr2, "underlying_event_b": underlying_event_b, "sDR": sDR}

In [None]:
def make_dataframe(max_events=5000, **kwargs):
    """Run ``return_jet_multiplicty_2`` over events of the notebook-global ``df``.

    Parameters
    ----------
    max_events : int or None
        Upper limit on the number of events processed (default 5000, the value
        that used to be hard-coded). Fewer are processed when ``df`` holds
        fewer entries; ``None`` processes all of them.
    **kwargs
        Forwarded unchanged to ``return_jet_multiplicty_2`` (e.g. ``doSR=False``).

    Returns
    -------
    DataFrame
        One row per processed event, built from the returned dicts.

    Notes
    -----
    Events are taken from the 'entry' level of ``df``'s index and each event's
    rows (``df.loc[entry]``) are passed on. The previous version passed the
    bare loop counter, which ``return_jet_multiplicty_2`` cannot iterate over.
    """
    entries = df.index.get_level_values('entry').unique()[:max_events]
    nevents = len(entries)

    njets = []
    start = perf_counter()
    for i, entry in enumerate(entries):
        # progress report every 100 events, with a simple linear time estimate
        if i % 100==0:
            print(i, i/nevents)
            if i > 0:
                elapsed = perf_counter() - start
                nsteps = i/100
                time_per_step = elapsed/nsteps
                steps_remaining = nevents/100 - nsteps
                time_remaining = time_per_step*steps_remaining
                print(elapsed, nsteps, time_remaining)

        njets.append(return_jet_multiplicty_2(df.loc[entry], **kwargs))
    jet_df = pd.DataFrame(njets)
    return jet_df

In [None]:
# Table of signal ROOT files, each labelled with its "mass", "dbs" and "era"
# values; turned into a DataFrame so rows can be selected on those columns.
ufiles = [
    {"mass": 200, "dbs": 0.04, "era": 2016, "file": '/eos/cms/store/group/phys_exotica/bffZprime/private_samples/signal/2016_NanoAODv6/BFFZprimeToMuMu_M200p0_dbs_0p04_2016/BFFZprimeToMuMu_M200p0_dbs_0p04_2016.root'},
    {"mass": 200, "dbs": 1.0, "era": 2016, "file": '/eos/cms/store/group/phys_exotica/bffZprime/private_samples/signal/2016_NanoAODv6/BFFZprimeToMuMu_M200p0_dbs_1p0_2016/BFFZprimeToMuMu_M200p0_dbs_1p0_2016.root'},
    {"mass": 200, "dbs": 0.5, "era": 2016, "file": "/eos/cms/store/group/phys_exotica/bffZprime/private_samples/signal/2016_NanoAODv6/BFFZprimeToMuMu_M200p0_dbs_0p5_2016/BFFZprimeToMuMu_M200p0_dbs_0p5_2016.root"},
     ]
ufiles = pd.DataFrame(ufiles)

In [None]:
# Classify events of the BFF sample whose name contains '200' and the dbs tag
# below, reading the sample's files one after another until the event budget
# (remaining_events) is used up. The results are collected in multi_list.
#
# The loop used to iterate over a global `events` that no earlier cell defines
# and never used files_df; each file is now opened explicitly.
dbs = 0.5
string_dbs = str(dbs).replace('.', 'p')
branches = ['GenPart_pdgId',  'GenPart_statusFlags', 'GenPart_pt', 'GenPart_eta', 'GenPart_phi',
            'SR1_jet_nom_muon_corrected_pt_ele_pt', 'SR2_jet_nom_muon_corrected_pt_ele_pt']
for sample in sp.samples():
    name = sample['name']
    if not 'BFF' in name: continue
    if not '200' in name: continue
    if not string_dbs in name: continue

    files_df = get_files(sample['fileglob'], sample_path)

    multi_list = []
    total_events = 0
    remaining_events = int(1e5)
    for file in files_df.file:
        df = upr.open(file)['Events'].arrays(branches, library='pd')

        #sort out n_events
        entries = df.index.get_level_values('entry').unique()
        n_events = len(entries)
        print(remaining_events, n_events)
        if n_events > remaining_events:
            n_events = remaining_events

        # iterate over the entry labels actually present rather than assuming
        # they run 0..n_events-1
        for entry in entries[:n_events]:
            multi_list.append(return_jet_multiplicty_2(df.loc[entry]))
        total_events += n_events
        remaining_events -= n_events
        if remaining_events <= 0: break

# NOTE: the earlier single-file variants of this cell (which produced jet_df via
# make_dataframe() and ujet_df via make_dataframe(doSR=False) from the ufiles
# table) are disabled; later cells that read ujet_df need them restored.


In [None]:
# One row per classified event (the dicts returned by return_jet_multiplicty_2).
tdf = pd.DataFrame(multi_list)

In [None]:
# Convert the 2**k category codes back to their exponent k. A code of 0 (no
# category matched) gives -inf here.
np.log(tdf.multiplicity.unique())/np.log(2), 2**16

In [None]:
# sDR distributions for the two "2s" categories of return_jet_multiplicty_2:
# code 32768 == 2**15 (b s initial state) and 65536 == 2**16 (b b initial state).
# The LaTeX labels are raw strings: '\D' and '\d' are invalid escape sequences
# in ordinary string literals and trigger a warning.
tdf[(tdf.multiplicity==32768)].sDR.hist(bins=np.linspace(0,4,5), label='0b(1b)')
tdf[(tdf.multiplicity==65536)].sDR.hist(bins=np.linspace(0,4,5), label='0b(2b)')
plt.xlabel(r'$\Delta_{s,s}$', fontsize=20)
plt.ylabel('Count')
plt.legend(title=r'200 GeV $\delta_{bs}==0.5$')

In [None]:
# Deliberate stop for "Run All": the cells below are exploratory and depend on
# variables from earlier interactive sessions. (`break` outside a loop is a
# SyntaxError, which hid that intent.)
raise RuntimeError("Intentional stop: the cells below are exploratory and are not part of the linear run.")

In [None]:
tdf[(tdf.multiplicity==65536)].sDR

In [None]:
# NOTE(review): in the visible notebook ujet_df is only assigned inside
# commented-out code, so this cell fails on a fresh kernel — confirm its source.
ujet_df[(ujet_df.sr2==1) & (ujet_df.multiplicity==2)]

In [None]:
ujet_df.multiplicity

In [None]:
# Count events with multiplicity 2 (and 1) in jet_df and multiplicity 2 in
# ujet_df, and show the jet_df/ujet_df ratio for multiplicity 2.
# NOTE(review): ujet_df has no active definition in the visible notebook.
select = (jet_df.multiplicity==2).sum()
select2 = (jet_df.multiplicity==1).sum()

uselect = (ujet_df.multiplicity==2).sum()
select, uselect, select/uselect, select2

In [None]:
# Ratio of multiplicity==2 counts, jet_df over ujet_df. This cell is repeated
# verbatim several times in the notebook, presumably re-run after reloading
# the inputs — TODO confirm and fold into one helper.
select = (jet_df.multiplicity==2).sum()
uselect = (ujet_df.multiplicity==2).sum()
select, uselect, select/uselect

In [None]:
# Ratio of multiplicity==2 counts, jet_df over ujet_df (verbatim repeat of the
# same comparison elsewhere in the notebook).
select = (jet_df.multiplicity==2).sum()
uselect = (ujet_df.multiplicity==2).sum()
select, uselect, select/uselect

In [None]:
# Ratio of multiplicity==2 counts, jet_df over ujet_df (verbatim repeat of the
# same comparison elsewhere in the notebook).
select = (jet_df.multiplicity==2).sum()
uselect = (ujet_df.multiplicity==2).sum()
select, uselect, select/uselect

In [None]:
def get_multiplicty(value):
    """Return the base-2 logarithm of ``value``.

    Applied to the 2**k category codes, this recovers the exponent k.
    ``np.log2`` is used instead of ``np.log(value)/np.log(2)`` because the
    quotient of two rounded logarithms is not always an exact integer for
    powers of two (2**29 gives 29.000000000000004), which breaks equality
    comparisons and ``unique()`` on the result. A value of 0 still yields -inf.
    """
    return np.log2(value)

In [None]:
# Add the base-2 log of the category code as a new column (mutates jet_df in place).
jet_df['multiplicity2'] = jet_df.multiplicity.apply(get_multiplicty)

In [None]:
# Count events per integer exponent 0..13: 14 unit-width bins with edges at the
# half-integers from -0.5 to 13.5.
np.histogram(jet_df['multiplicity2'], bins=np.linspace(-.5,13.5, 15))

In [None]:
jet_df.secLeadOutS.hist()
plt.yscale('log')

In [None]:
# Deliberate stop for "Run All": the cells below belong to an older
# classification scheme. (`break` outside a loop is a SyntaxError, which hid
# that intent.)
raise RuntimeError("Intentional stop: the cells below are exploratory and are not part of the linear run.")

In [None]:
def multi_profile(_df):
    """Count the rows of ``_df`` for each known multiplicity code.

    Returns a dict mapping every code in the fixed list below to the number of
    rows whose ``multiplicity`` column equals it (0 when the code is absent).
    """
    known_codes = [-1., 0.,   1.,   1.1,  2.,   2.1,  3., 3.1, 3.2,    3.3,  4. ]
    return {code: np.sum(_df.multiplicity==code) for code in known_codes}

In [None]:
# Map of dbs value -> signal file to process. The 0.04 and 1.0 entries are
# commented out, so only the dbs = 0.5 file is loaded by the cell that loops
# over this dict (lookups for other dbs values in data_frames will then fail).
files = {
    #0.04: '/eos/cms/store/group/phys_exotica/bffZprime/private_samples/signal/2016_NanoAODv6/BFFZprimeToMuMu_M200p0_dbs_0p04_2016/BFFZprimeToMuMu_M200p0_dbs_0p04_2016.root',
    #1.0: '/eos/cms/store/group/phys_exotica/bffZprime/private_samples/signal/2016_NanoAODv6/BFFZprimeToMuMu_M200p0_dbs_1p0_2016/BFFZprimeToMuMu_M200p0_dbs_1p0_2016.root',
    .5: "/eos/cms/store/group/phys_exotica/bffZprime/private_samples/signal/2016_NanoAODv6/BFFZprimeToMuMu_M200p0_dbs_0p5_2016/BFFZprimeToMuMu_M200p0_dbs_0p5_2016.root"
}    

In [None]:
# For each dbs value in `files`: open the file's Events tree, load the
# gen-particle and SR branches into the global `df`, classify the events and
# store the result under that dbs value.
# make_dataframe() is given no data argument, so whatever it processes comes
# from the notebook globals assigned in this loop.
data_frames = {}
for dbs, file in files.items():
    print(dbs)
    events = upr.open(file)['Events']
    print(events.num_entries)
    df = events.arrays(['GenPart_pdgId',  'GenPart_statusFlags', 'GenPart_pt', 'GenPart_eta', 'GenPart_phi', 
                        'SR1_jet_nom_muon_corrected_pt_ele_pt', 'SR2_jet_nom_muon_corrected_pt_ele_pt'], library='pd')
    jet_df = make_dataframe()
    data_frames[dbs] = jet_df

In [None]:
data_frames[dbs]

In [None]:
# Build one classified frame per dbs value from the samples in the YAML list
# whose name contains 'BFF' and '200' (names containing '0p04' are skipped).
# Only the first file of each sample is read, and only gen-particle branches
# are loaded (no SR columns).
# NOTE(review): make_dataframe() is called with its default arguments although
# the SR columns are absent from df here — confirm doSR=False was not intended.
precut_data_frames = {}
for sample in sp.samples():
    name = sample['name']
    if not 'BFF' in name: continue
    if not '200' in name: continue
    if '0p04' in name: continue
    files_df = get_files(sample['fileglob'], sample_path)
    file = files_df.file.iloc[0]
    events = upr.open(file)['Events']
    df = events.arrays(['GenPart_pdgId',  'GenPart_statusFlags', 'GenPart_pt', 'GenPart_eta', 'GenPart_phi'], library='pd')
    jet_df = make_dataframe()
    
    # dbs key is derived from the sample name; 0.04 is the fallback when
    # neither '0p5' nor '1p0' appears in it
    dbs = 0.04
    if '0p5' in name: dbs=0.5
    if '1p0' in name: dbs=1.0
    precut_data_frames[dbs] = jet_df

In [None]:
def calc_ratios(dbs):
    """Return, per multiplicity code, the count in ``data_frames[dbs]`` divided by
    the count in ``precut_data_frames[dbs]`` (both taken from ``multi_profile``)."""
    selected = multi_profile(data_frames[dbs])
    baseline = multi_profile(precut_data_frames[dbs])
    ratios = {}
    for code in selected.keys():
        ratios[code] = selected[code]/baseline[code]
    return ratios

In [None]:
calc_ratios(.5)

In [None]:
calc_ratios(1.0)

In [None]:
multi_profile(precut_data_frames[.5])

In [None]:
multi_profile(precut_data_frames[1.0])

In [None]:
multi_profile(data_frames[.5])

In [None]:
multi_profile(data_frames[1.0])

In [None]:
precut_data_frames[.5][precut_data_frames[.5].multiplicity==1].leadOutB_eta.hist(bins= np.linspace(-4,4, 10))

In [None]:
precut_data_frames[1.0][precut_data_frames[1.0].multiplicity==1].leadOutB_eta.hist(bins= np.linspace(-4,4, 10))

In [None]:
precut_data_frames[.5][precut_data_frames[.5].multiplicity==1.1].leadOutB_eta.hist(bins= np.linspace(-4,4, 10))

In [None]:
precut_data_frames[1.0][precut_data_frames[1.0].multiplicity==1.1].leadOutB_eta.hist(bins= np.linspace(-4,4, 10))

In [None]:
# Normalised bar chart of jet_df.multiplicity: six unit-width bins centred on
# -1..4, labelled 'other', '0b', '1b', '1s', '1b+1s', '2b'.
# (Unused `langs` / `students` lists left over from a plotting example were removed.)
hist  = np.histogram(jet_df.multiplicity, bins=np.linspace(-1.5, 4.5, 7), density=True)

fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
ax.bar(['other', '0b', '1b', '1s', '1b+1s', '2b'], hist[0])

plt.yscale('log')
hist[0]

In [None]:
# Normalised bar chart of jet_df.multiplicity: six unit-width bins centred on
# -1..4, labelled 'other', '0b', '1b', '1s', '1b+1s', '2b'.
# (Unused `langs` / `students` lists left over from a plotting example were removed.)
hist  = np.histogram(jet_df.multiplicity, bins=np.linspace(-1.5, 4.5, 7), density=True)

fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
ax.bar(['other', '0b', '1b', '1s', '1b+1s', '2b'], hist[0])

plt.yscale('log')
hist[0]

In [None]:
# Normalised bar chart of jet_df.multiplicity: six unit-width bins centred on
# -1..4, labelled 'other', '0b', '1b', '1s', '1b+1s', '2b'.
# (Unused `langs` / `students` lists left over from a plotting example were removed.)
hist  = np.histogram(jet_df.multiplicity, bins=np.linspace(-1.5, 4.5, 7), density=True)

fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
ax.bar(['other', '0b', '1b', '1s', '1b+1s', '2b'], hist[0])

plt.yscale('log')
hist[0]

In [None]:
jet_df[jet_df.multiplicity==-1]

In [None]:
bin(20481), bin(10625), bin(4481)

In [None]:
check_bitwise(10625, 0), check_bitwise(4481, 0), check_bitwise(20481, 0)

In [None]:
# Manual walk through the first steps of return_jet_multiplicty for entry 780:
# split into pt == 0 ("initial") and pt > 0 ("outgoing") particles, keep the
# hard-process outgoing ones and drop duplicates. Unlike the function, the
# initial-state rows are not filtered on hard_process here.
event = df.loc[780]
initial = event[event.GenPart_pt==0]

outgoing = event[event.GenPart_pt>0]
ishardprocess_flag = outgoing.GenPart_statusFlags.apply(hard_process)
ishardprocess = outgoing[ishardprocess_flag]
dedupe = find_duplicates(ishardprocess)
ishardprocess = ishardprocess[dedupe]
initial

In [None]:
ishardprocess

In [None]:
(check_bitwise(8193, 7), check_bitwise(8193, 8)),  (check_bitwise(8193, 13))

In [None]:
jet_df[jet_df.multiplicity==-1]

In [None]:
# Classify entry 22 of the current global df with the first classifier.
return_jet_multiplicty(22)

In [None]:
# Distribution of the number of outgoing hard-process b quarks per event.
# NOTE(review): the GenNbJets key is only produced by return_jet_multiplicty;
# frames built by make_dataframe (which calls return_jet_multiplicty_2) do not
# carry it — confirm how jet_df was built for this plot.
jet_df.GenNbJets.hist(bins=np.linspace(-.5,7.5, 9))
plt.xlabel('n b-jets')
plt.title('before selection')

In [None]:
# Normalised bar chart of jet_df.multiplicity: six unit-width bins centred on
# -1..4, labelled 'other', '0b', '1b', '1s', '1b+1s', '2b'.
# (Unused `langs` / `students` lists left over from a plotting example were removed.)
hist  = np.histogram(jet_df.multiplicity, bins=np.linspace(-1.5, 4.5, 7), density=True)

fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
ax.bar(['other', '0b', '1b', '1s', '1b+1s', '2b'], hist[0])

plt.yscale('log')


In [None]:
hist

In [None]:
jet_df.multiplicity.hist(bins=np.linspace(-1.5, 4.5, 7), density=True)
plt.yscale('log')


In [None]:
jet_df.GenNbJets.hist(bins=np.linspace(-.5,7.5, 9))
plt.xlabel('n b-jets')
plt.title('before selection')

In [None]:
jet_df.GenNsJets.hist(bins=np.linspace(-.5,7.5, 9))
plt.xlabel('n s-jets')
plt.title('before selection')

In [None]:
# Per-bin counts of GenNsJets (unit-width bins centred on 0..7), scaled by 25.
# NOTE(review): the factor 25 is not explained in the notebook — presumably a
# normalisation to another sample size; confirm.
hist = np.histogram(jet_df.GenNsJets, bins=np.linspace(-.5,7.5, 9))
hist[0]*25

In [None]:
#### skimmed samples

In [None]:
# Reload df / jet_df from the first file of each sample whose name contains
# 'BFF', '350' and '0p5' (gen-particle branches plus the two SR columns).
# If several samples match, the last one wins because df and jet_df are overwritten.
for sample in sp.samples():
    name = sample['name']
    if not 'BFF' in name: continue
    if not '350' in name: continue
    if not '0p5' in name: continue
    files_df = get_files(sample['fileglob'], sample_path)
    file = files_df.file.iloc[0]
    events = upr.open(file)['Events']
    df = events.arrays(['GenPart_pdgId',  'GenPart_statusFlags', 'GenPart_pt', 'GenPart_eta', 'GenPart_phi', 
                        'SR1_jet_nom_muon_corrected_pt_ele_pt', 'SR2_jet_nom_muon_corrected_pt_ele_pt'], library='pd')
    jet_df = make_dataframe()

In [None]:
# Per-category event counts (unit-width bins centred on -1..5), the counts for
# events with sr1 == 1, and their ratio rounded to 3 decimals. The 1e-5 in the
# denominator avoids a division by zero for empty categories.
hist  = np.histogram(jet_df.multiplicity, bins=np.linspace(-1.5, 5.5, 8), density=False)[0]
sr1hist  = np.histogram(jet_df[jet_df.sr1==1].multiplicity, bins=np.linspace(-1.5, 5.5, 8), density=False)[0]
sr1hist, hist, [ round(x,3) for x in sr1hist/(hist+1e-5)]

In [None]:
# Per-category counts, sr1 == 1 counts and their ratio (verbatim repeat of the
# same computation elsewhere in the notebook).
hist  = np.histogram(jet_df.multiplicity, bins=np.linspace(-1.5, 5.5, 8), density=False)[0]
sr1hist  = np.histogram(jet_df[jet_df.sr1==1].multiplicity, bins=np.linspace(-1.5, 5.5, 8), density=False)[0]
sr1hist, hist, [ round(x,3) for x in sr1hist/(hist+1e-5)]

In [None]:
(560+66)/(5904+2531), (583+48)/(5470+2925)

In [None]:
jet_df[jet_df.multiplicity==0].shape[0]/1e4, jet_df[jet_df.multiplicity==0.1].shape[0]/1e4

In [None]:
jet_df[jet_df.multiplicity==0].shape[0]/1e4, jet_df[jet_df.multiplicity==0.1].shape[0]/1e4

In [None]:
jet_df[jet_df.multiplicity==0].shape[0]/1e4, jet_df[jet_df.multiplicity==0.1].shape[0]/1e4

In [None]:
# For events with multiplicity code 5, look at those passing the SR1 flag and
# count their outgoing b quarks inside and outside the hard process.
# NOTE(review): code 5 only appears in the commented-out legacy scheme of
# return_jet_multiplicty_2, so jet_df must come from that version — confirm.
selection = []
non_hard = []
ishard = []
for i, row in jet_df[jet_df.multiplicity==5].iterrows():
    isselected = df.loc[i].SR1_jet_nom_muon_corrected_pt_ele_pt.mean()
    if isselected:

        # outgoing (pt > 0) b quarks of this event
        tdf = df.loc[i]
        tdf = tdf[tdf.GenPart_pt>0]
        tdf = tdf[abs(tdf.GenPart_pdgId) == 5]

        ishard_process = tdf.GenPart_statusFlags.apply(hard_process)
        #tdf = tdf[(tdf.GenPart_pt > 20) | (ishard_process)]
        #ishard_process = tdf.GenPart_statusFlags.apply(hard_process)
        nonhard = len(ishard_process) - ishard_process.sum()
        non_hard.append(nonhard)
        ishardvalue = ishard_process.sum()
        ishard.append(ishardvalue)

        print(i, isselected, nonhard, ishardvalue)

    # record the selection flag of every inspected event; this used to append
    # the `selection` list to itself
    selection.append(isselected)
    if i > 100: break

In [None]:
# Fractions of the inspected events with at least one non-hard-process b, with
# at least one hard-process b, and with either (adding two boolean arrays acts
# as an element-wise OR).
(np.array(non_hard)>0).mean(), (np.array(ishard)>0).mean(), ((np.array(non_hard)>0) + (np.array(ishard)>0)).mean()

In [None]:
for i in range(14):
    print(i, check_bitwise(12354, i))

In [None]:
df.keys()

In [None]:
pd.options.display.float_format = "{:,.2f}".format

In [None]:
jet_df.multiplicity.unique()

In [None]:
# Print the index of events with two initial-state b quarks and both an
# outgoing b and an outgoing s; stops once j exceeds 10, so up to 11 are listed.
j = 0
for i, row in jet_df.iterrows():
    if j > 10: break
    if not (row.inBs==2): continue
    #if not((row.inBs==1) & (row.inBs==1)): continue
    if not((row.leadOutB>0) & (row.leadOutS>0)): continue
    print(i) 
    j+=1

In [None]:
i = 108
df[df.GenPart_statusFlags.apply(hard_process)].loc[i][['GenPart_pdgId', 'GenPart_statusFlags', 'GenPart_pt', 'GenPart_eta',
       'GenPart_phi',]]

In [None]:
print(df.loc[i][['GenPart_pdgId', 'GenPart_statusFlags', 'GenPart_pt', 'GenPart_eta',
       'GenPart_phi',]].to_latex(index=False))

In [None]:
print(df.loc[0][['GenPart_pdgId', 'GenPart_statusFlags', 'GenPart_pt', 'GenPart_eta',
       'GenPart_phi',]].to_latex(index=False))

In [None]:
# Normalised bar chart of jet_df.multiplicity: seven unit-width bins centred on
# -1..5, labelled 'other', '0b', '1b', '1s', '1b+1s', '2b', '0b+'.
# (Unused `langs` / `students` lists left over from a plotting example were removed.)
hist  = np.histogram(jet_df.multiplicity, bins=np.linspace(-1.5, 5.5, 8), density=True)

fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
ax.bar(['other', '0b', '1b', '1s', '1b+1s', '2b', '0b+'], hist[0])

plt.yscale('log')
hist[0]

In [None]:
# Normalised bar chart of jet_df.multiplicity: seven unit-width bins centred on
# -1..5, labelled 'other', '0b', '1b', '1s', '1b+1s', '2b', '0b+'.
# (Unused `langs` / `students` lists left over from a plotting example were removed.)
hist  = np.histogram(jet_df.multiplicity, bins=np.linspace(-1.5, 5.5, 8), density=True)

fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
ax.bar(['other', '0b', '1b', '1s', '1b+1s', '2b', '0b+'], hist[0])

plt.yscale('log')
hist[0]

In [None]:
jet_df.iloc[8]

In [None]:
df.loc[8]

In [None]:
# Reload df / jet_df from the first file of each sample whose name contains
# 'BFF', '750' and '0p04'. Only gen-particle branches are loaded (no SR columns).
# NOTE(review): make_dataframe() is called with its default arguments although
# the SR columns are absent from df here — confirm doSR=False was not intended.
for sample in sp.samples():
    name = sample['name']
    if not 'BFF' in name: continue
    if not '750' in name: continue
    if not '0p04' in name: continue
    files_df = get_files(sample['fileglob'], sample_path)
    file = files_df.file.iloc[0]
    events = upr.open(file)['Events']
    df = events.arrays(['GenPart_pdgId',  'GenPart_statusFlags', 'GenPart_pt', 'GenPart_eta', 'GenPart_phi'], library='pd')
    jet_df = make_dataframe()

In [None]:
# Normalised bar chart of jet_df.multiplicity: six unit-width bins centred on
# -1..4, labelled 'other', '0b', '1b', '1s', '1b+1s', '2b'.
# (An unused `students` list left over from a plotting example was removed.)
hist  = np.histogram(jet_df.multiplicity, bins=np.linspace(-1.5, 4.5, 7), density=True)

fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
ax.bar(['other', '0b', '1b', '1s', '1b+1s', '2b'], hist[0])

plt.yscale('log')
hist[0]

In [None]:
jet_df[jet_df.multiplicity==-1]

In [None]:
df.loc[277][df.loc[277].GenPart_statusFlags.apply(hard_process)]

In [None]:
hard_process(8193)

In [None]:
df.loc[277]