In [None]:
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from matplotlib.colors import LogNorm
from matplotlib import rc
from numpy import inf
import os

from os import listdir


import uproot3


import matplotlib as mpl

# Global matplotlib defaults for every figure drawn in this notebook.
# These are the values that were actually in effect after the former sequence
# of rc()/rcParams.update() calls, which set several keys more than once
# (usetex on, on again, then off; font size 22 then 19; tick labels 15 then 18).
mpl.rcParams.update({
    'font.family': 'serif',
    'font.size': 19,
    'axes.labelsize': 18,
    'xtick.labelsize': 18,
    'ytick.labelsize': 18,
    'legend.fontsize': 15,
    'legend.frameon': False,
    # LaTeX text rendering stays disabled.
    'text.usetex': False,
})

#import mplhep as hep
#hep.set_style(hep.style.ROOT)

In [None]:
# Default keyword bundles for histogram plotting calls.

# Black step outline, density-normalised.
plot_style_1 = dict(histtype='step', color='black', linewidth=2, density=True)

# Same outline, drawn with a dashed line.
plot_style_0 = dict(histtype='step', color='black', linewidth=2, linestyle='--', density=True)

# Half-transparent style, density-normalised.
plot_style_2 = dict(alpha=0.5, density=True)

# "A" variants: identical to the styles above but with density switched off.
plot_style_1A = {**plot_style_1, 'density': False}
plot_style_2A = {**plot_style_2, 'density': False}



In [None]:
def get_Dataframe(path, name='Data', tag='nom'):
    """Load the ``minitree`` of every matching ROOT file into one DataFrame.

    Each entry of ``path`` whose file name contains both ``name`` and ``tag``
    is opened with uproot3.  A file is skipped when none of the keys found
    under ``name`` contains ``'minitree'``, and a tree is skipped when
    converting it to a flattened DataFrame raises ``ValueError``.

    Parameters
    ----------
    path : str
        Directory to scan; a trailing separator is optional.
    name : str
        Substring required in the file name, also used as the key under
        which the tree is looked up (``name + '/minitree'``).  When it
        contains ``'Data'`` only reconstructed branches are read; otherwise
        the generator-level branches are read as well.
    tag : str
        Second substring required in the file name.

    Returns
    -------
    pandas.DataFrame
        The per-file frames concatenated in ``listdir`` order.  An empty
        DataFrame is returned (after printing a notice) when nothing could
        be loaded, instead of failing on ``None.shape``.
    """
    if 'Data' not in name:
        branches = ["jet*", "genjet*","Q2","gen_Q2","y",'gen_y',"e_*","gene*","tau*","gen_tau*"]
    else:
        branches = ["jet*","Q2","y","e_*","tau*"]

    # Collect the per-file frames and concatenate once at the end; growing a
    # frame with concat inside the loop re-copies all earlier rows each time.
    frames = []
    for f in listdir(path):
        if name not in f or tag not in f:
            continue
        # os.path.join works whether or not `path` ends with a separator.
        filename = os.path.join(path, f)
        print ('filename is' , filename)

        temp_file = uproot3.open(filename)

        if not any('minitree' in str(key) for key in temp_file[name].keys()):
            print('file has not minitree, skipping')
            continue

        temp_tree = temp_file[name+'/minitree']

        try:
            # entrystop limits how many entries are read from each file.
            frames.append(temp_tree.pandas.df(branches, entrystop=3e6, flatten=True))
        except ValueError:
            print ('oops, there is a problem in flattening the TTree ')

    if frames:
        df = pd.concat(frames)
    else:
        print('no valid dataframe found in', path, 'for name =', name, ', tag =', tag)
        df = pd.DataFrame()

    print('####################################################################')
    print('Dataframe has a total of ', df.shape[0], ' entries')
    print('####################################################################')

    return df

In [None]:
def applyCut(inputDataframe, cut, text=None):
    """Return the rows of ``inputDataframe`` selected by the query string ``cut``.

    Parameters
    ----------
    inputDataframe : pandas.DataFrame
        Frame to filter; it is not modified.
    cut : str
        Expression passed to ``DataFrame.query``.
    text : str, optional
        When given (and non-empty), a line with this label, the number of
        surviving rows and the percentage kept is printed.

    Returns
    -------
    pandas.DataFrame
        The result of ``inputDataframe.query(cut)``.
    """
    n_before = inputDataframe.shape[0]
    cutDataframe = inputDataframe.query(cut)
    if text:
        n_after = cutDataframe.shape[0]
        # An empty input has no meaningful efficiency: report nan rather than
        # raising ZeroDivisionError.
        percent_kept = 100.0*float(n_after)/n_before if n_before else float('nan')
        print (text, n_after, ' fraction kept: %2.1f'%percent_kept)
    return cutDataframe

In [None]:
def applyCutsJets(df,isMC=False):
    """Add derived kinematic columns and apply the event and jet selection.

    The input frame is left untouched: every column is added to a private
    copy.  ``jet_phi`` is overwritten along the way, so the former in-place
    version changed the caller's frame and gave a different answer when run
    a second time on the same input.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``jet_pt``, ``jet_phi``, ``jet_eta``, ``e_px``, ``e_py``,
        ``e_pz``, ``Q2``, ``y`` and ``pass_reco``.  With ``isMC=True`` it must
        also provide ``gen_Q2``, ``gene_px``, ``gene_py``, ``gene_pz``,
        ``genjet_pt``, ``genjet_phi`` and ``genjet_eta``.
    isMC : bool
        When true, the generator-level counterparts of the derived columns
        are added after the cuts.

    Returns
    -------
    pandas.DataFrame
        Rows with ``0.08 < y < 0.7`` and ``Q2 > 150``; rows whose
        ``pass_reco`` is non-zero must additionally satisfy ``jet_pt > 5``
        and ``-1 < jet_eta < 2.5``.
    """
    # Order matters: later expressions use columns defined by earlier ones,
    # and jet_px/jet_py must be built before jet_phi is overwritten.
    reco_columns = (
        'jet_px = jet_pt*cos(jet_phi)',
        'jet_py = jet_pt*sin(jet_phi)',
        'jet_pz = jet_pt*sinh(jet_eta)',
        'jet_qt = sqrt( (jet_px + e_px)**2 + (jet_py + e_py)**2) ',
        'jet_qtnorm = jet_qt/sqrt(Q2)',
        'e_pt = sqrt(e_px*e_px + e_py*e_py)',
        # NOTE(review): arctan(py/px) only spans (-pi/2, pi/2), so azimuths
        # are folded; confirm that arctan2 is not what is wanted here.
        'e_phi = arctan(e_py/e_px)',
        # NOTE(review): the theta columns use p_y, not p_T, in the numerator
        # -- confirm this is the intended definition.
        'e_theta = abs(arctan(e_py/e_pz))',
        'jet_theta = abs(arctan(jet_py/jet_pz))',
        'e_p = sqrt(e_px*e_px + e_py*e_py + e_pz*e_pz)',
        'jet_phi = arctan(jet_py/jet_px)',
        'jet_dphi = e_phi-jet_phi',
        # 2.3025850 ~ ln(10): this is the base-10 logarithm of Q2.
        'logQ2= log(Q2)/2.3025850',
        'Q = sqrt(Q2)',
    )
    # (query, label) pairs.  With the default pandas parser `|` binds like
    # `or`, so jet cuts are only enforced on rows with pass_reco != 0.
    selection = (
        ('0.08 < y < 0.7', '0.08 < y < 0.7'),
        ('Q2>150', 'Q2>150'),
        ('pass_reco==0 | jet_pt>5.0', 'jet pT > 5 GeV'),
        ('pass_reco==0 | jet_eta>-1.0', 'jet eta > -1.0'),
        ('pass_reco==0 | jet_eta<2.5', 'jet eta < 2.5'),
    )
    gen_columns = (
        'gen_logQ2= log(gen_Q2)/2.3025850',
        'gen_Q    = sqrt(gen_Q2)',
        'gene_pt = sqrt(gene_px*gene_px + gene_py*gene_py)',
        'gene_p = sqrt(gene_px*gene_px + gene_py*gene_py + gene_pz*gene_pz)',
        'gene_theta = abs(arctan(gene_py/gene_pz))',
        'genjet_px = genjet_pt*cos(genjet_phi)',
        'genjet_py = genjet_pt*sin(genjet_phi)',
        'genjet_pz = genjet_pt*sinh(genjet_eta)',
        'genjet_theta = abs(arctan(genjet_py/genjet_pz))',
        'genjet_qt = sqrt( (genjet_px + gene_px)**2 + (genjet_py + gene_py)**2) ',
        'genjet_qtnorm = genjet_qt/sqrt(gen_Q2)',
        'gene_phi = arctan(gene_py/gene_px)',
        'genjet_phi = arctan(genjet_py/genjet_px)',
        'genjet_dphi = gene_phi-genjet_phi',
    )

    # Work on a copy so the caller's frame keeps its original columns.
    temp = df.copy()
    for expression in reco_columns:
        temp.eval(expression, inplace=True)

    for cut, label in selection:
        temp = applyCut(temp, cut, label)

    if isMC:
        # Detach from the query result before assigning new columns in place.
        temp = temp.copy()
        for expression in gen_columns:
            temp.eval(expression, inplace=True)

    return temp

In [None]:
# Names of the Monte Carlo samples.  `mc_name` selects the files loaded as
# `mc` / `sys_mc`; `altmc_name` is presumably the alternative generator (it is
# not used in the cells shown here).  Swap the two strings to exchange them.
mc_name, altmc_name = 'Django', 'Rapgap'

In [None]:

# Directory holding the ROOT files.  Set the HERA_DATA_DIR environment variable
# to run the notebook on another machine; the default is the original location.
# Joining with '' guarantees the trailing separator that get_Dataframe's
# `path + filename` concatenation relies on.
path = os.path.join(os.environ.get('HERA_DATA_DIR', '/home/miguel/data/hera/'), '')

# Nominal data sample (tag defaults to 'nom').
data = get_Dataframe(path, name='Data')

In [None]:

# NOTE(review): this call has exactly the same arguments as the nominal `data`
# load (tag defaults to 'nom'), so the same files are read a second time --
# confirm whether a systematic tag was meant to be passed here.
sys_data = get_Dataframe(path, name='Data')

In [None]:
# pass_reco is 1 where jet_pt > 0 and 0 elsewhere; applyCutsJets reads this
# column in its jet cuts, so it has to exist before the selection runs.
for frame in (data, sys_data):
    frame['pass_reco'] = np.where(frame['jet_pt'] > 0, 1, 0)

print('Selecting data events\n')
data = applyCutsJets(data)

print('Selecting data events\n')
sys_data = applyCutsJets(sys_data)

In [None]:
%%time
# Nominal Monte Carlo sample: files whose name contains `mc_name` and the
# default tag 'nom'.  (%%time must remain the first line of the cell.)
mc = get_Dataframe(path, name=mc_name)


In [None]:
%%time

# Same generator as `mc`, restricted to files whose name contains 'sys_6.'.
# Presumably the systematic variation called 'sys6' in the commented-out
# label table at the end of the notebook -- confirm.
sys_mc = get_Dataframe(path, name=mc_name, tag = 'sys_6.')

In [None]:
# Acceptance flags (1 = pass, 0 = fail) for the nominal and systematic MC.
# pass_reco is based on the reconstructed jet pT.  pass_truth is based on the
# generator-level jet pT: it used to be computed from jet_pt as well, which
# made it an exact copy of pass_reco.
for frame in (mc, sys_mc):
    frame['pass_reco'] = np.where(frame['jet_pt'] > 0, 1, 0)
    frame['pass_truth'] = np.where(frame['genjet_pt'] > 0, 1, 0)



In [None]:
# Show which columns the nominal MC frame holds at this point.
mc.columns

In [None]:
%%time

# Apply the same selection as for data; isMC=True additionally builds the
# generator-level columns.  Both frames are rebound to the selected rows.
print('Selecting MC events\n')
mc   = applyCutsJets(mc, isMC=True)

print('Selecting MC events\n')
sys_mc   = applyCutsJets(sys_mc, isMC=True)

In [None]:
# For each reconstructed observable print the median in the nominal and in the
# systematic MC, followed by 100*(1 - nominal/systematic).  Names containing
# 'gen' are skipped.
for obs in ['y','gen_y','Q','gen_Q','jet_pt','genjet_pt','e_p','gene_p','e_theta','gene_theta','jet_theta','genjet_theta','jet_phi','genjet_phi','e_phi','gene_phi','jet_eta','genjet_eta','jet_dphi','gen_dphi']:
    if 'gen' not in obs:
        nominal_median = mc[obs].median()
        shifted_median = sys_mc[obs].median()
        percent_shift = 100*(1- (nominal_median/ shifted_median) )
        print('MC: obs ' , obs , ' nominal = %2.2f'%nominal_median, ' systematic = %2.2f'%shifted_median,' ========%2.1f '%percent_shift,' %')
   

In [None]:
# Same comparison for data: median in `data`, median in `sys_data`, and
# 100*(1 - data/sys_data).  Names containing 'gen' are skipped.
for obs in ['y','gen_y','Q','gen_Q','jet_pt','genjet_pt','e_p','gene_p','e_theta','gene_theta','jet_theta','genjet_theta','jet_phi','genjet_phi','e_phi','gene_phi','jet_eta']:
    if 'gen' in obs:
        continue
    nominal_median = data[obs].median()
    shifted_median = sys_data[obs].median()
    percent_shift = 100*(1- (nominal_median/ shifted_median) )
    print('Data: obs ' , obs , ' %2.2f'%nominal_median, ' %2.2f'%shifted_median,'========%2.1f '%percent_shift ,'%')

In [None]:
# label = {}
# label['test'] = 'repeat'
# label['sys0'] = 'HFS scale (in jet)'
# label['sys1'] = 'HFS scale (remainder)'
# label['sys6'] = 'lepton energy scale'
# label['sys9'] = 'lepton polar angle'
# label['model'] = 'Model'