### Download missing libraries
Uncomment the install lines in the following cell if some of the libraries cannot be imported. Please restart the kernel after the download and upgrade have finished successfully.

**Please uncomment these lines only if the libraries cannot be imported below.**

In [None]:
### Download libraries
#%pip install uproot 
#%pip install awkward 
#%pip install mplhep 
#%pip install numpy 
#%pip install matplotlib 
#%pip install scipy

### Upgrade libraries to latest version
#%pip install uproot awkward mplhep numpy matplotlib scipy --upgrade

### Import libraries

In [None]:
import uproot
import awkward as ak
import mplhep
%matplotlib inline 
import numpy as np
import matplotlib.pyplot as plt

## *Example*: Reading a ROOT.TTree and plotting a variable with a cut

We will open data and Monte Carlo samples using **uproot**. Uproot is a reader and a writer of the ROOT file format using only Python and Numpy. Unlike PyROOT and root_numpy, uproot does not depend on C++ ROOT so that no local compilation of the ROOT libraries is needed to access the data.

You can find more info on uproot following the references:
* Github repo: https://github.com/scikit-hep/uproot4
* Tutorial: https://masonproffitt.github.io/uproot-tutorial/
* Video tutorial on uproot and awkward arrays:  https://www.youtube.com/embed/ea-zYLQBS4U 

First, let's specify the folder path for both data and Monte Carlo (MC) samples

In [None]:
path_data = 'samples/data/'
path_mc = 'mc_z0experiment/'

### Open the Monte Carlo files (the data file is currently left disabled)
#file = uproot.open(path_data+'daten.root')
file_ee = uproot.open(path_mc+'ee.root')
file_mm = uproot.open(path_mc+'mm.root')
file_tt = uproot.open(path_mc+'tt.root')
file_qq = uproot.open(path_mc+'qq.root')

ttree_name = 'myTTree'

### Print list of 'branches' of the TTree (i.e. list of variable names).
### An explicit print is required: a bare expression in the middle of a cell is not displayed.
print(file_ee[ttree_name].keys())


def pack_samples(items):
    """Pack `items` into a 1-D object array holding exactly one element per item.

    `np.array(items, dtype=object)` only gives this layout when the items differ
    in length; for equally long items it silently builds a 2-D array instead.
    Filling a pre-allocated object array element by element avoids that.
    """
    packed = np.empty(len(items), dtype=object)
    for index, item in enumerate(items):
        packed[index] = item
    return packed


def load_variable(samples, name):
    """Return branch `name` of every entry in `samples` as NumPy arrays, in the same order.

    See https://awkward-array.org/how-to-convert-numpy.html for more conversions.
    """
    return pack_samples([ak.to_numpy(sample[name]) for sample in samples])


## Load branches; sample order is fixed: 0 = ee, 1 = mm, 2 = tt, 3 = qq
branches = pack_samples([mc_file[ttree_name].arrays()
                         for mc_file in (file_ee, file_mm, file_tt, file_qq)])

print(type(branches))

## Per-sample NumPy arrays for the variables used below (same sample order as `branches`)
nchar = load_variable(branches, 'Ncharged')
pchar = load_variable(branches, 'Pcharged')
ecal = load_variable(branches, 'E_ecal')
hcal = load_variable(branches, 'E_hcal')
cos_thet = load_variable(branches, 'cos_thet')

### Apply a cut

Cuts are applied by *masking* the array. This can be done as follows:

In [None]:
## Selection: E_ecal inside [ecal_lower, ecal_upper] and at most ncharged_upper charged tracks
ecal_lower, ecal_upper = 72, 120 
ncharged_upper = 6

## One boolean mask per sample, in the same order as `branches`.
## The object array is filled element by element so that it always holds exactly
## one mask per sample, even if two samples happen to have the same length.
masks = np.empty(len(branches), dtype=object)
for index, sample in enumerate(branches):
    masks[index] = ((sample['E_ecal'] >= ecal_lower) & (sample['E_ecal'] <= ecal_upper)
                    & (sample['Ncharged'] <= ncharged_upper))

## Count passed / total events once per sample with a vectorised reduction
## (the builtin sum() would iterate over every event in Python).
n_pass = np.array([ak.count_nonzero(mask) for mask in masks])
n_total = np.array([len(mask) for mask in masks])

eff = n_pass / n_total

## Relative sqrt(n)/n errors on both counts, added in quadrature.
## NOTE(review): this treats the passed and total counts as independent; since the
## passed events are a subset of the total, a binomial error sqrt(eff*(1-eff)/N)
## may be more appropriate -- confirm against the analysis requirements.
rel_err_pass = np.sqrt(n_pass) / n_pass
rel_err_total = np.sqrt(n_total) / n_total
errors = eff * np.sqrt(np.power(rel_err_pass, 2) + np.power(rel_err_total, 2))

## The number of True entries in a mask is the number of events that passed this cut
print(f"A total of '{n_pass[0]}' out of '{n_total[0]}' events passed the cut")
print(f"The scaling factor is {1e5/n_total[0]:.4f}")
print(f"*The efficiency for electrons is ({eff[0]*100:.1f} ± {errors[0]*100:.1f})%.")
print(f"The efficiency for muons is ({eff[1]*100:.4f} ± {errors[1]*100:.4f})%.")
print(f"The efficiency for tauons is ({eff[2]*100:.2f} ± {errors[2]*100:.2f})%.")
print(f"The efficiency for hadrons is ({eff[3]*100:.3f} ± {errors[3]*100:.3f})%.")

### Some useful information about your selection

### Make a plot with a certain selection

In [None]:
from scipy.optimize import curve_fit
import scipy.integrate as integrate
# Number of histogram bins for the cos(theta) distribution (used as `bins=N` and
# in the bin-width normalisation further down in this cell).
N = 300

plt.style.use(mplhep.style.ATLAS) # You can load ATLAS/CMS/ALICE plot style 
# Start a new 7x5 figure; the following plt.* calls draw into it.
plt.figure(figsize=(7,5))

#mask_thet = (branches[0]['cos_thet'] >= -0.9) & (branches[0]['cos_thet'] <= 0.9)

def angdis(x, a, b):
    """Evaluate the angular model a * (1 + x^2) + b * (1 - x)^(-2).

    Parameters
    ----------
    x : scalar or array-like
        Point(s) at which to evaluate the model (used with cos(theta) bin centres).
    a : float
        Coefficient of the (1 + x^2) term.
    b : float
        Coefficient of the (1 - x)^(-2) term, which diverges at x = 1.

    Returns
    -------
    NumPy scalar or ndarray with the model value(s).

    NOTE(review): presumably these are the s- and t-channel contributions to
    e+e- -> e+e- -- confirm.
    """
    # Cast to float first: NumPy refuses negative integer powers of integer input
    # ("Integers to negative integer powers are not allowed").
    x = np.asarray(x, dtype=float)
    return a * (1 + np.power(x, 2)) + b * np.power(1 - x, -2)

# Histogram cos(theta) of sample 0 and keep the bin contents for the fit.
bin_content, bin_edges, _ = plt.hist(cos_thet[0], bins=N, range=(-1., 1.), histtype='step',
                                     linewidth=2, edgecolor='#396ab1', hatch='/', label='cos_thet')
mid = 0.5 * (bin_edges[1:] + bin_edges[:-1])  # bin centres
mask_mid = (mid >= -0.9) & (mid <= 0.9)       # fit only within |cos(theta)| <= 0.9

# `sigma` must be the uncertainty of each bin count, i.e. the Poisson error sqrt(n),
# not the count itself. Empty bins get sigma = 1 so the weights stay finite.
fit_counts = bin_content[mask_mid]
fit_sigma = np.sqrt(np.maximum(fit_counts, 1.0))

# curve_fit returns (best-fit parameters, covariance matrix). The existing names are
# kept for compatibility: `pcov` holds the PARAMETERS and `pvar` the COVARIANCE matrix.
pcov, pvar = curve_fit(angdis, mid[mask_mid], fit_counts, sigma=fit_sigma, absolute_sigma=True)
perr = np.sqrt(np.diag(pvar))  # 1-sigma parameter uncertainties

# Full fit, and the a * (1 + x^2) term alone (b set to 0).
plt.plot(mid, angdis(mid, *pcov), lw=2, c='#da7c30')
plt.plot(mid, angdis(mid, pcov[0], 0), lw=2, c='#cc2529')


def integral(x):
    """Antiderivative of (1 + x^2), used to integrate the first model term."""
    return x**3 / 3 + x


# The fit is in events per bin; dividing by the bin width 2/N converts the
# integral over cos(theta) into a number of events.
bins_per_unit = N / 2

# Events described by the a * (1 + x^2) term over the full range [-1, 1]: [value, error].
result = bins_per_unit * np.array([pcov[0], perr[0]]) * (integral(1) - integral(-1))

# Same term integrated over the sub-range [-0.9, 0.5], compared with the observed counts there.
sub_lower, sub_upper = -0.9, 0.5
sub_norm = bins_per_unit * (integral(sub_upper) - integral(sub_lower))
fit_sub = sub_norm * pcov[0]
fit_sub_err = sub_norm * perr[0]
print(fit_sub)
print(fit_sub_err)

mask_sub = (mid >= sub_lower) & (mid <= sub_upper)
n_sub = bin_content[mask_sub].sum()
print(n_sub, np.sqrt(n_sub))
print(result)
print(f"{round(result[0], -1):.0f} ± {round(result[1],-2):.0f}")

# Deviation between fitted and counted events in units of the combined uncertainty.
# This was previously evaluated from hard-coded numbers (apparently copied from an
# earlier run's output); it is now computed from the current values.
print(abs(fit_sub - n_sub) / np.sqrt(fit_sub_err**2 + n_sub))

### Show the plot on screen
plt.title('Angular distribution of electron events')
plt.xlim(-1.,1.)
# Round the upper limit UP to the next hundred so the tallest bin is never clipped.
plt.ylim(0., np.ceil(bin_content.max() * 1.1 / 100) * 100)
plt.xlabel('$\\cos({\\theta})$')
plt.ylabel('Number of events')
plt.show()

