# Plotting jets as images

In [1]:
import time
from coffea import hist
from coffea.analysis_objects import JaggedCandidateArray
import coffea.processor as processor
from awkward import JaggedArray, MaskedArray
import numpy as np
import glob as glob
import itertools
import json
import uproot_methods
import copy
from coord_transform import *
import tensorflow as tf
import pandas as pd

np.set_printoptions(threshold=np.inf)

In [2]:
def pseudorapidity(theta):
    return -np.log(np.tan(theta*0.5))

In [3]:
def dataStream(output):
    image = output['flippedImages'].value
    jetPt = output['jetPt'].value
    jetEta = output['jetEta'].value
    jetPhi = output['jetPhi'].value
    jetTau21 = output['jetTau21'].value
    jetTau31 = output['jetTau31'].value
    jetTau32 = output['jetTau32'].value
    chMult = output['chargedHadronMult'].value
    neutMult = output['neutralHadronMult'].value
    phoMult = output['photonMult'].value
    eleMult = output['electronMult'].value
    muMult = output['muonMult'].value

    
    print(chMult.shape)
    print(neutMult.shape)
    print(phoMult.shape)
    print(eleMult.shape)
    print(muMult.shape)
    
    jetconstEvnum = output['jetconstEvnum'].value
    jetconstEta = output['jetconstEta'].value
    jetconstPhi = output['jetconstPhi'].value
    jetconstPt = output['jetconstPt'].value
    chargedHadronPt = output['chargedHadronPt'].value
    neutralHadronPt = output['neutralHadronPt'].value

    jetFile = '/mnt/data/ml/ShowJetsDataTest2'
    constituentFile = '/mnt/data/ml/Constituent4vecDataTest2'
    
    labels = output['label'].value
    np.savez(jetFile, jetImages = image, jetPt = jetPt,
             jetEta = jetEta, jetPhi = jetPhi, tau21=jetTau21, chMult=chMult, neutMult = neutMult, 
             phoMult=phoMult, eleMult=eleMult, muMult=muMult, labels = labels)
    labels = np.repeat(labels, 20, axis=0).reshape(len(labels)*20, 4)
    np.savez(constituentFile, jetconstEvnum=jetconstEvnum, jetconstPt = jetconstPt, 
            jetconstEta = jetconstEta, jetconstPhi = jetconstPhi, chargedHadronPt = chargedHadronPt,
             neutralHadronPt = neutralHadronPt, tau21=jetTau21, chMult=chMult, neutMult = neutMult,
             phoMult=phoMult, eleMult=eleMult, muMult=muMult, labels = labels)

In [4]:
def transform_to_softdrop_axes(jetparticles,Jets):
    
    subjet0leading = (Jets.subjet0_pt > Jets.subjet1_pt)
    centroid1_pt = np.where( subjet0leading, Jets.subjet0_pt, Jets.subjet1_pt  )
    centroid1_eta = np.where( subjet0leading, Jets.subjet0_eta, Jets.subjet1_eta  )
    centroid1_phi = np.where( subjet0leading, Jets.subjet0_phi, Jets.subjet1_phi  )
    centroid1_m = np.where( subjet0leading, Jets.subjet0_m, Jets.subjet1_m  )
    centroid2_pt = np.where( subjet0leading, Jets.subjet1_pt, Jets.subjet0_pt,   )
    centroid2_eta = np.where( subjet0leading, Jets.subjet1_eta, Jets.subjet0_eta  )
    centroid2_phi = np.where( subjet0leading, Jets.subjet1_phi, Jets.subjet0_phi  )
    centroid2_m = np.where( subjet0leading, Jets.subjet1_m, Jets.subjet0_m  )
    pcentroid1 = uproot_methods.TLorentzVectorArray.from_ptetaphim( centroid1_pt, centroid1_eta, centroid1_phi, centroid1_m )
    pcentroid2 = uproot_methods.TLorentzVectorArray.from_ptetaphim( centroid2_pt, centroid2_eta, centroid2_phi, centroid2_m )
    
    #distance between the two centroids
    r0 = pcentroid1.delta_r(pcentroid2)
    
    centroid1 = uproot_methods.TVector3Array.from_cartesian(pcentroid1.x, pcentroid1.y, pcentroid1.z)
    centroid2 = uproot_methods.TVector3Array.from_cartesian(pcentroid2.x, pcentroid2.y, pcentroid2.z)
    particle_p3s = uproot_methods.TVector3Array.from_cartesian(jetparticles.p4.x, jetparticles.p4.y, jetparticles.p4.z)
    
    # Rotate coordinate system so "centroid1" is along the x axis
    rot1_axis, rot1_angle = coord_rotation( centroid1, uproot_methods.TVector3(1,0,0) )    
    centroid1 = centroid1.rotate_axis(rot1_axis, rot1_angle)
    centroid2 = centroid2.rotate_axis(rot1_axis, rot1_angle)
    particle_p3s = particle_p3s.rotate_axis(rot1_axis, rot1_angle)

    # Rotate about the z axis so that centroid2 is pointing down
    rot2_angle = -np.arctan2( centroid2.z, centroid2.y )
    centroid2 = centroid2.rotatex(rot2_angle)
    particle_p3s = particle_p3s.rotatex(rot2_angle)
    
    # Scale the image so that centroid2 is at (0,1,0)
    deta = np.divide(pseudorapidity(particle_p3s.theta),r0)
    dphi = np.divide(particle_p3s.phi,r0)
        
    return deta,dphi,centroid1,centroid2

In [45]:
# Look at ProcessorABC to see the expected methods and what they are supposed to do
class JetImageProcessor(processor.ProcessorABC):
    def __init__(self):
        self.eta_bins = 16
        self.phi_bins = 16
        dataset_axis = hist.Cat("dataset", "Primary dataset")
        event_axis = hist.Bin("event", "Event number", 10000, 0, 10000 )
        pdgid_axis = hist.Bin("pdgid", r"PDG ID", 10000, -5000, 5000)
        eta_axis = hist.Bin("eta", r"$\eta$", 32, -5, 5)
        phi_axis = hist.Bin("phi", r"$\phi$", 32, -np.pi, np.pi)
        image_dphi = hist.Bin("image_dphi", r"$Q_1$", self.phi_bins, -2, 2) #yaxis
        image_deta = hist.Bin("image_deta", r"$Q_2$", self.eta_bins, -2, 2) #xaxis
        pt_axis = hist.Bin("pt", r"$p_{T}$ [GeV]", 1000, 0, 5000)
        chPt_axis = hist.Bin("chPt", r"$p_{T}$ [GeV]", 100, 0, 500)
        nhPt_axis = hist.Bin("nhPt", r"$p_{T}$ [GeV]", 100, 0, 500)
        hadron_axis = hist.Cat("hadron", "Hadron Category")
        m_axis = hist.Bin("m", r"$p_{T}$ [GeV]", 200, 0, 500)
        dr_axis = hist.Bin("dr", r"$\Delta r$", 80, 0, 0.8)
        tau_axis = hist.Bin("tau", r"$\tau$", 100, 0, 1.0)
        mult_axis = hist.Bin("mult", "Multiplicity", 50, 0, 50)
        pullAngle_axis = hist.Bin("pullAngle", "Jet Pull Angle", 100, -5, 5)
        pull_axis = hist.Bin("pull", "Jet Pull", 100, 0, 1)

        
        self._accumulator = processor.dict_accumulator({
            'pt':hist.Hist("Counts", dataset_axis, pt_axis),
            'eta':hist.Hist("Counts", dataset_axis, eta_axis),
            'flippedImages': processor.column_accumulator(np.zeros(shape=(0,16,16))),
            'jetEta': processor.column_accumulator(np.zeros(shape=(0))),
            'jetPhi': processor.column_accumulator(np.zeros(shape=(0))),
            'jetPt': processor.column_accumulator(np.zeros(shape=(0))),
            'jetTau21': processor.column_accumulator(np.zeros(shape=(0))),
            'jetTau31': processor.column_accumulator(np.zeros(shape=(0))),
            'jetTau32': processor.column_accumulator(np.zeros(shape=(0))),
            'chPt':hist.Hist("Counts", dataset_axis, chPt_axis, hadron_axis),
            'hmult':hist.Hist("Counts", dataset_axis, mult_axis, hadron_axis),# event_axis),
            'h_muMult':hist.Hist("Counts", dataset_axis, mult_axis),
            'h_eleMult':hist.Hist("Counts", dataset_axis, mult_axis),
            'h_phoMult':hist.Hist("Counts", dataset_axis, mult_axis),
            'h_jetPull':hist.Hist("Counts", dataset_axis, pull_axis),
            'h_jetPullAngle':hist.Hist("Counts", dataset_axis, pullAngle_axis),

            'jetconstEvnum': processor.column_accumulator(np.zeros(shape=(0))),
            'jetconstEta': processor.column_accumulator(np.zeros(shape=(0))),
            'jetconstPhi': processor.column_accumulator(np.zeros(shape=(0))),
            'jetconstPt': processor.column_accumulator(np.zeros(shape=(0))),
            'chargedHadronMult': processor.column_accumulator(np.zeros(shape=(0))),
            'neutralHadronMult': processor.column_accumulator(np.zeros(shape=(0))),
            'photonMult': processor.column_accumulator(np.zeros(shape=(0))),
            'electronMult': processor.column_accumulator(np.zeros(shape=(0))),
            'muonMult': processor.column_accumulator(np.zeros(shape=(0))), 
            'chargedHadronPt': processor.column_accumulator(np.zeros(shape=(0))),
            'chargedHadronEta': processor.column_accumulator(np.zeros(shape=(0))),
            'chargedHadronPhi': processor.column_accumulator(np.zeros(shape=(0))),
            'neutralHadronPt': processor.column_accumulator(np.zeros(shape=(0))),
            'neutralHadronEta': processor.column_accumulator(np.zeros(shape=(0))),
            'neutralHadronPhi': processor.column_accumulator(np.zeros(shape=(0))),
            'jetconstCharge': processor.column_accumulator(np.zeros(shape=(0))),
            'label': processor.column_accumulator(np.zeros(shape=(0,4))),
            'image':hist.Hist(r"$p_{T}$", dataset_axis, event_axis, image_dphi, image_deta),
            'dr':hist.Hist("Counts", dataset_axis, dr_axis),
            'dr_true':hist.Hist("Counts", dataset_axis, dr_axis),
            'pdgid':hist.Hist("Counts", dataset_axis, pdgid_axis),
            'tau21':hist.Hist("Counts", dataset_axis, tau_axis),
            'tau31':hist.Hist("Counts", dataset_axis, tau_axis),
            'tau32':hist.Hist("Counts", dataset_axis, tau_axis),
            'cutflow': processor.defaultdict_accumulator(int)
        })
    
    @property
    def accumulator(self):
        return self._accumulator
    
    def process(self, df):
        
        
        output = self.accumulator.identity()

        
        dataset = df['dataset']
        evnum = df['eventNum']
        
        Jets = JaggedCandidateArray.candidatesfromcounts(
            df['nJet'],
            pt=df['jet_pt'],
            eta=df['jet_eta'],
            phi=df['jet_phi'],
            mass=df['jet_m'],
            msd=df['jet_msd'],
            subjet0_pt=df['jet_subjet0_pt'],
            subjet0_eta=df['jet_subjet0_eta'],
            subjet0_phi=df['jet_subjet0_phi'],
            subjet0_m=df['jet_subjet0_m'],
            subjet1_pt=df['jet_subjet1_pt'],
            subjet1_eta=df['jet_subjet1_eta'],
            subjet1_phi=df['jet_subjet1_phi'],
            subjet1_m=df['jet_subjet1_m'],
            tau1=df['jet_tau1'],
            tau2=df['jet_tau2'],
            tau3=df['jet_tau3'],
            tau4=df['jet_tau4'],
            ic=df['jet_ic'],
            nc=df['jet_nc']
            )        
        ntuple_constituents = JaggedCandidateArray.candidatesfromcounts(
            df['nConstituent'],
            pt=df['constituent_pt'],
            eta=df['constituent_eta'],
            phi=df['constituent_phi'],
            mass=df['constituent_m'],
            pdgid=df['constituent_id'],
            status=df['constituent_status']
            )
        
        pullconstituents = ntuple_constituents
        deta,dphi,centroid1,centroid2= transform_to_softdrop_axes(ntuple_constituents, Jets[:,0])        
        ntuple_constituents.add_attributes( deta=deta )
        ntuple_constituents.add_attributes( dphi=dphi )
        
       # print(evnum)
        
        constituents = ntuple_constituents[ntuple_constituents.pt > 1]
        pulljets = Jets
        Jets = Jets[Jets.pt > 200]
        oneJet = (Jets.counts >=1)
        Jets = Jets[oneJet]
        constituents = constituents[oneJet]
        print(constituents.shape)
        print(constituents)

        
        
        ## calculating jet pull ###

        #number of constituents per leading jet
        nc_ndarray = Jets.nc[:,0]
        #indices of constituents of the leading jet
        ic_ndarray = Jets.ic[:,0]
        print("nc_ndarray.shape", nc_ndarray.shape)
        print("ic_ndarray.shape", ic_ndarray.shape)
        print("nc_ndarray", nc_ndarray)
        print("ic_ndarray", ic_ndarray)
        
        ic = JaggedArray.fromiter(ic_ndarray)
        nc = JaggedArray.fromiter(nc_ndarray)
        print("ic.shape", ic.shape)
        print("ic", ic)
        print("nc.shape", nc.shape)
        print("nc", nc)
        valid_indices = ic > 0
        print("valid_indices.shape", valid_indices.shape)
        print("valid_indices", valid_indices)
        
        # These are "flat" C++ arrays, so they get converted to numpy ndarrays,
        # not JaggedArrays
        maxindex = 50        
        jetconstituents = constituents[ic & valid_indices]
        print(jetconstituents.shape)
        
        index_vals = np.indices( ic.shape )[1]
        valid_constituent_mask = (index_vals.T < valid_constituent_indices).T

        print(constituents.shape)
        print(constituents.offsets)
        print(constituents.pt)
        print(valid_constituent_mask.shape)
        print(valid_constituent_mask)
        
        
        
        validImages = np.zeros(int(np.max(evnum)+1), dtype=bool)
        evnum = evnum[oneJet]
        validImages[evnum] = True
                
        dr = Jets[:,0].p4.delta_r(constituents.p4)
        jetconstituents = constituents[valid_constituent_indices] * valid_constituent_mask
        #jetconstituents = constituents[dr < 0.8]
        #print(Jets.nc[:,0])
        
        # Make a Jagged Array of number of constituents from offets
        #jaggedshape = JaggedArray.fromoffsets( Jets.ic[:,0].offsets, np.ones(deta.flatten().shape[0]))
        #eventReshaped = jaggedshape * evnum        
        
        #valid_constituents = Jets.ic[:,0] < Jets.nc[:,0][0]
        #jetconstituents = constituents[ valid_constituents ]

        deta = jetconstituents.deta
        dphi = jetconstituents.dphi
        
        # Get the leading 20 constituents in each event for
        # output to a npz file for deepak8 like architechtures. 
        truncated_constituents = jetconstituents[jetconstituents.pt.argsort()][:,:20]

        
        # Make a Jagged Array of event numbers with the correct offsets
        jaggedshape = JaggedArray.fromoffsets( jetconstituents.offsets, np.ones(deta.flatten().shape[0]))
        eventReshaped = jaggedshape * evnum
        
        # Performing parity flip so that sum(right) is higher than sum(left) for each image
        eventbins = np.arange(len(validImages)+1)
        xbins= np.linspace(-2,2,num=self.eta_bins+1)
        ybins= np.linspace(-2,2,num=self.phi_bins+1)
        
        # For the images, normalize each constituent to the jet pt
        # to remove any pt dependence
        const_pts = jetconstituents.pt
        normed_pts = const_pts / Jets[:,0].p4.pt
        
        #print('normed_pts = ', normed_pts)
    
        
        H_images = np.histogramdd((eventReshaped.flatten(),dphi.flatten(),deta.flatten()),
                    bins=(eventbins ,ybins, xbins), weights = normed_pts.flatten())[0]
        H_images = H_images[validImages]
        mask = (np.sum(H_images[:,:,:int(self.eta_bins/2)], axis = (1,2)) < np.sum(H_images[:,:,int(self.eta_bins/2):], axis = (1,2)))
        sign = np.where(mask, 1, -1)
        deta = (sign*deta)
        H_images_new = np.histogramdd((eventReshaped.flatten(),dphi.flatten(),deta.flatten()),
                    bins=(eventbins ,ybins, xbins), weights = normed_pts.flatten())[0]

        labels = np.zeros([len(deta),4])
        if ("Zprime1TeV" in dataset):
            labels[:,1]=1
        elif ("Zprime2TeV" in dataset):
            labels[:,2]=1
        elif ("Zprime3TeV" in dataset):
            labels[:,3]=1
        else:
            labels[:,0]=1
            
        charge = np.zeros([len(constituents.pt)])
        idfound1 = abs(jetconstituents.pdgid) == 2212 # proton
        idfound2 = abs(jetconstituents.pdgid) == 211  # pion
        idfound3 = abs(jetconstituents.pdgid) == 321  # kaon
        idfound4 = abs(jetconstituents.pdgid) == 2112 # neutron
        idfound5 = abs(jetconstituents.pdgid) == 130  # K-long
        idfound6 = abs(jetconstituents.pdgid) == 22   # photon    
        idfound7 = abs(jetconstituents.pdgid) == 11
        idfound8 = abs(jetconstituents.pdgid) == 12
        idfound9 = abs(jetconstituents.pdgid) == 13
        idfound10 = abs(jetconstituents.pdgid) == 14
        idfound11 = abs(jetconstituents.pdgid) == 15
        idfound12 = abs(jetconstituents.pdgid) == 16
        # all else = 11, 12, 13, 14, 15, 16 (leptons and lepton neutrinos)
        ids = idfound1 | idfound2 | idfound3 | idfound4 | idfound5 | idfound6 | idfound8 | idfound8 | idfound9 | idfound10 | idfound11 | idfound12
        
        isEle = abs(jetconstituents.pdgid) == 11
        isMuon = abs(jetconstituents.pdgid) == 13
        isPho = abs(jetconstituents.pdgid) == 22
        
        ID = jetconstituents.pdgid
        chID = idfound1 | idfound2 | idfound3
        isFinal = (jetconstituents.status == 91)
        nhID = idfound4 | idfound5
        nonHID = ~chID & ~nhID
        
        #pdgs = jetconstituents[~ids].pdgid
        #pdgbool = np.abs(pdgs) > np.zeros_like(jetconstituents)
        #print(pdgs[np.where(pdgbool)])
                
        chargedHadrons = jetconstituents[chID & isFinal]
        neutralHadrons = jetconstituents[nhID & isFinal]
        nonHadrons = jetconstituents[nonHID & isFinal]
        photons = jetconstituents[isPho & isFinal]
        electrons = jetconstituents[isEle & isFinal]
        muons = jetconstituents[isMuon & isFinal]
        
        truncated_chargedHadrons = chargedHadrons[chargedHadrons.pt.argsort()][:,:20]
        truncated_neutralHadrons = neutralHadrons[neutralHadrons.pt.argsort()][:,:20]
        truncated_nonHadrons = nonHadrons[nonHadrons.pt.argsort()][:,:20]
     
        chargedHadronsPt_jagged = JaggedArray.fromiter(chargedHadrons.p4.pt)
        neutralHadronsPt_jagged = JaggedArray.fromiter(neutralHadrons.p4.pt)
        nonHadronsPt_jagged = JaggedArray.fromiter(nonHadrons.p4.pt)
        photonsPt_jagged = JaggedArray.fromiter(photons.p4.pt)
        electronsPt_jagged = JaggedArray.fromiter(electrons.p4.pt)
        muonsPt_jagged = JaggedArray.fromiter(muons.p4.pt)
        
        chargedHadronMult = chargedHadronsPt_jagged.count()
        neutralHadronMult = neutralHadronsPt_jagged.count()
        nonHadronMult = nonHadronsPt_jagged.count()
        photonMult = photonsPt_jagged.count()
        electronMult = electronsPt_jagged.count()
        muonMult = muonsPt_jagged.count()
                
        output['pt'].fill(dataset=dataset,
                          pt=Jets.pt[:,0].flatten())
        output['chPt'].fill(dataset=dataset,
                            chPt=chargedHadrons.pt.flatten(),
                            hadron="Charged Hadrons",
                           )
        output['chPt'].fill(dataset=dataset,
                            chPt=neutralHadrons.pt.flatten(),
                            hadron="Neutral Hadrons",
                           )
        output['chPt'].fill(dataset=dataset,
                            chPt=nonHadrons.pt.flatten(),
                            hadron="Non Hadrons",
                           )
        
        output['hmult'].fill(dataset=dataset,
                            mult=chargedHadronMult,
                            hadron="Charged Hadrons",
                           )
        output['hmult'].fill(dataset=dataset,
                            mult=neutralHadronMult,
                            hadron="Neutral Hadrons",
                           )
        output['hmult'].fill(dataset=dataset,
                            mult=nonHadronMult,
                            hadron="Non Hadrons",
                           )
        output['h_phoMult'].fill(dataset=dataset,
                            mult=photonMult,
                           )        
       
        output['h_eleMult'].fill(dataset=dataset,
                            mult=electronMult,
                           )   
        
        output['h_muMult'].fill(dataset=dataset,
                            mult=muonMult,
                           )           
        tau1 = Jets.tau1[:,0].flatten()
        tau2 = Jets.tau2[:,0].flatten()
        tau3 = Jets.tau3[:,0].flatten()
        tau4 = Jets.tau4[:,0].flatten()
        
        tau21 = np.where( tau1 > 0, tau2/tau1, 0)
        tau31 = np.where( tau1 > 0, tau3/tau1, 0)
        tau32 = np.where( tau2 > 0, tau3/tau2, 0)
        
        
        constituent_pt = jetconstituents.pt
        constituent_y = jetconstituents.p4.rapidity
        constituent_phi = jetconstituents.p4.phi
        
        #calculate phi and y components of r_i
        pull_dphi = jetconstituents.p4.delta_phi( Jets[:,0].p4 )
        pull_dy = jetconstituents.p4.rapidity - Jets[:,0].p4.rapidity
        pull_r = jetconstituents.p4.delta_r( Jets[:,0].p4 )

        #calculate phi and y components of pull
        valtemp = np.divide( np.multiply(constituent_pt, pull_r), Jets[:,0].p4.pt )
        pull_i_phi = np.multiply(valtemp, pull_dphi)
        pull_i_y = np.multiply(valtemp, pull_dy)
        
        #sum components of pull
        pull_phi = np.sum( pull_i_phi.flatten() )
        pull_y = np.sum( pull_i_y.flatten() )
        pull_mag = np.sqrt(np.power(pull_phi,2) + np.power(pull_y,2))
        #calculate pull angle in polar coordinates
        pull_angle = np.arctan2(pull_phi, pull_y)

        
        output['eta'].fill(dataset=dataset,
                           eta=Jets.eta[:,0].flatten())
        output['flippedImages'] += processor.column_accumulator(H_images_new[validImages])
        output['jetPt'] += processor.column_accumulator(Jets.pt[:,0].flatten())
        output['jetEta'] += processor.column_accumulator(Jets.eta[:,0].flatten())
        output['jetPhi'] += processor.column_accumulator(Jets.phi[:,0].flatten())
        output['jetTau21'] += processor.column_accumulator(tau21)
        output['jetTau31'] += processor.column_accumulator(tau31)
        output['jetTau32'] += processor.column_accumulator(tau32)

        
        output['jetconstEvnum'] += processor.column_accumulator(np.repeat(eventReshaped[:,:1].flatten(), 20, axis=0))
        output['jetconstPt'] += processor.column_accumulator(pd.DataFrame(truncated_constituents.p4.pt).fillna(0.0).values.flatten())
        output['jetconstEta'] += processor.column_accumulator(pd.DataFrame(truncated_constituents.p4.eta).fillna(0.0).values.flatten())
        output['jetconstPhi'] += processor.column_accumulator(pd.DataFrame(truncated_constituents.p4.phi).fillna(0.0).values.flatten())
        output['chargedHadronMult'] += processor.column_accumulator(pd.DataFrame(chargedHadronMult).fillna(0.0).values.flatten())
        output['neutralHadronMult'] += processor.column_accumulator(pd.DataFrame(neutralHadronMult).fillna(0.0).values.flatten())
        output['photonMult'] += processor.column_accumulator(pd.DataFrame(photonMult).fillna(0.0).values.flatten())
        output['electronMult'] += processor.column_accumulator(pd.DataFrame(electronMult).fillna(0.0).values.flatten())
        output['muonMult'] += processor.column_accumulator(pd.DataFrame(muonMult).fillna(0.0).values.flatten())
        output['chargedHadronPt'] += processor.column_accumulator(pd.DataFrame(truncated_chargedHadrons.p4.pt).fillna(0.0).values.flatten())
        output['chargedHadronEta'] += processor.column_accumulator(pd.DataFrame(truncated_chargedHadrons.p4.pt).fillna(0.0).values.flatten())
        output['chargedHadronPhi'] += processor.column_accumulator(pd.DataFrame(truncated_chargedHadrons.p4.pt).fillna(0.0).values.flatten())
        output['neutralHadronPt'] += processor.column_accumulator(pd.DataFrame(truncated_neutralHadrons.p4.pt).fillna(0.0).values.flatten())
        output['neutralHadronEta'] += processor.column_accumulator(pd.DataFrame(truncated_neutralHadrons.p4.pt).fillna(0.0).values.flatten())
        output['neutralHadronPhi'] += processor.column_accumulator(pd.DataFrame(truncated_neutralHadrons.p4.pt).fillna(0.0).values.flatten())
        output['label'] += processor.column_accumulator(labels)
        output['image'].fill(dataset=dataset,
                            event=eventReshaped.flatten(), 
                            image_deta=deta.flatten(),
                            image_dphi=dphi.flatten(),
                            weight=normed_pts.flatten())
        output['dr'].fill(dataset=dataset,
                          dr=dr.flatten())
        output['pdgid'].fill(dataset=dataset,
                             pdgid=jetconstituents.pdgid.flatten())
        output['tau21'].fill(dataset=dataset,
                          tau=tau21)
        output['tau31'].fill(dataset=dataset,
                          tau=tau31)
        output['tau32'].fill(dataset=dataset,
                          tau=tau32)
        
        output['h_jetPull'].fill(dataset=dataset,
                          pull=pull_mag.flatten())
        
        output['h_jetPullAngle'].fill(dataset=dataset,
                          pullAngle=pull_angle.flatten())
        
        
        return output

    def postprocess(self, accumulator):
        return accumulator

In [46]:
fileset={
     'Zprime1TeV':['/mnt/data/ml/gravkk_zz_1TeV_addevnum.root'],
     #'Zprime2TeV':['/mnt/data/ml/gravkk_zz_2TeV_addevnum.root'],
     #'Zprime3TeV':['/mnt/data/ml/gravkk_zz_3TeV_addevnum.root'],
     #'QCD':['/mnt/data/ml/qcd_flat15to7000_addevnum.root']
}
tstart = time.time() 
output = processor.run_uproot_job(fileset,
                                  treename='T',
                                  processor_instance=JetImageProcessor(),
                                  executor=processor.futures_executor,
                                  executor_args={'workers':1, 'flatten': True},
                                  chunksize=1, maxchunks=1)
elapsed = time.time() - tstart
# print(output)

HBox(children=(IntProgress(value=0, description='Preprocessing', max=1, style=ProgressStyle(description_width=…




HBox(children=(IntProgress(value=0, description='Processing', max=2, style=ProgressStyle(description_width='in…

(1,)
[[<Row 3> <Row 4> <Row 5> ... <Row 480> <Row 497> <Row 498>]]
ic.shape (1,)
ic [[235 231 325 ... 0 0 0]]
nc.shape (1,)
nc [45]
valid_indices.shape (1,)
valid_indices [[True True True ... False False False]]
(1,)
(1,)
[[<Row 0> <Row 1> <Row 3> ... <Row 701> <Row 702> <Row 707>]]
ic.shape (1,)
ic [[366 46 346 ... 172 491 592]]
nc.shape (1,)
nc [53]
valid_indices.shape (1,)
valid_indices [[True True True ... True True True]]
(1,)



IndexError: index 1 is out of bounds for axis 0 with size 1

In [None]:
parameters = list(output)
print(parameters)
dataStream(output)

In [None]:
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm


In [None]:
plt.rcParams['figure.dpi'] = 90

In [None]:
fig, ax, grid = hist.plot2d(output['image'].integrate('dataset', 'Zprime1TeV').integrate("event",1),
                            xaxis='image_deta', patch_opts={'norm':LogNorm(vmin=1e-3, vmax=1),'cmap':'Greens'})
print(fig)
fig.savefig("Zprime1TeV_eventImage.png")

In [None]:
fig, ax, grid = hist.plot2d(output['image'].integrate('dataset', 'Zprime1TeV').integrate("event"),
                            xaxis='image_deta', patch_opts={'norm':LogNorm(vmin=1, vmax=1e3),'cmap':'Greens'})
print(fig)
fig.savefig("Zprime1TeV_image.png")

In [None]:
fig, ax, grid = hist.plot2d(output['image'].integrate('dataset', 'Zprime2TeV').integrate("event"),
                            xaxis='image_deta', patch_opts={'norm':LogNorm(vmin=1, vmax=1e3),'cmap':'Greens'})
print(fig)
fig.savefig("Zprime2TeV_image.png")

In [None]:
fig, ax, grid = hist.plot2d(output['image'].integrate('dataset', 'Zprime3TeV').integrate("event"),
                            xaxis='image_deta', patch_opts={'norm':LogNorm(vmin=1, vmax=1e3),'cmap':'Greens'})
print(fig)
fig.savefig("Zprime3TeV_image.png")

In [None]:
fig, ax, grid = hist.plot2d(output['image'].integrate('dataset', 'Zprime3TeV').integrate("event",1)
                            ,xaxis='image_deta', patch_opts={'norm':LogNorm(vmin=1e-3, vmax=1),'cmap':'Greens'})
print(fig)
fig.savefig("Zprime3TeV_eventImage.png")

In [None]:
fig, ax, grid = hist.plot2d(output['image'].integrate('dataset', 'QCD').integrate("event"),
                            xaxis='image_deta', patch_opts={'norm':LogNorm(vmin=1, vmax=1e3),'cmap':'Greens'})
print(fig)
fig.savefig("QCD_image.png")

In [None]:
fig, ax = hist.plotgrid(output['dr'], overlay="dataset", stack=False, density=True
                                 )
for iax in ax.flatten():
    iax.autoscale(axis='y')
fig.show()
fig.savefig("dr_plot.png")

In [None]:
fig, ax = hist.plotgrid(output['pt'], overlay="dataset", stack=False, density=True
                                 )
for iax in ax.flatten():
    iax.autoscale(axis='y')
fig.show()
fig.savefig("pt_plot.png")

In [None]:
fig, ax = hist.plotgrid(output['tau21'], overlay="dataset", stack=False, density=True
                                 )
for iax in ax.flatten():
    iax.autoscale(axis='y')
    iax.set_xlabel(r'$\tau_{21}$')
fig.show()
fig.savefig("tau_plot.png")

In [None]:
fig, ax = hist.plotgrid(output['chPt'].integrate("dataset"), overlay="hadron", stack=False, density=True
                                 )
for iax in ax.flatten():
    iax.autoscale(axis='y')
    
fig.show()
fig.savefig("chPt_plot.png")

In [None]:
fig, ax = hist.plotgrid(output['hmult'].integrate("hadron","Charged Hadrons"), overlay="dataset", stack=False, density=True
                                 )
for iax in ax.flatten():
    iax.autoscale(axis='y')
    iax.set_xlabel("Charged Hadron Multicplicity")
    
fig.show()
fig.savefig("chMult_plot.png")

In [None]:
fig, ax = hist.plotgrid(output['hmult'].integrate("hadron","Neutral Hadrons"), overlay="dataset", stack=False, density=True
                                 )
for iax in ax.flatten():
    iax.autoscale(axis='y')
    
fig.show()
fig.savefig("neutMult_plot.png")

In [None]:
fig, ax = hist.plotgrid(output['hmult'].integrate("hadron","Charged Hadrons"), overlay="dataset", stack=False, density=True
                                 )
for iax in ax.flatten():
    iax.autoscale(axis='y')
    
fig.show()


In [None]:
fig, ax = hist.plotgrid(output['h_eleMult'], overlay="dataset", stack=False, density=True
                                 )
for iax in ax.flatten():
    iax.autoscale(axis='y')
    iax.set_xlim([0,10])
    iax.set_title('Electron Multiplicity')
    iax.set_xlabel('Electron Multiplicity')

fig.show()
fig.savefig("eleMult_plot.png")

In [None]:
fig, ax = hist.plotgrid(output['h_muMult'], overlay="dataset", stack=False, density=True
                                 )
for iax in ax.flatten():
    iax.autoscale(axis='y')
    iax.set_xlim([0,10])
    iax.set_title('Muon Multiplicity')
    iax.set_xlabel('Muon Multiplicity')

fig.show()
fig.savefig("muMult_plot.png")

In [None]:
fig, ax = hist.plotgrid(output['h_phoMult'], overlay="dataset", stack=False, density=True
                                 )
for iax in ax.flatten():
    iax.autoscale(axis='y')
    iax.set_title('Photon Multiplicity')
    iax.set_xlabel('Photon Multiplicity')



fig.show()
fig.savefig("phoMult_plot.png")

In [None]:
fig, ax = hist.plotgrid(output['h_jetPullAngle'], overlay="dataset", stack=False, density=True
                                 )
for iax in ax.flatten():
    iax.autoscale(axis='y')
    iax.set_title('Jet Pull Angle')


fig.show()
fig.savefig("pullAngle_plot.png")

In [None]:
fig, ax = hist.plotgrid(output['h_jetPull'], overlay="dataset", stack=False, density=True
                                 )
for iax in ax.flatten():
    iax.autoscale(axis='y')
    iax.set_title('Jet Pull')
    iax.set_xlabel('Jet Pull')
    iax.set_xlim([0,0.1])


fig.show()
fig.savefig("jetPull_plot.png")