In [None]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="-1"    
os.environ["KERAS_BACKEND"] = "tensorflow"

import uproot
import matplotlib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import networkx as nx
import copy
import math
import keras
import glob
import sys
import datetime

In [None]:
from itertools import combinations

In [None]:
%matplotlib inline

In [None]:
def get_index_triu_vector(i, j, vecsize):
    """Return the flat index of element ``(i, j)`` of a packed strict upper triangle.

    The formula corresponds to a ``vecsize`` x ``vecsize`` matrix whose
    strictly-upper-triangular entries (``j > i``) are laid out row after row
    in a 1D vector.

    Args:
        i: Row index of the element.
        j: Column index of the element; expected to satisfy ``j > i``.
        vecsize: Number of rows/columns of the square matrix.

    Returns:
        Integer position of ``(i, j)`` inside the packed vector.
    """
    # Rows 0..i-1 would occupy i*vecsize slots if stored in full; the
    # i*(i+1)/2 slots on or below the diagonal are not stored. i*(i+1) is
    # always even, so floor division is exact and avoids the float round-trip
    # (which loses precision for very large indices).
    skipped = (i * (i + 1)) // 2
    row_start = i * vecsize - skipped
    return row_start + (j - i - 1)

In [60]:
# Input ntuple; produced with test/ntuplizer.py
fn = "/storage/user/jpata/particleflow/data/testing/step3_AOD_ntuple.root"
fi = uproot.open(fn)
tree = fi.get("pftree")

# Read every branch of the tree and re-key the result so that branch names
# (decoded here from ASCII bytes) are plain `str` objects.
data = {
    str(branch_name, 'ascii'): branch_values
    for branch_name, branch_values in tree.arrays(tree.keys()).items()
}

## Full event

In [None]:
# Index of the event shown in the following cells
iev = 0

trk_inner_eta = data["tracks_inner_eta"][iev]
trk_inner_phi = data["tracks_inner_phi"][iev]
trk_outer_eta = data["tracks_outer_eta"][iev]
trk_outer_phi = data["tracks_outer_phi"][iev]

# Keep only tracks whose inner/outer position is available. Elsewhere in this
# notebook a position counts as missing when eta AND phi are both exactly 0,
# so a position is valid when at least one of the two is non-zero (hence `|`).
trks_outer_valid = (trk_outer_eta != 0) | (trk_outer_phi != 0)
trks_inner_valid = (trk_inner_eta != 0) | (trk_inner_phi != 0)

plt.figure(figsize=(20, 20))
plt.scatter(
    data["clusters_eta"][iev], data["clusters_phi"][iev],
    marker="s", label="cluster (N={0})".format(len(data["clusters_eta"][iev])))
plt.scatter(
    data["tracks_eta"][iev], data["tracks_phi"][iev],
    marker="x", label="general track (N={0})".format(len(data["tracks_eta"][iev])))
plt.scatter(
    trk_inner_eta[trks_inner_valid], trk_inner_phi[trks_inner_valid],
    marker="x", label="inner track (N={0})".format(np.sum(trks_inner_valid)))
plt.scatter(
    trk_outer_eta[trks_outer_valid], trk_outer_phi[trks_outer_valid],
    marker="x", label="outer track (N={0})".format(np.sum(trks_outer_valid)))
plt.scatter(
    data["pfcands_eta"][iev], data["pfcands_phi"][iev],
    marker="o", alpha=0.2, s=100, color="black",
    label="candidate (N={0})".format(len(data["pfcands_eta"][iev])))
plt.scatter(
    data["genparticles_eta"][iev], data["genparticles_phi"][iev],
    marker="o", alpha=0.2, s=200, color="purple",
    label="genparticle (N={0})".format(len(data["genparticles_eta"][iev])))

plt.legend()

## Elements to candidates

In [None]:
# Element -> candidate associations: one (iblock, ielem, icand) entry per link
linktree = fi.get("linktree_elemtocand")

# Same re-keying as for the main tree: branch names become plain `str`
data_elemtocand = {
    str(branch_name, 'ascii'): branch_values
    for branch_name, branch_values in linktree.arrays(linktree.keys()).items()
}

In [None]:
# Graph for event `iev`: element nodes ("E", iblock, ielem), candidate nodes
# ("C", icand), and one edge per element-to-candidate link.
pfgraph = nx.Graph()

node_pos = {}
node_colors = {}

# --- calorimeter clusters (green) ---
cl_iblock = data["clusters_iblock"][iev]
cl_ielem = data["clusters_ielem"][iev]
cl_eta = data["clusters_eta"][iev]
cl_phi = data["clusters_phi"][iev]
for icl in range(len(cl_iblock)):
    node = ("E", cl_iblock[icl], cl_ielem[icl])
    node_pos[node] = cl_eta[icl], cl_phi[icl]
    node_colors[node] = "green"
    pfgraph.add_node(node)

# --- tracks (red) ---
trk_iblock = data["tracks_iblock"][iev]
trk_ielem = data["tracks_ielem"][iev]
for itrk in range(len(trk_iblock)):
    node = ("E", trk_iblock[itrk], trk_ielem[itrk])
    # Position preference: outer, then inner, then the general track
    # parameters. A position of exactly (0, 0) is treated as missing.
    outer_pos = data["tracks_outer_eta"][iev][itrk], data["tracks_outer_phi"][iev][itrk]
    inner_pos = data["tracks_inner_eta"][iev][itrk], data["tracks_inner_phi"][iev][itrk]
    pos = data["tracks_eta"][iev][itrk], data["tracks_phi"][iev][itrk]
    for candidate_pos in (outer_pos, inner_pos):
        if not (candidate_pos[0] == 0 and candidate_pos[1] == 0):
            pos = candidate_pos
            break
    node_pos[node] = pos
    node_colors[node] = "r"
    pfgraph.add_node(node)

# --- PF candidates (black), identified by their position in the event ---
for icand in range(len(data["pfcands_iblock"][iev])):
    node = ("C", icand)
    node_pos[node] = data["pfcands_eta"][iev][icand], data["pfcands_phi"][iev][icand]
    node_colors[node] = "black"
    pfgraph.add_node(node)

# --- element <-> candidate links ---
link_iblock = data_elemtocand["linkdata_elemtocand_iblock"][iev]
link_ielem = data_elemtocand["linkdata_elemtocand_ielem"][iev]
link_icand = data_elemtocand["linkdata_elemtocand_icand"][iev]
for ilink in range(len(link_ielem)):
    u = ("E", link_iblock[ilink], link_ielem[ilink])
    v = ("C", link_icand[ilink])
    # Links whose endpoints were not registered above are skipped
    if not (u in pfgraph.nodes and v in pfgraph.nodes):
        continue
    (eta_u, phi_u), (eta_v, phi_v) = node_pos[u], node_pos[v]
    # NOTE(review): plain Euclidean distance in (eta, phi); no phi wrap-around
    separation = math.sqrt((eta_u - eta_v)**2 + (phi_u - phi_v)**2)
    pfgraph.add_edge(u, v, weight=separation)

In [None]:
# Draw the element/candidate graph at the (eta, phi) positions of its nodes
plt.figure(figsize=(10,10))
nx.draw(pfgraph, pos=node_pos, node_size=5, alpha=0.2, node_color=[node_colors[n] for n in pfgraph.nodes])

# nx.draw switches the axes off; turn them back on so the labels are shown
plt.axis('on')
# Raw strings: "\e" and "\p" are invalid escape sequences in normal literals
plt.xlabel(r"$\eta$")
plt.ylabel(r"$\phi$")

In [None]:
# Distinct candidate indices referenced by the element-to-candidate links of event `iev`
np.unique(data_elemtocand["linkdata_elemtocand_icand"][iev])

In [None]:
# Boolean mask over the links of event `iev`: True where the link points to candidate index 1
sel = data_elemtocand["linkdata_elemtocand_icand"][iev] == 1

In [None]:
# Block indices of the links selected by `sel`
data_elemtocand["linkdata_elemtocand_iblock"][iev][sel]

In [None]:
# Element indices (within their block) of the links selected by `sel`
data_elemtocand["linkdata_elemtocand_ielem"][iev][sel]

In [None]:
# Element indices of the clusters of event `iev` that belong to block 13
# (13 is a hard-coded block index picked for inspection)
s2 = data["clusters_iblock"][iev] == 13
data["clusters_ielem"][iev][s2]

In [None]:
# Block index of every cluster in event `iev`
data["clusters_iblock"][iev]

In [None]:
# Several PFCandidates can be linked to the same (block, element) pair;
# when that happens the candidate with the highest pt is chosen.
def assign_cand(iblocks, ielems, cands_pt, _i, dd):
    """Pick the candidate index associated with element number ``_i``.

    Args:
        iblocks: Per-element block indices.
        ielems: Per-element indices within the block.
        cands_pt: Per-candidate pt, indexable by candidate index.
        _i: Position of the element in ``iblocks`` / ``ielems``.
        dd: Mapping ``(iblock, ielem) -> list of candidate indices``.

    Returns:
        The linked candidate index with the largest pt (the first one listed
        in case of a tie), or ``-1`` if the element has no linked candidate.
    """
    linked = dd.get((iblocks[_i], ielems[_i]), [])
    if len(linked) == 0:
        return -1
    return max(linked, key=lambda icand: cands_pt[icand])
    
def prepare_data(data, data_elemtocand, iev):
    """Assemble element features, candidate labels and candidate data for one event.

    Args:
        data: Mapping from branch name to per-event arrays (clusters, tracks,
            PF candidates).
        data_elemtocand: Mapping with the ``linkdata_elemtocand_*`` branches
            describing element-to-candidate links.
        iev: Index of the event to process.

    Returns:
        Tuple ``(X, y, cand_data)``:

        * ``X``: one row per element, clusters first and tracks after them.
          Cluster rows hold (type, energy, eta, phi) and are zero-padded on
          the right to the track width; track rows hold (10, qoverp, eta,
          phi, inner eta, inner phi, outer eta, outer phi).
        * ``y``: per row of ``X``, the candidate index returned by
          ``assign_cand`` (``-1`` when no candidate is linked).
        * ``cand_data``: one row per candidate with (pdgid, pt, eta, phi).
    """
    link_iblock = data_elemtocand["linkdata_elemtocand_iblock"][iev]
    link_ielem = data_elemtocand["linkdata_elemtocand_ielem"][iev]
    link_icand = data_elemtocand["linkdata_elemtocand_icand"][iev]

    # (iblock, ielem) -> candidate indices linked to that element, in link order
    dd = {}
    for ilink, icand in enumerate(link_icand):
        dd.setdefault((link_iblock[ilink], link_ielem[ilink]), []).append(icand)

    cand_pt = data["pfcands_pt"][iev]

    # clusters
    cluster_columns = [
        data["clusters_type"][iev],
        data["clusters_energy"][iev],
        data["clusters_eta"][iev],
        data["clusters_phi"][iev],
    ]
    X1 = np.vstack(cluster_columns).T
    cl_iblock = data["clusters_iblock"][iev]
    cl_ielem = data["clusters_ielem"][iev]
    n_clusters = len(data["clusters_phi"][iev])
    ys1 = np.array([
        assign_cand(cl_iblock, cl_ielem, cand_pt, icl, dd)
        for icl in range(n_clusters)
    ])

    # tracks; the leading constant 10 fills the "type" column
    qoverp = data["tracks_qoverp"][iev]
    track_columns = [
        10*np.ones_like(qoverp),
        qoverp,
        data["tracks_eta"][iev],
        data["tracks_phi"][iev],
        data["tracks_inner_eta"][iev],
        data["tracks_inner_phi"][iev],
        data["tracks_outer_eta"][iev],
        data["tracks_outer_phi"][iev],
    ]
    X2 = np.vstack(track_columns).T
    trk_iblock = data["tracks_iblock"][iev]
    trk_ielem = data["tracks_ielem"][iev]
    n_tracks = len(data["tracks_phi"][iev])
    ys2 = np.array([
        assign_cand(trk_iblock, trk_ielem, cand_pt, itrk, dd)
        for itrk in range(n_tracks)
    ])

    # Zero-pad the cluster rows on the right so both tables have equal width
    n_missing_cols = X2.shape[1] - X1.shape[1]
    X1p = np.pad(X1, ((0,0),(0, n_missing_cols)), mode="constant")
    X = np.vstack([X1p, X2])
    y = np.concatenate([ys1, ys2])

    cand_data = np.vstack([
        data["pfcands_pdgid"][iev],
        cand_pt,
        data["pfcands_eta"][iev],
        data["pfcands_phi"][iev],
    ]).T

    return X, y, cand_data

In [None]:
# (block index, element index) of every link of event `iev` that points to candidate index 1
s = data_elemtocand["linkdata_elemtocand_icand"][iev] == 1
data_elemtocand["linkdata_elemtocand_iblock"][iev][s], data_elemtocand["linkdata_elemtocand_ielem"][iev][s]

In [None]:
# Element feature matrix, per-element candidate label (-1 = none) and candidate table for event `iev`
X, y, cand_data = prepare_data(data, data_elemtocand, iev)

In [None]:
# For the first 100 distinct labels in `y`, print the candidate index, the
# type column of every element assigned to it, and the candidate's own data.
for cand_index in np.unique(y)[:100]:
    if cand_index < 0:
        # negative label: element without an assigned candidate
        continue
    element_types = X[y==cand_index, 0]
    print(
        "cand={0}".format(cand_index),
        "eltypes={0}".format(element_types),
        "cand_data={0}".format(list(cand_data[cand_index])),
    )

In [None]:
# One independent subgraph per connected component of `pfgraph`.
# nx.connected_component_subgraphs() was removed in NetworkX 2.4; building the
# subgraphs from nx.connected_components() works on old and new versions.
# .copy() detaches each subgraph from `pfgraph` (a bare subgraph() is a view).
sub_graphs = [pfgraph.subgraph(component).copy() for component in nx.connected_components(pfgraph)]

In [None]:
# Distribution of the subgraph sizes; Graph.size() counts edges.
size_bins = np.linspace(0,20,21)
subgraph_sizes = [sg.size() for sg in sub_graphs]
plt.hist(subgraph_sizes, bins=size_bins)
plt.yscale("log")
plt.xticks(size_bins)
plt.xlabel("subgraph size")
plt.ylabel("number of subgraphs")

In [None]:
# Draw up to the first 16 connected components on a 4x4 grid.
plt.figure(figsize=(10,10))
# Slicing instead of range(16) keeps the cell from raising IndexError when the
# event has fewer than 16 components.
for i, sg in enumerate(sub_graphs[:16]):
    ax = plt.subplot(4,4,i+1)
    nx.draw(sg, node_color=[node_colors[n] for n in sg.nodes], ax=ax, node_size=20)
    # nx.draw hides the axes; re-enable the frame so the panels are delimited
    plt.axis('on')
    plt.title("size={0}".format(sg.size()))
plt.tight_layout()

In [None]:
# Per-event object multiplicities, all drawn with the same binning
bins = np.linspace(0,5000,20)
for count_branch, legend_label in (
    ("nclusters", "clusters"),
    ("ntracks", "tracks"),
    ("npfcands", "PF candidates"),
):
    plt.hist(data[count_branch], bins=bins, histtype="step", lw=2, label=legend_label)
plt.legend(loc="best")
plt.xlabel("number of objects")

## Detector subsystems

In [None]:
# Six-panel (eta, phi) overview of one event: calo clusters, track outer and
# inner positions, PF candidates (all / above a pt cut) and gen particles.
# Marker sizes are the s= arrays passed to each scatter call below.

#Event number to plot
i = 0

#minimum pt of the pf candidate
pfcand_ptcut = 2


c_eta = data["clusters_eta"]
c_phi = data["clusters_phi"]
c_en = data["clusters_energy"]
c_type = data["clusters_type"]

t_o_eta = data["tracks_outer_eta"]
t_o_phi = data["tracks_outer_phi"]
t_i_eta = data["tracks_inner_eta"]
t_i_phi = data["tracks_inner_phi"]
# sign of q/p is used as the track charge, 1/|q/p| as the marker size
t_q = np.sign(data["tracks_qoverp"])
t_en = 1.0/np.abs(data["tracks_qoverp"])

cand_eta = data["pfcands_eta"]
cand_phi = data["pfcands_phi"]
cand_en = data["pfcands_pt"]
cand_charge = data["pfcands_charge"]

g_eta = data["genparticles_eta"]
g_phi = data["genparticles_phi"]
g_en = data["genparticles_pt"]

plt.figure(figsize=(15,10))

# Axis labels use raw strings: "\e" and "\p" are invalid escape sequences in
# ordinary string literals.

# Panel 1: calorimeter clusters, one colour per cluster type
ax = plt.subplot(2,3,1)
for ct in [2,3,4,5,8,9,11]:
    cluster_type_sel = c_type[i]==ct
    plt.scatter(
        c_eta[i][cluster_type_sel],
        c_phi[i][cluster_type_sel],
        s=c_en[i][cluster_type_sel],
        marker="s", alpha=0.5, label="{0}".format(ct))
plt.xlabel(r"$\eta$")
plt.ylabel(r"$\phi$")
plt.title("calo clusters (N={0})".format(len(c_eta[i])))
plt.xlim(-5,5)
plt.ylim(-5,5)
#plt.legend(loc="best", ncol=2)

# Panel 2: track outer positions, one colour per charge sign
ax = plt.subplot(2,3,2)
for track_charge in [-1, 1]:
    track_charge_sel = t_q[i] == track_charge
    plt.scatter(t_o_eta[i][track_charge_sel], t_o_phi[i][track_charge_sel], s=t_en[i][track_charge_sel], marker="o", alpha=0.5)
plt.xlabel(r"outer $\eta$")
plt.ylabel(r"outer $\phi$")
plt.title("tracks outer hit (N={0})".format(len(t_o_eta[i])))
plt.xlim(-5,5)
plt.ylim(-5,5)

# Panel 3: track inner positions, one colour per charge sign
ax = plt.subplot(2,3,3)
for track_charge in [-1, 1]:
    track_charge_sel = t_q[i] == track_charge
    plt.scatter(t_i_eta[i][track_charge_sel], t_i_phi[i][track_charge_sel], s=t_en[i][track_charge_sel], marker="o", alpha=0.5)
plt.xlabel(r"inner $\eta$")
plt.ylabel(r"inner $\phi$")
# Count the inner-position array shown in this panel (was the outer array)
plt.title("tracks inner hit (N={0})".format(len(t_i_eta[i])))
plt.xlim(-5,5)
plt.ylim(-5,5)

# Panel 4: all PF candidates, one colour per charge
ax = plt.subplot(2,3,4)
for _cand_charge in [-1, 1, 0]:
    cand_charge_sel = cand_charge[i] == _cand_charge
    plt.scatter(cand_eta[i][cand_charge_sel], cand_phi[i][cand_charge_sel], s=cand_en[i][cand_charge_sel], marker="o", alpha=0.5)
plt.xlabel(r"$\eta$")
plt.ylabel(r"$\phi$")
plt.title("PF candidates (N={0})".format(len(cand_eta[i])))
plt.xlim(-5,5)
plt.ylim(-5,5)

# Panel 5: PF candidates above the pt cut
ax = plt.subplot(2,3,5)
cand_pt_sel = (cand_en[i] > pfcand_ptcut)
for _cand_charge in [-1, 1, 0]:
    cand_charge_sel = (cand_charge[i] == _cand_charge) & (cand_pt_sel)
    plt.scatter(
        cand_eta[i][cand_charge_sel],
        cand_phi[i][cand_charge_sel],
        s=cand_en[i][cand_charge_sel], marker="o", alpha=0.5)
plt.xlabel(r"$\eta$")
plt.ylabel(r"$\phi$")
plt.title("PF candidates with $p_T > {0}$ GeV, (N={1})".format(pfcand_ptcut, len(cand_eta[i][cand_pt_sel])))
plt.xlim(-5,5)
plt.ylim(-5,5)

# Panel 6: generator-level particles
ax = plt.subplot(2,3,6)
plt.scatter(g_eta[i], g_phi[i], s=g_en[i], marker="x", alpha=0.5)
plt.xlabel(r"$\eta$")
plt.ylabel(r"$\phi$")
plt.title("genparticles (N={0})".format(len(g_eta[i])))

plt.xlim(-5,5)
plt.ylim(-5,5)
plt.tight_layout()

In [None]:
from collections import Counter

In [None]:
# For every event, record how many elements (clusters + tracks) sit in the
# largest, second, third and fourth largest block.
all_blocks_counts_0 = []
all_blocks_counts_1 = []
all_blocks_counts_2 = []
all_blocks_counts_3 = []

# NOTE(review): the loop variable is the notebook-level `iev`, so after this
# cell `iev` is the index of the last event and every later cell that reads
# `iev` works on that event. Also assumes each event has at least four
# distinct blocks; fewer would raise IndexError below.
for iev in range(len(data["clusters_iblock"])):
    bls = list(data["clusters_iblock"][iev])
    bls2 = list(data["tracks_iblock"][iev])
    # element multiplicity of each block, largest first
    bls_counts = sorted(Counter(bls + bls2).values(), reverse=True)
    all_blocks_counts_0.append(bls_counts[0])
    all_blocks_counts_1.append(bls_counts[1])
    all_blocks_counts_2.append(bls_counts[2])
    all_blocks_counts_3.append(bls_counts[3])

In [None]:
# Number of distinct block indices among the clusters and tracks of event `iev`.
# NOTE(review): once the per-event counting loop has run, `iev` is the last event index, not 0.
len(np.unique(list(data["clusters_iblock"][iev]) + list(data["tracks_iblock"][iev])))

In [None]:
# Elements per block for the three largest blocks of each event,
# on a common binning that extends to 1.5x the overall maximum.
bins = np.linspace(0,1.5*max(all_blocks_counts_0), 100)
for block_counts, rank_label in (
    (all_blocks_counts_0, "first"),
    (all_blocks_counts_1, "second"),
    (all_blocks_counts_2, "third"),
):
    plt.hist(block_counts, bins=bins, histtype="step", lw=2, label=rank_label)

plt.yscale("log")
plt.legend(loc="best")
plt.xlabel("elements per block")

# Elements to blocks

In [None]:
# Link data between the elements of a block ("linktree"); branch names are
# re-keyed to plain `str` like for the other trees.
tree2 = fi.get("linktree")
linkdata = {
    str(branch_name, 'ascii'): branch_values
    for branch_name, branch_values in tree2.arrays(tree2.keys()).items()
}

In [None]:
# Graph of the PF elements of event `iev`: nodes are (iblock, ielem) and all
# elements of the same block are connected to each other.
# Note: `node_pos` and `node_colors` are rebound here and no longer describe
# the element/candidate graph built earlier.
pfgraph2 = nx.Graph()

items_block = {}   # iblock -> list of element nodes in that block
node_pos = {}      # node -> (eta, phi)
node_colors = {}   # node -> matplotlib colour
node_sizes = {}    # node -> marker size
node_types = {}    # node -> cluster type, 0 for tracks

# colour per cluster type
cluster_colors = {
    2: "blue",
    3: "blue",
    4: "green",
    5: "red",
    8: "orange",
    9: "orange",
    11: "purple",
}

# --- clusters: coloured by type, sized by energy ---
for icluster, iblock in enumerate(data["clusters_iblock"][iev]):
    this = (iblock, data["clusters_ielem"][iev][icluster])
    pfgraph2.add_node(this)

    node_pos[this] = (data["clusters_eta"][iev][icluster], data["clusters_phi"][iev][icluster])
    tp = data["clusters_type"][iev][icluster]
    node_colors[this] = cluster_colors[tp]
    node_sizes[this] = data["clusters_energy"][iev][icluster]
    items_block.setdefault(iblock, []).append(this)
    node_types[this] = tp

# --- tracks: black, sized by 10*|q/p| ---
for itrack, iblock in enumerate(data["tracks_iblock"][iev]):
    this = (iblock, data["tracks_ielem"][iev][itrack])
    pfgraph2.add_node(this)

    # NOTE(review): `p` holds |q/p|, not the momentum; confirm that scaling
    # the marker with it (rather than with 1/|q/p|) is intended.
    p = abs(data["tracks_qoverp"][iev][itrack])

    # Position preference: outer, then inner, then general track parameters;
    # a position of exactly (0, 0) is treated as missing.
    outer_pos = data["tracks_outer_eta"][iev][itrack], data["tracks_outer_phi"][iev][itrack]
    inner_pos = data["tracks_inner_eta"][iev][itrack], data["tracks_inner_phi"][iev][itrack]
    track_pos = data["tracks_eta"][iev][itrack], data["tracks_phi"][iev][itrack]
    for candidate_pos in (outer_pos, inner_pos):
        if not (candidate_pos[0] == 0 and candidate_pos[1] == 0):
            track_pos = candidate_pos
            break
    node_pos[this] = track_pos

    node_colors[this] = "black"
    node_sizes[this] = 10*p
    node_types[this] = 0

    items_block.setdefault(iblock, []).append(this)

# Fully connect the elements of each block
for iblock, items in items_block.items():
    pfgraph2.add_edges_from(combinations(items, 2))

In [None]:
# PF elements of the event at their (eta, phi) positions, without edges
plt.figure(figsize=(10, 10))
plt.xlim(-5,5)
plt.ylim(-5,5)
nx.draw(pfgraph2,
        pos=node_pos,
        node_size=[node_sizes[n] for n in pfgraph2.nodes],
        node_color=[node_colors[n] for n in pfgraph2.nodes],
        edgelist=[],  # empty edge list: draw the nodes only
        node_shape="s",
        alpha=0.5, width=0.05
)
plt.title("PF elements (N={0})".format(len(pfgraph2.nodes)))
plt.axis('on')
# Raw strings: "\e" and "\p" are invalid escape sequences in normal literals
plt.xlabel(r"$\eta$")
plt.ylabel(r"$\phi$")

In [None]:
# PF elements of the event with the within-block edges drawn as thin lines
plt.figure(figsize=(10, 10))
plt.xlim(-5,5)
plt.ylim(-5,5)
nx.draw(pfgraph2,
        pos=node_pos,
        node_size=[node_sizes[n] for n in pfgraph2.nodes],
        node_color=[node_colors[n] for n in pfgraph2.nodes],
        node_shape="s",
        alpha=0.5, width=0.001
)
#plt.xticks(np.arange(-5,5,0.3))
#plt.yticks(np.arange(-5,5,0.3))

#plt.grid()
# plt.xlabel("cluster x / scaled track px")
# plt.ylabel("cluster y / scaled track px")
# plt.title("PF elements with blocks")
plt.axis('on')
# Raw strings: "\e" and "\p" are invalid escape sequences in normal literals
plt.xlabel(r"$\eta$")
plt.ylabel(r"$\phi$")

In [None]:
# The four blocks with the most elements, as (iblock, [element nodes]) pairs,
# each drawn in its own panel with the stored element-to-element links.
biggest_clusters = sorted(
    [(k, items_block[k]) for k in items_block.keys()],
    reverse=True, key=lambda x: len(x[1])
)[:4]

plt.figure(figsize=(10, 10))
iplot = 1

subs = []
for iblock, elems in biggest_clusters:
    nelem = len(elems)

    sub = nx.Graph()
    sub.add_nodes_from(items_block[iblock])

    # Link entries of this block (mask computed once and reused)
    in_block = linkdata["linkdata_iblock"][iev] == iblock
    n = linkdata["linkdata_nelem"][iev][in_block]
    ks = linkdata["linkdata_k"][iev][in_block]
    dists = linkdata["linkdata_distance"][iev][in_block]
    # packed-vector index of a link -> its position in `dists`
    kdict = {k: ilink for ilink, k in enumerate(ks)}

    vecsize = n[0]
    # Strict upper triangle (col > row). k must be an integer diagonal offset;
    # the previous k=0.1 is not a documented value for this parameter.
    rows, cols = np.triu_indices(n=vecsize, k=1, m=vecsize)
    for row, col in zip(rows, cols):
        ilink = kdict.get(get_index_triu_vector(row, col, vecsize))
        if ilink is None:
            # no link stored for this pair of elements
            continue
        # col > row always holds here, so the larger element index goes first
        u = (iblock, col)
        v = (iblock, row)
        if u in sub.nodes and v in sub.nodes:
            sub.add_edge(u, v, weight=dists[ilink])
    subs += [sub]

    ax = plt.subplot(2,2,iplot)

    plt.xlim(-5, 5)
    plt.ylim(-5, 5)

    nx.draw(sub,
            pos=node_pos,
            node_size=[2*node_sizes[node] for node in sub.nodes],
            node_color=[node_colors[node] for node in sub.nodes],
            node_shape="s",
            alpha=0.5, width=0.05, ax=ax
    )

    # PF candidates attributed to this block, for the panel caption
    pfcand_inds = (data["pfcands_iblock"][iev] == iblock)
    pf_pt = data["pfcands_pt"][iev][pfcand_inds]
    pf_phi = data["pfcands_phi"][iev][pfcand_inds]

    plt.text(0.0, 1.0,
        "ibl={0} Nel={1}, Npf={2}, <pt>={3:.2f}".format(
            iblock, nelem, len(pf_pt), np.mean(pf_pt)),
            transform=ax.transAxes, verticalalignment="bottom")
    # nx.draw hides the axes; switch them back on
    plt.axis('on')
    iplot += 1

plt.tight_layout()