<a href="https://colab.research.google.com/github/sznajder/Lectures/blob/master/FwdMuontHitGraphConstruction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Notebook to Construct and Analyze Forward mu hit graphs.

### Based on: 

https://github.com/jmduarte/heptrkx-gnn-tracking/blob/master/README.md

https://github.com/jmduarte/gnn-fpga/blob/master/README.md


In [1]:
import os
import sys
import numpy as np
import pandas as pd
from collections import namedtuple
import uproot
import h5py
import random 
import matplotlib.pyplot as plt
from google.colab import drive
#
drive.mount('/content/gdrive', force_remount=False)
sys.path.append('gdrive/My Drive/Colab Notebooks')


!pip install uproot
#!pip install ipynb
#!pip install import_ipynb


Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


## Define Globals and Utility Functions

In [0]:
# Globals
kDT, kCSC, kRPC, kGEM, kME0 = 0, 1, 2, 3, 4
eta_bins = np.array([1.2, 1.4, 1.6, 1.8, 2.0, 2.15, 2.5])
eta_bins = eta_bins[::-1]
pt_bins = np.array([-0.50, -0.333333, -0.25, -0.20, -0.15, -0.10, -0.05, 0.05, 0.10, 0.15, 0.20, 0.25, 0.333333, 0.50])
nlayers = 12  # 5 (CSC) + 4 (RPC) + 3 (GEM)


# Event VARS from Root tree and HITVARS
VARS = ['ve_event','vh_size','vh_type', 'vh_sector', 'vh_station', 'vh_ring','vh_sim_z','vh_sim_r','vh_sim_phi', 'vh_sim_eta', 'vh_sim_theta','vh_bend','vh_sim_tp1','vh_sim_tp2']
HITVARS = ['vh_layer','vh_sector', 'vh_station', 'vh_ring','vh_sim_z','vh_sim_r','vh_sim_phi', 'vh_sim_eta', 'vh_sim_theta','vh_bend','vh_sim_tp1','vh_sim_tp2']


# Graph is a namedtuple of (X, Ri, Ro, y) for convenience
Graph = namedtuple('Graph', ['X', 'Ri', 'Ro', 'y'])


# Sparse graph uses the indices for the Ri, Ro matrices
SparseGraph = namedtuple('SparseGraph',
        ['X', 'Ri_rows', 'Ri_cols', 'Ro_rows', 'Ro_cols', 'y'])


# Decide EMTF hit layer number
emtf_lut = np.zeros((5,5,5), dtype=np.int32) - 99
emtf_lut[1,1,4] = 2  # ME1/1a
emtf_lut[1,1,1] = 2  # ME1/1b
emtf_lut[1,1,2] = 3  # ME1/2
emtf_lut[1,1,3] = 3  # ME1/3
emtf_lut[1,2,1] = 7  # ME2/1
emtf_lut[1,2,2] = 7  # ME2/2
emtf_lut[1,3,1] = 8  # ME3/1
emtf_lut[1,3,2] = 8  # ME3/2
emtf_lut[1,4,1] = 10 # ME4/1
emtf_lut[1,4,2] = 10 # ME4/2

emtf_lut[2,1,2] = 4  # RE1/2
emtf_lut[2,2,2] = 5  # RE2/2
emtf_lut[2,3,1] = 9  # RE3/1
emtf_lut[2,3,2] = 9  # RE3/2
emtf_lut[2,3,3] = 9  # RE3/3
emtf_lut[2,4,1] = 11 # RE4/1
emtf_lut[2,4,2] = 11 # RE4/2
emtf_lut[2,4,3] = 11 # RE4/3

emtf_lut[3,1,1] = 1  # GE1/1
emtf_lut[3,2,1] = 6  # GE2/1

emtf_lut[4,1,1] = 0  # ME0

# Get layer function
def get_layer(dtype, station, ring):
  layer = emtf_lut[dtype.astype(int),station.astype(int),ring.astype(int)] 
  return layer

# Delta phi function
def calc_dphi(phi1, phi2):
    """Computes phi2-phi1 given in range [-pi,pi]"""
    dphi = phi2 - phi1
    dphi[dphi > np.pi] -= 2*np.pi
    dphi[dphi < -np.pi] += 2*np.pi
    return dphi


## Split HITS by SECTOR Function

In [0]:
 # Split hits by detector ETA and PHI sector  
 def splitHitsBySector(df_hits):
  
   # Define detector PHI and ETA sections , where phi_range = (-np.pi, np.pi)
   phi_edges = np.linspace(*phi_range, num=n_phi_sections+1)
   eta_edges = np.linspace(*eta_range, num=n_eta_sections+1)
   
   # Split hits according to provided phi and eta boundaries."""
   hits_sectors = []

   # Loop over PHI sections
   for i in range(len(phi_edges) - 1):

     # Select hits in this phi sector
     phi_min, phi_max = phi_edges[i], phi_edges[i+1]
     phi_hits = hits[(hits.phi > phi_min) & (hits.phi < phi_max)]

     # Center these hits on phi=0
     centered_phi = phi_hits.phi - (phi_min + phi_max) / 2
     phi_hits = phi_hits.assign(phi=centered_phi, phi_section=i)

     # Loop over ETA sections
     for j in range(len(eta_edges) - 1):

       # Select hits in this eta sector
       eta_min, eta_max = eta_edges[j], eta_edges[j+1]
       
       theta = np.arctan2(phi_hits.r, phi_hits.z)
       eta= -1. * np.log(np.tan(theta / 2.))

       sec_hits = phi_hits[(eta > eta_min) & (eta < eta_max)]
       hits_sectors.append(sec_hits.assign(eta_section=j))

  # Return list of dataframes hits in each detector section 
  return hits_sectors


## Build HITS Function

In [0]:
 # Function that builds a HITS dataframe per event containing all real muon hits and merge it with pileup hits
 def buildHits(event_mu,event_pu):
  
  # Create a HITS dataframe for a given muon event 
  hits_mu = event_mu[HITVARS]        # create a DF containing only muon hits variables
  hits_mu = hits_mu.values.tolist()  # evaluate jagged arrays and transform to list or arrays DF ( trick for DF of jagged arrays )
  df_hits_mu = pd.DataFrame(hits_mu, index=hits_mu.index) # create a dataframe from a list of arrays
  df_hits_mu = df_hits_mu.transpose() # transpose dataframe to have hit variables as columns 

  # Get only true muon hits (use generator-level matching condition)!
  df_hits_mu = df_hits_mu[(df_hits_mu['vh_sim_tp1']==0) & (df_hits_mu['vh_sim_tp2']==0)]   

  # Create a HITS only dataframe for a given pileup event 
  hits_pu = event_pu[HITVARS]         # create a DF containing only pileup hits variables
  hits_pu = hits_pu.values.tolist()   # evaluate jagged arrays and transform to list or arrays DF ( trick for DF of jagged arrays )
  df_hits_pu = pd.DataFrame(hits_pu, index=hits_pu.index)   # create a dataframe from a list of arrays
  df_hits_pu = df_hits_pu.transpose() # transpose dataframe to have hit variables as columns

  # Add "isMuon" variable to dataframes
  df_hits_mu['isMuon'] = np.ones(len(df_hits_mu))
  df_hits_pu['isMuon'] = np.zeros(len(df_hits_pu))
  
 # print("len(df_hits_mu)=",len(df_hits_mu))
 # print("df_hits_mu=",df_hits_mu.head(3))
  
 # print("len(df_hits_pu)=",len(df_hits_pu))
 # print("df_hits_pu=",df_hits_pu.head(3))

  # Concatenate MUON and PU hits dataframes into a single hits dataframe
  df_hits = pd.concat([df_hits_mu, df_hits_pu])
 
  # Add hit layer info to dataframe
  df_hits['vh_layer'] = df_hits.apply(lambda row: get_layer(row['vh_type'], row['vh_station'], row['vh_ring']), axis=1)
 
  # Filter out hits without layer information
  #df_events_mu = df_events_mu[(df_events_mu["vh_layer"]>=0)]
  #df_events_pu = df_events_pu[(df_events_pu["vh_layer"]>=0)]

 # print("len(df_hits)=",len(df_hits))
 # print("df_hits head = ", df_hits.head(3) ) 

  # Add the muon event number to the hits dataframe ( hits history )
  evtid = event_mu['ve_event'][0]        # muon event number
  df_hits["evtid"]=evtid
  # print("df_hits head=",df_hits.tail(3))

  # Return the HITS dataframe 
  return df_hits
 

# Build SEGMENTS Function

In [7]:
# Function that builds SEGMENTS ( hit pairs ) using Hits dataframes. SEGMENTS are defined using hit pairs in consecutive layers.
def buildSegments(df_hits):

  segments= [] # list of segments

  # Group hits by layer number
  hits_groups = df_hits.groupby("vh_layer")

  # Create a list of CSC layer ID pairs for consecutive(adjacent) layers
  csc_layers = [ 0 , 2 , 3 , 7 , 8 , 10 ]
  layer_pairs = [ [i,j] for i in csc_layers for j in csc_layers]
  adj_layer_pairs = [ x for x in layer_pairs if ( csc_layers.index(x[0])+1 == csc_layers.index(x[1]) ) ]

  # Loop over adjacent CSC layers ID pairs  
  for l1,l2 in adj_layer_pairs:

    # Join all hit pairs together in a dataframe
    df_hits1 = hits_groups.get_group(l1)
    df_hits2 = hits_groups.get_group(l2)
#    print(df_hits1.keys())
#    df_hit_pairs = pd.merge( df_hits1.reset_index(), df_hits2.reset_index(), how='inner', on='evtid', suffixes=('_1', '_2'))
    df_hit_pairs = pd.merge( df_hits1.reset_index(), df_hits2.reset_index(), on='evtid', suffixes=('_1', '_2'))

    print(df_hit_pairs.keys())

   # Compute line segment through the hits
    dphi = calc_dphi(df_hit_pairs.vh_sim_phi_1, df_hit_pairs.vh_sim_phi_2)
    dz = df_hit_pairs.vh_sim_z_2 - df_hit_pairs.vh_sim_z_1
    dr = df_hit_pairs.vh_sim_r_2 - df_hit_pairs.vh_sim_r_1
    phi_slope = dphi / dr
    z0 = df_hit_pairs.vh_sim_z_1 - df_hit_pairs.vh_sim_r_1 * dz / dr
  
    # Filter segments according to criteria
    #good_seg_mask = (phi_slope.abs() < phi_slope_max) & (z0.abs() < z0_max)
  #   good_seg_mask = (((phi_slope.abs() < phi_slope_max) & (df_hit_pairs.layer_1[0] < 5)) | ((phi_slope.abs() < phi_slope_outer_max) & (df_hit_pairs.layer_1[0] >= 5))) & (z0.abs() < z0_max)  
   #  df_hit_pairs = df_hit_pairs[good_seg_mask]
 
    # Create a hit pairs dataframe , add segments variables and store in segments list
    segments.append(df_hit_pairs[['evtid', 'index_1', 'index_2', 'vh_layer_1', 'vh_layer_2']].assign(dphi=dphi, dz=dz, dr=dr, phi_slope=phi_slope, z0=z0))
 
   # End loop over segment pairs 

  # Create an all events segments dataframe from list of dataframes of event segments
  df_segments = pd.concat(segments, ignore_index=True)
  print(df_segments.keys())
  print(df_segments.head)

  # Return segments dataframe
  return df_segments

#def select_segments(df_segments, phi_slope_min, phi_slope_max, z0_max):
#    sel_mask = ((df_segments.phi_slope.abs() > phi_slope_min) &
#                (df_segments.phi_slope.abs() < phi_slope_max) &
#                (df_segments.z0.abs() < z0_max))
#    return df_segments.assign(selected=sel_mask)
#
#def segment_efficiency(df_segments):
#    return (df_segments.y & df_segments.selected).sum() / df_segments.y.sum()
#
#def segment_purity(df_segment):
#    return (df_segments.y & df_segments.selected).sum() / df_segments.selected.sum()

Index(['index_1', 'vh_layer_1', 'vh_sector_1', 'vh_station_1', 'vh_ring_1',
       'vh_sim_z_1', 'vh_sim_r_1', 'vh_sim_phi_1', 'vh_sim_eta_1',
       'vh_sim_theta_1', 'vh_bend_1', 'vh_sim_tp1_1', 'vh_sim_tp2_1',
       'isMuon_1', 'evtid', 'index_2', 'vh_layer_2', 'vh_sector_2',
       'vh_station_2', 'vh_ring_2', 'vh_sim_z_2', 'vh_sim_r_2', 'vh_sim_phi_2',
       'vh_sim_eta_2', 'vh_sim_theta_2', 'vh_bend_2', 'vh_sim_tp1_2',
       'vh_sim_tp2_2', 'isMuon_2'],
      dtype='object')
Index(['index_1', 'vh_layer_1', 'vh_sector_1', 'vh_station_1', 'vh_ring_1',
       'vh_sim_z_1', 'vh_sim_r_1', 'vh_sim_phi_1', 'vh_sim_eta_1',
       'vh_sim_theta_1', 'vh_bend_1', 'vh_sim_tp1_1', 'vh_sim_tp2_1',
       'isMuon_1', 'evtid', 'index_2', 'vh_layer_2', 'vh_sector_2',
       'vh_station_2', 'vh_ring_2', 'vh_sim_z_2', 'vh_sim_r_2', 'vh_sim_phi_2',
       'vh_sim_eta_2', 'vh_sim_theta_2', 'vh_bend_2', 'vh_sim_tp1_2',
       'vh_sim_tp2_2', 'isMuon_2'],
      dtype='object')
Index(['index_1', 'vh_

## Build GRAPHS function

In [0]:
def buildGraphs(hits,segments):

  # Get number of hits and segments in event
  n_hits = hits.shape[0]
  n_edges = segments.shape[0]

  # Prepare the GRAPH tensors( matrices )
  Ri = np.zeros((n_hits, n_edges), dtype=np.uint8)
  Ro = np.zeros((n_hits, n_edges), dtype=np.uint8)
  X = (hits[feature_names].values / feature_scale).astype(np.float32)
  y = np.zeros(n_edges, dtype=np.float32)

  # We have the segments' hits given by dataframe label,
  # so we need to translate into positional indices.
  # Use a series to map hit label-index onto positional-index.
  hit_idx = pd.Series(np.arange(n_hits), index=hits.index)
  seg_start = hit_idx.loc[segments.index_1].values
  seg_end = hit_idx.loc[segments.index_2].values

  # Now we can fill the association matrices.
  # Note that Ri maps hits onto their incoming edges,
  # which are actually segment endings.
  Ri[seg_end, np.arange(n_edges)] = 1
  Ro[seg_start, np.arange(n_edges)] = 1
  
  # Fill the segment labels
  pid1 = hits.particle_id.loc[segments.index_1].values
  pid2 = hits.particle_id.loc[segments.index_2].values
  y[:] = (pid1 == pid2)
  
  # Return a GRAPH (named tuple)
  return Graph(X, Ri, Ro, y)


# Read EVENTS function

In [3]:
# Function that reads all EVENTS from input file sand sture it in dataframes 
def readEvents():

  # Load the DATA and store selected the Root tree variables into Pandas dataframe
  !ls 'gdrive/My Drive/Colab Notebooks/Data'
  data_dir = 'gdrive/My Drive/Colab Notebooks/Data'

  file_mu   = data_dir+'/ntuple_SingleMuon_Endcap_9.root'
  file_pu = data_dir+'/ntuple_SingleNeutrino_PU200_63.root'
  upfile_mu = uproot.open(file_mu)
  upfile_pu = uproot.open(file_pu)
  tree_mu = upfile_mu["ntupler"]["tree"] # dictionary of NumPy arrays
  tree_pu = upfile_pu["ntupler"]["tree"] # dictionary of NumPy arrays

  #upfile_mu.keys()
  #tree_mu.show()


  # Read ROOT trees into dataframes
  events_start=0
  events_end=10
  df_events_mu = tree_mu.pandas.df(VARS,flatten=False, entrystart=int(events_start), entrystop=int(events_end))
  df_events_pu = tree_pu.pandas.df(VARS,flatten=False, entrystart=int(events_start), entrystop=int(events_end))
  df_muon_vp   = tree_mu.pandas.df(['vp_pt','vp_eta'], entrystart=int(events_start), entrystop=int(events_end))

  # Get number of events in dataframes
  nmu = len(df_events_mu)
  npu = len(df_events_pu)
  print('nmu=',nmu)
  print('npu=',npu)

  # Return events dataframes
  return df_events_mu , df_events_pu , df_muon_vp

_about.txt	       ntuple_ggH_ZZ4mu.root
bolsas_astro.txt       ntuple_qqH_ZZ4mu.root
bolsas_fisica.txt      ntuple_SingleMuon_Endcap_9.root
events.root	       ntuple_SingleNeutrino_PU200_63.root
GOOG.csv	       VBFHZZ_background.csv
jet_images.h5	       VBFHZZ_signal.csv
ntuple_bkg_ZZ4mu.root
nmu= 10
npu= 10


## MAIN program


In [4]:
# Initialize the random number generator
print('Initializing random seed=1')
random.seed(1)

# Read all EVENTS into dataframes
df_events_mu , df_events_pu , df_muons_vp = readEvents()

# Loop over muon events mu 
for ievt, event_mu in df_events_mu.iterrows():
 
  # Pick a random pu event 
  j = random.randint(1,npu-1)
  #print ('Ievt=',ievt,' Ipu=',j)
  event_pu = df_events_pu.loc[j] 
  
  # Build HITS dataframe merging pileup and muon HITS of the given event
  df_hits = buildHits(event_mu,event_pu)

  # Create a list of HITS dataframe by detector Eta X Phi sector
  hits_sectors = splitHitsBySector(df_hits)

  # Loop over list of HITS dataframes per sector
  for df_hits in hits_sectors: 

  # Build SEGMENTS ( hits pairs in adjacent layers )
  df_segments = buildSegments(df_hits)

  # Build GRAPHS
  df_graphs = buildGraphs(df_hits,df_segments)

  # End loop over events

# 


  print(" ")
  print("----------------------------------------------------------------------- ")
  print(" ")

Initializing random seed=1
len(df_hits_mu)= 5
df_hits_mu=    vh_layer  vh_sector  vh_station  ...  vh_sim_tp1  vh_sim_tp2  isMuon
0       6.0        6.0         2.0  ...         0.0         0.0     1.0
1       0.0        6.0         1.0  ...         0.0         0.0     1.0
2       7.0        6.0         2.0  ...         0.0         0.0     1.0

[3 rows x 13 columns]
len(df_hits_pu)= 51
df_hits_pu=    vh_layer  vh_sector  vh_station  ...  vh_sim_tp1  vh_sim_tp2  isMuon
0       2.0        1.0         1.0  ...        -1.0        -1.0     0.0
1       0.0        1.0         1.0  ...        -1.0        -1.0     0.0
2       2.0        1.0         1.0  ...        -1.0        -1.0     0.0

[3 rows x 13 columns]
len(df_hits)= 56
df_hits head =     vh_layer  vh_sector  vh_station  ...  vh_sim_tp1  vh_sim_tp2  isMuon
0       6.0        6.0         2.0  ...         0.0         0.0     1.0
1       0.0        6.0         1.0  ...         0.0         0.0     1.0
2       7.0        6.0         2.0  ...