## Doublet Maker

### 0 Environment Setup

In [1]:
# Install the TrackML helper library into the environment of the running kernel.
# `%pip` (unlike `! pip`) always targets the interpreter backing this notebook.
%pip install --user git+https://github.com/LAL/trackml-library

Collecting git+https://github.com/LAL/trackml-library
  Cloning https://github.com/LAL/trackml-library to /tmp/pip-req-build-ifdyebij
  Running command git clone -q https://github.com/LAL/trackml-library /tmp/pip-req-build-ifdyebij
Building wheels for collected packages: trackml
  Building wheel for trackml (setup.py) ... [?25ldone
[?25h  Created wheel for trackml: filename=trackml-3-py2.py3-none-any.whl size=13512 sha256=4ee76ca25239e269a4c309045f3af7cec987cf94af17b06d64a5b8363eceff55
  Stored in directory: /tmp/pip-ephem-wheel-cache-y8xvm7sb/wheels/62/a8/3a/330c0e606bd185f850e7aec01df4607aa3df395945cf74905c
Successfully built trackml


### 1 Imports

In [1]:
import numpy as np
import pandas as pd
import trackml.dataset
from numba import jit, guvectorize, prange
from numba import int64, float32, boolean

from doublet_making_helper import *

### 2 Constants

In [2]:
# Lower bound of the transverse-momentum cut applied to the true particles
# in the "Make cuts" cell (only particles with pt > pt_min are kept).
pt_min = 0
# Directory containing the event files; the event prefix is appended to it.
path= "../exatrkx-work/volpredictor/train_100_events/"
# Number of azimuthal bins passed to bin_phi / filter_phi.
nPhiSlices = 53
# Number of layers iterated over in get_valid_ranges; must not exceed the
# number of rows of modelLayers.
nLayers = 10
# Upper / lower bounds handed to filter_doublet_length together with the r
# values of the two hits (presumably limits on the radial gap -- TODO confirm).
maxDoubletLength = 300.0
minDoubletLength = 10.0
# z bounds handed to get_z_ranges (presumably the extent of the interaction
# region along z -- TODO confirm against doublet_making_helper).
zPlus = 150.0
zMinus = -150.0
# Pseudorapidity limit and the polar angle derived from it via
# theta = 2 * arctan(exp(-eta)).
maxEta = 2.7
maxTheta = 2 * np.arctan(np.exp(-maxEta))    
# Cotangent of maxTheta; passed to filter_horizontal_doublets.
maxCtg = np.cos(maxTheta) / np.sin(maxTheta) 
# One row per layer; the trailing tag is the (volume_id-layer_id) pair.
# Column 1 is read as the layer radius in get_valid_ranges. Columns 0, 2 and 3
# are presumably a layer-type flag and the min / max z extent -- TODO confirm.
modelLayers = np.array([
                [0, 32,   -455,  455],   # 8-2
                [0, 72,   -455,  455],   # 8-4
                [0, 116,  -455,  455],   # 8-6
                [0, 172,  -455,  455],   # 8-8
                [0, 260,  -1030, 1030],  # 13-2
                [0, 360,  -1030, 1030],  # 13-4
                [0, 500,  -1030, 1030],  # 13-6
                [0, 660,  -1030, 1030],  # 13-8
                [0, 820,  -1030, 1030],  # 17-2
                [0, 1020, -1030, 1030]   # 17-4
], dtype='int32')
FALSE_INT = 99999   #Integer that represents a false value

### 3 Load Data

In [3]:
# Seed the global NumPy RNG so the same event is picked on every run
np.random.seed(30)

# Pick one event number in [1000, 1099] and build the matching file prefix
event_number = np.random.choice(100) + 1000
prefix = f"event00000{event_number}"

# Read the hits, particles and truth tables of the selected event
hits, particles, truth = trackml.dataset.load_event(
    path + prefix,
    parts=['hits', 'particles', 'truth'],
)

### 4 Prepare Data

##### Make cuts

In [4]:
%%time
# Barrel (volume_id, layer_id) pairs; the position in this list becomes the
# convenient layer number used in the rest of the notebook.
vlids = [(8,2), (8,4), (8,6), (8,8),
         (13,2), (13,4), (13,6), (13,8),
         (17,2), (17,4)]
n_det_layers = len(vlids)

# Select barrel layers and assign convenient layer number [0-9]
vlid_groups = hits.groupby(['volume_id', 'layer_id'])
hits = pd.concat([vlid_groups.get_group(vlids[i]).assign(layer=i)
                  for i in range(n_det_layers)])

# Calculate particle transverse momentum
pt = np.sqrt(particles.px**2 + particles.py**2)

# True particle selection.
# Applies pt cut, removes all noise hits (the inner merge on particle_id drops
# every truth row whose particle is not in the selected particle table).
particles = particles[pt > pt_min]
truth = (truth[['hit_id', 'particle_id']]
         .merge(particles[['particle_id']], on='particle_id'))

# Calculate derived hits variables
r = np.sqrt(hits.x**2 + hits.y**2)
# NOTE: phi is not used by any of the cells visible in this notebook; it is
# indexed like the pre-merge hits table.
phi = np.arctan2(hits.y, hits.x)

# Attach r and the true particle_id to every remaining hit. The merge resets
# the index, so the labels used below are positions in the merged table.
hits = (hits
        .assign(r=r)
        .merge(truth[['hit_id', 'particle_id']], on='hit_id'))

# Remove duplicate hits: for every (particle_id, layer) keep the hit with the
# smallest r. The default as_index=True makes idxmin() return a Series of row
# labels, which is what .loc needs; with as_index=False recent pandas versions
# return a DataFrame (key columns plus r) instead.
hits = hits.loc[
    hits.groupby(['particle_id', 'layer']).r.idxmin()
]

hits

CPU times: user 6.89 s, sys: 24.6 ms, total: 6.92 s
Wall time: 6.93 s


Unnamed: 0,hit_id,x,y,z,volume_id,layer_id,module_id,layer,r,particle_id
5041,20683,31.391100,-1.742210,66.873100,8,2,120,0,31.439409,4503874505277440
13737,29745,71.156197,-1.098510,150.440002,8,4,304,1,71.164673,4503874505277440
20044,36655,116.768997,3.419910,246.522003,8,6,547,2,116.819069,4503874505277440
25466,43100,170.757996,14.356700,362.315002,8,8,976,3,171.360458,4503874505277440
30823,73099,252.804993,43.484100,545.200012,13,2,622,4,256.517487,4503874505277440
...,...,...,...,...,...,...,...,...,...,...
30344,72575,-247.811005,69.083298,259.200012,13,2,559,4,257.260162,801642176780959744
40856,85479,-415.795990,277.283997,524.000000,13,6,1241,6,499.772675,801642176780959744
44753,91383,-440.278015,488.877014,734.799988,13,8,1823,7,657.909912,801642176780959744
47731,107646,-270.306000,775.133972,1067.800049,17,2,2497,8,820.912903,801642176780959744


##### Reformat hit table

In [5]:
%%time
# Column layout of the hit table. The jitted functions below address the
# columns by position: 0 hit_id, 1 layer, 2 phi_bin, 3 r, 4 z, 5 particle_id.
cols = ['hit_id', 'layer', 'phi_bin', 'r', 'z', 'particle_id']

# Add the phi bin, recompute r, and keep only the columns listed above.
# assign() builds a new frame instead of mutating the (sliced) input in place.
hits = hits.assign(
    phi_bin=bin_phi(hits['x'].values, hits['y'].values, nPhiSlices),
    r=np.hypot(hits['x'].values, hits['y'].values),
)[cols]

# Build the integer hit table column by column. Casting each column on its own
# truncates the float columns (r, z) exactly as a whole-frame cast would, but
# keeps the int64 ids exact: a whole-frame `.values` on mixed int/float columns
# goes through float64, which cannot represent every integer above 2**53.
hit_table = np.column_stack([hits[col].values.astype(np.int64) for col in cols])
nHits = hit_table.shape[0]
print('Number of hits: ', nHits)

hits

Number of hits:  40095
CPU times: user 132 ms, sys: 0 ns, total: 132 ms
Wall time: 130 ms


Unnamed: 0,hit_id,layer,phi_bin,r,z,particle_id
5041,20683,0,51,31.439409,66.873100,4503874505277440
13737,29745,1,51,71.164673,150.440002,4503874505277440
20044,36655,2,0,116.819069,246.522003,4503874505277440
25466,43100,3,0,171.360458,362.315002,4503874505277440
30823,73099,4,1,256.517517,545.200012,4503874505277440
...,...,...,...,...,...,...
30344,72575,4,23,257.260162,259.200012,801642176780959744
40856,85479,6,21,499.772675,524.000000,801642176780959744
44753,91383,7,19,657.909912,734.799988,801642176780959744
47731,107646,8,15,820.912903,1067.800049,801642176780959744


### 5 Helper Functions

In [6]:
@jit(nopython=True)
def filter(inner_hit, layer_range, z_ranges):
    '''
    This function combines the helper filters into one filter.

    Every row of the global hit_table is tested as an outer-hit candidate for
    the given inner hit. A row is kept only if it passes, in this order,
    filter_layers, filter_phi, filter_doublet_length,
    filter_horizontal_doublets and filter_z (evaluation short-circuits on the
    first failing check).

    Parameters
    ----------
    inner_hit : one row of hit_table
        Columns 2 (phi_bin), 3 (r) and 4 (z) are read.
    layer_range, z_ranges :
        Values returned by get_valid_ranges for this inner hit; they are
        forwarded to filter_layers and filter_z.

    Returns
    -------
    keep : 1D boolean array with one entry per row of hit_table
        True where the row is an accepted outer hit.

    Notes
    -----
    hit_table, nPhiSlices, minDoubletLength, maxDoubletLength and maxCtg are
    read from the notebook globals. Numba treats globals as compile-time
    constants, so this function has to be re-defined (re-run the cell) after
    any of them changes.
    The name shadows the Python builtin filter(); it is kept because make()
    calls it under this name.
    '''
    n_rows = hit_table.shape[0]
    # Allocate the mask directly instead of going through a Python list
    keep = np.ones(n_rows, dtype=np.bool_)

    # Inner-hit values do not change inside the loop
    inner_phi = inner_hit[2]
    inner_r = inner_hit[3]
    inner_z = inner_hit[4]

    for row_idx in range(n_rows):
        outer_layer = hit_table[row_idx, 1]
        outer_phi = hit_table[row_idx, 2]
        outer_r = hit_table[row_idx, 3]
        outer_z = hit_table[row_idx, 4]
        keep[row_idx] = (filter_layers(outer_layer, layer_range) and
                         filter_phi(inner_phi, outer_phi, nPhiSlices) and
                         filter_doublet_length(inner_r,
                                               outer_r,
                                               minDoubletLength,
                                               maxDoubletLength) and
                         filter_horizontal_doublets(inner_r,
                                                    inner_z,
                                                    outer_r,
                                                    outer_z,
                                                    maxCtg) and
                         filter_z(outer_layer,
                                  outer_z,
                                  layer_range,
                                  z_ranges))
    return keep

In [7]:
@jit(nopython=True)
def get_valid_ranges(inner_hit):
    '''
    Determine where outer-hit candidates for the given inner hit may lie.

    Returns the layers that contain interesting hits for the chosen inner
    hit, together with the min/max bound in the z-direction of interesting
    hits on each of those outer layers.

    The work is delegated to get_layer_range, get_z_ranges and z_mask; the
    latter is called only for its effect on the two arrays it is given
    (presumably it overwrites rejected entries with FALSE_INT -- TODO confirm
    against doublet_making_helper).
    '''
    # Radius of every layer, taken from column 1 of modelLayers
    layer_radii = np.empty(nLayers, dtype=int64)
    for layer_idx in range(nLayers):
        layer_radii[layer_idx] = modelLayers[layer_idx, 1]

    # Layers that are valid for this inner hit
    valid_layers = get_layer_range(inner_hit, layer_radii, nLayers,
                                   maxDoubletLength, FALSE_INT)

    # z bounds on each valid layer
    z_bounds = get_z_ranges(inner_hit, layer_radii, valid_layers,
                            zMinus, zPlus, FALSE_INT)

    # Filter layers whose bounds of interest fall outside their geometric bounds
    z_mask(valid_layers, z_bounds, modelLayers, FALSE_INT)

    return valid_layers, z_bounds

### 6 Make Doublets

In [8]:
@jit(nopython=True, parallel=True)
def make():
    '''
    This function makes all possible doublets that fit the criteria of the
    filter. It first chooses an inner hit and then iterates through the hit
    table looking for possible outer hit candidates. It then returns the hit
    ids corresponding to the inner and outer hit pairs of the created
    doublets.

    Returns
    -------
    inner, outer : 1D int64 arrays of length nHits * ncolumns
        ncolumns = int(nHits * 0.01) slots are reserved per inner hit, in the
        row order of hit_table. inner[k] is the hit id of the inner hit that
        owns slot k, outer[k] the hit id of one accepted outer hit. Slots that
        were not filled keep the value 0 in outer.

    Notes
    -----
    At most ncolumns outer hits are stored per inner hit; further candidates
    are dropped. Without this cap the write would run past the end of the row,
    which nopython mode does not detect because it performs no bounds
    checking by default.
    hit_table and nHits are read from the notebook globals, which Numba
    freezes at compile time; re-run this cell after they change.
    '''
    ncolumns = int(nHits * 0.01)
    outer_2D = np.zeros((nHits, ncolumns), dtype=int64)

    # Only this outer loop is parallelised; inner loops are plain ranges
    for row_idx in prange(nHits):
        inner_hit = hit_table[row_idx]
        layer_range, z_ranges = get_valid_ranges(inner_hit)
        outer_hit_set = hit_table[filter(inner_hit, layer_range, z_ranges)].T[0]
        # Never write beyond the slots reserved for this inner hit
        n_fill = min(len(outer_hit_set), ncolumns)
        for column_idx in range(n_fill):
            outer_2D[row_idx, column_idx] = outer_hit_set[column_idx]

    # Flatten row by row so that slot k belongs to inner hit k // ncolumns
    outer = np.reshape(outer_2D, (1, nHits * ncolumns))[0]
    inner = np.zeros(len(outer), dtype=int64)
    for row_count in prange(nHits):
        inner_id = hit_table[row_count, 0]
        for col_count in range(ncolumns):
            inner[row_count * ncolumns + col_count] = inner_id

    return inner, outer

In [9]:
%%time
# Build the doublets and display the (inner, outer) hit-id arrays.
# NOTE(review): make() and the functions it calls are @jit-decorated without
# explicit signatures, so this first call presumably also pays the
# compilation cost, which is then included in the reported time -- confirm.
make()

CPU times: user 1min 45s, sys: 912 ms, total: 1min 46s
Wall time: 8.21 s


(array([20683, 20683, 20683, ..., 91858, 91858, 91858]),
 array([29745, 36655, 28135, ...,     0,     0,     0]))