## Doublet Maker

### 1 Imports

In [1]:
import numpy as np
import pandas as pd
import trackml.dataset
import cupy as cp

#CPU Imports
from numba import jit, guvectorize, prange
from numba import int64, float32, boolean

import doublet_making_helper as cpu

### 2 Constants

In [2]:
# --- Event selection -------------------------------------------------------
# Particles must have transverse momentum strictly above this value to be kept.
pt_min = 0
# Directory holding the event files; the event prefix is appended to it.
path= "../exatrkx-work/volpredictor/train_100_events/"

# --- Doublet-making cuts ---------------------------------------------------
nPhiSlices = 53              # passed to cpu.bin_phi and to the GPU kernel
nLayers = 10                 # number of rows in modelLayers
maxDoubletLength = 300.0     # upper bound on (outer r - inner r)
minDoubletLength = 10.0      # lower bound on (outer r - inner r)
zPlus = 150.0                # upper end of the z window used by the z filter
zMinus = -150.0              # lower end of the z window used by the z filter

# Largest accepted cot(theta), derived from the eta limit through
# theta = 2 * atan(exp(-eta)).
maxEta = 2.7
maxTheta = 2 * np.arctan(np.exp(-maxEta))
maxCtg = np.cos(maxTheta) / np.sin(maxTheta)

# One row per barrel layer; the trailing comment is "volume_id-layer_id".
# Column 1 is the layer radius. Columns 2 and 3 look like the z extent of the
# layer -- TODO confirm. Column 0 is always 0 here.
modelLayers = np.array(
    [
        [0, 32,   -455,  455],   # 8-2
        [0, 72,   -455,  455],   # 8-4
        [0, 116,  -455,  455],   # 8-6
        [0, 172,  -455,  455],   # 8-8
        [0, 260,  -1030, 1030],  # 13-2
        [0, 360,  -1030, 1030],  # 13-4
        [0, 500,  -1030, 1030],  # 13-6
        [0, 660,  -1030, 1030],  # 13-8
        [0, 820,  -1030, 1030],  # 17-2
        [0, 1020, -1030, 1030],  # 17-4
    ],
    dtype='int32',
)

# Radius of each layer (column 1 of modelLayers), widened to int64.
refCoords = modelLayers[:nLayers, 1].astype(np.int64)

### 3 Load Data

In [3]:
# Fix NumPy's global seed so the "random" event choice below is the same on
# every run.
np.random.seed(30) # Chef Curry
# Draw an event number in [1000, 1099] and build the prefix "event00000<number>".
prefix= "event00000" + str(np.random.choice(100) + 1000)
# Load the hits, particles and truth tables for the chosen event.
hits, particles, truth = trackml.dataset.load_event(
        path + prefix, parts=['hits', 'particles', 'truth'])

### 4 Prepare Data

##### Make cuts

In [4]:
%%time
# Barrel (volume_id, layer_id) pairs, listed in the order that defines the
# convenient layer number 0-9.
vlids = [(8,2), (8,4), (8,6), (8,8),
         (13,2), (13,4), (13,6), (13,8),
         (17,2), (17,4)]
n_det_layers = len(vlids)

# Keep only hits on the barrel layers and tag each with its layer number [0-9].
vlid_groups = hits.groupby(['volume_id', 'layer_id'])
hits = pd.concat([vlid_groups.get_group(vlid).assign(layer=layer_idx)
                  for layer_idx, vlid in enumerate(vlids)])

# Calculate particle transverse momentum
pt = np.sqrt(particles.px**2 + particles.py**2)

# True particle selection.
# Applies the pt cut; the inner merge then keeps only truth rows whose
# particle_id survives, which removes all noise hits.
particles = particles[pt > pt_min]
truth = (truth[['hit_id', 'particle_id']]
         .merge(particles[['particle_id']], on='particle_id'))

# Calculate derived hits variables
r = np.sqrt(hits.x**2 + hits.y**2)
# NOTE(review): phi is not used again in the visible cells (the phi bin comes
# from cpu.bin_phi later); kept in case other cells rely on it.
phi = np.arctan2(hits.y, hits.x)

# Attach r and the true particle_id; the inner merge drops hits without a
# selected particle.
hits = (hits
        .assign(r=r)
        .merge(truth[['hit_id', 'particle_id']], on='hit_id'))

# Remove duplicate hits: keep, per (particle, layer), the hit with smallest r.
# idxmin() is called without as_index=False so it returns a Series of row
# labels on every pandas version; with as_index=False recent pandas returns a
# DataFrame, which cannot be used as a .loc row indexer.
closest_hit_labels = hits.groupby(['particle_id', 'layer']).r.idxmin()
hits = hits.loc[closest_hit_labels.values]

hits

CPU times: user 4.31 s, sys: 27 ms, total: 4.34 s
Wall time: 4.34 s


Unnamed: 0,hit_id,x,y,z,volume_id,layer_id,module_id,layer,r,particle_id
5041,20683,31.391100,-1.742210,66.873100,8,2,120,0,31.439409,4503874505277440
13737,29745,71.156197,-1.098510,150.440002,8,4,304,1,71.164673,4503874505277440
20044,36655,116.768997,3.419910,246.522003,8,6,547,2,116.819069,4503874505277440
25466,43100,170.757996,14.356700,362.315002,8,8,976,3,171.360458,4503874505277440
30823,73099,252.804993,43.484100,545.200012,13,2,622,4,256.517487,4503874505277440
...,...,...,...,...,...,...,...,...,...,...
30344,72575,-247.811005,69.083298,259.200012,13,2,559,4,257.260162,801642176780959744
40856,85479,-415.795990,277.283997,524.000000,13,6,1241,6,499.772675,801642176780959744
44753,91383,-440.278015,488.877014,734.799988,13,8,1823,7,657.909912,801642176780959744
47731,107646,-270.306000,775.133972,1067.800049,17,2,2497,8,820.912903,801642176780959744


##### Reformat hit table

In [5]:
%%time
# Save particle ids for error checking before the column is dropped.
particle_ids = hits['particle_id'].values

# Column layout expected by the GPU kernel: 5 values per hit, in this order.
# Selecting by name (instead of by position after a drop) keeps the layout
# correct even if the incoming column order changes.
cols = ['hit_id', 'layer', 'phi_bin', 'r', 'z']

# Build the reduced table without mutating the incoming frame in place:
#   phi_bin - phi slice index computed by the project helper from (x, y)
#   r       - transverse radius recomputed from (x, y)
hits = hits.assign(
    phi_bin=cpu.bin_phi(hits['x'].values, hits['y'].values, nPhiSlices),
    r=np.hypot(hits['x'].values, hits['y'].values),
)[cols]

# Integer copy of the table for the kernel. The cast drops the fractional
# part of r and z.
hit_table = hits.values.astype(np.int64)
nHits = hit_table.shape[0]
print('Number of hits: ', nHits)

hits

Number of hits:  40095
CPU times: user 118 ms, sys: 0 ns, total: 118 ms
Wall time: 117 ms


Unnamed: 0,hit_id,layer,phi_bin,r,z
5041,20683,0,51,31.439409,66.873100
13737,29745,1,51,71.164673,150.440002
20044,36655,2,0,116.819069,246.522003
25466,43100,3,0,171.360458,362.315002
30823,73099,4,1,256.517517,545.200012
...,...,...,...,...,...
30344,72575,4,23,257.260162,259.200012
40856,85479,6,21,499.772675,524.000000
44753,91383,7,19,657.909912,734.799988
47731,107646,8,15,820.912903,1067.800049


### 5. GPU Pre-Processing

##### Calculate Number of Threads

In [6]:
def get_N_threads(nhits):
    '''
    Compute a 1-D launch configuration with one thread per ordered hit pair.

    Input - number of hits in event

    Output - tuple (number of blocks, threads per block)

    The kernel needs nhits**2 threads. If that fits in a single block of at
    most 1024 threads, one block of exactly that size is returned; otherwise
    enough 1024-thread blocks to cover every pair. An event with no hits
    still yields a valid (1, 1) launch.
    '''
    max_threads_per_block = 1024
    # int() keeps the square in arbitrary-precision Python ints even if a
    # fixed-width NumPy integer is passed in.
    ncomparisons = int(nhits) ** 2
    if ncomparisons < max_threads_per_block:
        # A block must contain at least one thread.
        return 1, max(ncomparisons, 1)
    # Integer ceiling division.
    nblocks = -(-ncomparisons // max_threads_per_block)
    return nblocks, max_threads_per_block

##### Define Kernel Function

In [7]:
# CUDA kernel that tests every ordered pair of hits as a doublet candidate.
#
# Thread mapping: the global thread id tid selects one (inner, outer) pair via
# in_idx = tid / N and out_idx = tid % N; threads with tid >= N*N do nothing.
#
# Arguments:
#   hits      - flat int array, 5 values per hit in the order
#               (hit_id, layer, phi_bin, r, z)
#   params    - int array read as (nPhiSlices, maxDoubletLength,
#               minDoubletLength, maxCtg, zMinus, zPlus)
#   refCoords - int array indexed by the outer hit's layer
#   N         - number of hits
#   out1/out2 - int arrays indexed by tid; for an accepted pair they receive
#               the inner and outer hit id. Entries of rejected pairs are
#               never written, so they keep whatever the caller put there.
#
# A pair is accepted only if it passes all of:
#   * layer filter: the layers differ by exactly 1 or 2, in either direction;
#   * phi filter: the phi bins are equal or adjacent, or are the pair
#     (0, nPhiSlices - 2) in either order;
#   * length filter: minDoubletLength < (outer r - inner r) < maxDoubletLength;
#   * z filter: outer z lies strictly between zmin and zmax, computed from the
#     inner hit's (r, z), zMinus / zPlus and refCoords[outer layer].
#
# NOTE(review): every input is read as int, and the products and divisions in
#   zmin / zmax are done in integer arithmetic before being stored in a float
#   -- confirm the truncation is intended.
# NOTE(review): maxCtg is read but unused while the horizontal-doublet filter
#   stays commented out.
# NOTE(review): tid and N*N are 32-bit ints, so N*N must stay below 2**31.
build_doublets_kernel = cp.RawKernel(r'''
     extern "C" __global__
     void my_build_doublets(const int* hits, const int* params, const int* refCoords, const int N, int* out1, int* out2) {
         
         // Map Thread to hits
         const int tid = blockIdx.x * blockDim.x + threadIdx.x;
         const unsigned int in_idx = tid / N;
         const unsigned int out_idx = tid % N;
         
         if(tid < N*N){
             
             // Define constants
             const int nPhiSlices = params[0];
             const int maxDoubletLength = params[1];
             const int minDoubletLength = params[2];
             const int maxCtg = params[3];
             const int zMinus = params[4];
             const int zPlus = params[5];

             const int in_id  = hits[in_idx * 5];
             const int in_lyr = hits[in_idx * 5 + 1];
             const int in_phi = hits[in_idx * 5 + 2];
             const int in_r   = hits[in_idx * 5 + 3];
             const int in_z   = hits[in_idx * 5 + 4];

             const int ot_id  = hits[out_idx * 5];
             const int ot_lyr = hits[out_idx * 5 + 1];
             const int ot_phi = hits[out_idx * 5 + 2];
             const int ot_r   = hits[out_idx * 5 + 3];
             const int ot_z   = hits[out_idx * 5 + 4];

             bool isvalid;

             // Layer filter
             isvalid = (ot_lyr == in_lyr+1 || ot_lyr == in_lyr+2 || ot_lyr == in_lyr-1 || ot_lyr == in_lyr-2);

             // Phi filter
             isvalid = (((in_phi - 1) == ot_phi) || 
                        ((in_phi + 1) == ot_phi) || (in_phi == ot_phi) ||
                        ((in_phi == 0) & ot_phi == nPhiSlices - 2) ||
                        ((in_phi == nPhiSlices - 2) & ot_phi == 0)) && isvalid;
             
             // Doublet length filter
             isvalid = (((ot_r - in_r) < maxDoubletLength) & ((ot_r - in_r) > minDoubletLength)) && isvalid;

             // Horizontal doublet filter
             //isvalid = (abs((ot_z - in_z)/(ot_r - in_r)) < maxCtg) && isvalid;

             // Z filter
             float zmin = zMinus + refCoords[ot_lyr] * (in_z - zMinus) / in_r;
             float zmax = zPlus + refCoords[ot_lyr] * (in_z - zPlus) / in_r;
             if(zmin > zmax){float temp=zmin; zmin=zmax; zmax=temp;}
             isvalid = (ot_z > zmin and ot_z < zmax) && isvalid;

             // Store result in output arrays
             if(isvalid){
                 out1[tid] = in_id;
                 out2[tid] = ot_id;
             }
        }
     }
     ''', 'my_build_doublets')

##### Allocate memory on GPU

In [8]:
%%time
# Device copy of the hit table, narrowed from int64 to int32 to match the
# kernel's `const int*` parameter.
gpu_hit_table = cp.array(hit_table, dtype=cp.int32)
# Hit count as a 0-d device array.
# NOTE(review): the launch cell passes the host value nHits to the kernel, so
# this array does not appear to be read anywhere visible -- confirm it is needed.
nhits = cp.array(nHits, dtype=cp.int32)
# Cut parameters in the order the kernel reads them (params[0] .. params[5]).
# NOTE(review): the int32 dtype drops the fractional part of any non-integer
# value (maxCtg in particular) -- confirm intended.
params = cp.array([nPhiSlices, 
                   maxDoubletLength, 
                   minDoubletLength,
                   maxCtg,
                   zMinus,
                   zPlus], dtype=cp.int32)
# Per-layer radii, indexed inside the kernel by the outer hit's layer.
gpu_refCoords = cp.array(refCoords, dtype=cp.int32)
# Zero-initialised outputs with one slot per ordered hit pair (nHits**2 int32
# values each); slots of rejected pairs stay 0.
inner_dblts = cp.zeros(nHits**2, dtype=cp.int32)
outer_dblts = cp.zeros(nHits**2, dtype=cp.int32)

CPU times: user 209 ms, sys: 201 ms, total: 410 ms
Wall time: 433 ms


### 6. Build Doublets

In [9]:
%%time
# One thread per ordered hit pair, laid out on a 1-D grid of 1-D blocks.
nBlocks, thrdsPerBlk = get_N_threads(nHits)
build_doublets_kernel(
    (nBlocks,), (thrdsPerBlk,),
    (gpu_hit_table.reshape((1, nHits*5)),  # flat view: 5 ints per hit
     params,
     gpu_refCoords,
     # The kernel declares `const int N`. CuPy passes a plain Python int as a
     # 64-bit `long long` and does not check kernel argument types, so the
     # scalar is given the matching 32-bit type explicitly.
     np.int32(nHits),
     inner_dblts,
     outer_dblts))
# Kernel launches are asynchronous; wait here so that the cell's timing covers
# the kernel itself and any launch or runtime error surfaces in this cell.
cp.cuda.Stream.null.synchronize()

CPU times: user 155 ms, sys: 401 µs, total: 156 ms
Wall time: 159 ms


### 7. GPU Post-Processing

##### Free Ancillary Data from GPU

In [14]:
# Handles to CuPy's default device-memory pool and pinned-host-memory pool.
mempool = cp.get_default_memory_pool()
pinned_mempool = cp.get_default_pinned_memory_pool()

# Bytes in use before anything is released, for the report below.
init_bytes = mempool.used_bytes()

# Drop the references to the kernel inputs. The output arrays inner_dblts and
# outer_dblts are deliberately kept.
del gpu_hit_table, nhits, params, gpu_refCoords

# Ask both pools to release the blocks that are no longer in use.
mempool.free_all_blocks()
pinned_mempool.free_all_blocks()

print("Freed", init_bytes - mempool.used_bytes(), "Bytes")

Freed 803840 Bytes


array(0.00218474)

1607609025

In [24]:
# Inspect the inner-hit output slot for the pair (hit row 3, hit row 3):
# flat index 3*nHits + 3 corresponds to in_idx = 3, out_idx = 3 in the kernel.
inner_dblts[3*nHits+3]

array(17359, dtype=int32)

In [15]:
# Inspect the outer-hit output slot for the pair (hit row 2, hit row 2):
# flat index 2*nHits + 2 corresponds to in_idx = 2, out_idx = 2 in the kernel.
outer_dblts[2*nHits+2]

array(73663, dtype=int32)

In [14]:
# Show the number of hits (rows of hit_table).
nHits

40095

In [13]:
# Sanity check: every diagonal slot (flat index i*nHits + i, i.e. hit i paired
# with itself) of both output arrays must hold the hit id of row i.
#
# NOTE(review): the kernel shown in this notebook rejects self-pairs in its
# layer filter, so diagonal slots keep their initial 0 and this check is
# expected to raise -- confirm whether it belongs to an earlier kernel version.
#
# The expected ids come from the host-side hit_table (same values as the device
# copy), so the check still runs after gpu_hit_table has been freed. Each
# diagonal is copied to the host once instead of one element per iteration.
expected_ids = hit_table[:, 0]
inner_diag = cp.asnumpy(inner_dblts[::nHits + 1])
outer_diag = cp.asnumpy(outer_dblts[::nHits + 1])
for i in range(nHits):
    if inner_diag[i] != expected_ids[i]:
        raise Exception('{} does not equal {} in inner matrix at {}th diagonal entry'.format(inner_diag[i], expected_ids[i], i))
    if outer_diag[i] != expected_ids[i]:
        raise Exception('{} does not equal {} in outer matrix at {}th diagonal entry'.format(outer_diag[i], expected_ids[i], i))

In [23]:
# Label-based lookup: hit_id of the row whose index label is 0 (not
# necessarily the first row of the frame).
hits.loc[0, 'hit_id']

15529

In [24]:
# Display the hits frame for inspection.
hits

Unnamed: 0,hit_id,layer,phi_bin,r,z,particle_id
5041,20683,0,51,31.439409,66.873100,4503874505277440
13737,29745,1,51,71.164673,150.440002,4503874505277440
20044,36655,2,0,116.819069,246.522003,4503874505277440
25466,43100,3,0,171.360458,362.315002,4503874505277440
30823,73099,4,1,256.517517,545.200012,4503874505277440
...,...,...,...,...,...,...
30344,72575,4,23,257.260162,259.200012,801642176780959744
40856,85479,6,21,499.772675,524.000000,801642176780959744
44753,91383,7,19,657.909912,734.799988,801642176780959744
47731,107646,8,15,820.912903,1067.800049,801642176780959744


In [10]:
# Display the device-side (int32) hit table.
gpu_hit_table

array([[ 20683,      0,     51,     31,     66],
       [ 29745,      1,     51,     71,    150],
       [ 36655,      2,      0,    116,    246],
       ...,
       [ 91383,      7,     19,    657,    734],
       [107646,      8,     15,    820,   1067],
       [ 91858,      7,     16,    658,    915]], dtype=int32)

In [9]:
# Display the host-side hit table for comparison with the device copy.
hit_table

array([[ 20683,      0,     51,     31,     66],
       [ 29745,      1,     51,     71,    150],
       [ 36655,      2,      0,    116,    246],
       ...,
       [ 91383,      7,     19,    657,    734],
       [107646,      8,     15,    820,   1067],
       [ 91858,      7,     16,    658,    915]])

In [21]:
# Find the row index(es) of the hit whose hit_id (column 0) equals 73663.
np.where(gpu_hit_table[:, 0] == 73663)

(array([12]),)

In [25]:
# Show the flattened (1, nHits*5) view of the hit table, the same reshape that
# is passed to the kernel.
gpu_hit_table.reshape((1, nHits*5))

array([[20683,     0,    51, ...,    16,   658,   915]], dtype=int32)