In [1]:
import numpy as np 
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
import pandas as pd
import os
import trackml
import time
import math
from numba import jit
from trackml.dataset import load_event , load_dataset
from trackml.score import score_event
from scipy.spatial import distance
import decimal as d


### Importing data

In [2]:
hits_train_100, cells_train_100, particles_train_100, truth_train_100 = load_event('train_100_events/event000001000')
data_detectors = pd.read_csv(r"detectors.csv")

#print(data_detectors.iloc[:,:1])
#print(truth_train_100[0:4])

In [3]:
def func_import_100_sample():
    event_id_10 = np.linspace(0,9,10)
    event_id_100 = np.linspace(10,99,90)

    cells_all = []
    hits_all = []
    particles_all = []
    truth_all = []

    for i in range(len(event_id_10)):
        cells_all.append(pd.read_csv('train_100_events/event00000100%d-cells.csv' % event_id_10[i]))
    for i in range(len(event_id_100)):
        cells_all.append(pd.read_csv('train_100_events/event0000010%d-cells.csv' % event_id_100[i]))

    for i in range(len(event_id_10)):
        hits_all.append(pd.read_csv('train_100_events/event00000100%d-hits.csv' % event_id_10[i]))
    for i in range(len(event_id_100)):
        hits_all.append(pd.read_csv('train_100_events/event0000010%d-hits.csv' % event_id_100[i]))

    for i in range(len(event_id_10)):
        particles_all.append(pd.read_csv('train_100_events/event00000100%d-particles.csv' % event_id_10[i]))
    for i in range(len(event_id_100)):
        particles_all.append(pd.read_csv('train_100_events/event0000010%d-particles.csv' % event_id_100[i]))

    for i in range(len(event_id_10)):
        truth_all.append(pd.read_csv('train_100_events/event00000100%d-truth.csv' % event_id_10[i]))
    for i in range(len(event_id_100)):
        truth_all.append(pd.read_csv('train_100_events/event0000010%d-truth.csv' % event_id_100[i]))
    return cells_all , hits_all , particles_all , truth_all


In [4]:
# start = time.time()

# cells_all = func_import_100_sample()[0]
# hits_all = func_import_100_sample()[1]
# particles_all = func_import_100_sample()[2]
# truth_all = func_import_100_sample()[3]

# end = time.time()
# run_time = end - start
# print(run_time, "s")

### Making functions

In [5]:
def func_cleaning_data(cells , hits, particles , truth):
    #Finding all hit_id that is noice, to use in other files for removing nocie in them.
    def noice(truth):
        truth_hit_id_noice = [truth.hit_id[i] for i in range(len(truth)) if truth.particle_id[i] == 0]
        return truth_hit_id_noice
    truth_hit_id_noice_list = noice(truth)
    #Removing all the noice in the truth file:
    truth_zero_noice = truth.drop(truth.index[truth['particle_id'] == 0]).reset_index()
    #Removing all the data where the nhits is >=3 :
    particles_zero_noice = particles.drop(particles.index[particles['nhits'] <= 3]).reset_index()
    #Sorting the particles
    particles_zero_noice_sorted_unique = particles_zero_noice.sort_values(by = "particle_id",ascending=True)
    #Making a function that can remove all row that has a value in a list
    def FRBV(file_name, column_name, list_of_values):
        return file_name[~file_name[column_name].isin(list_of_values)]
    #Removing all the noice in the cells file:
    cells_zero_noice = FRBV(cells , "hit_id" , truth_hit_id_noice_list).reset_index()
    #Removing all the noice in the hits file:
    hits_zero_noice = FRBV(hits , "hit_id" , truth_hit_id_noice_list).reset_index()
    #Making a function that can remove all the data, that has a nhits over 7.
    def nhit_over_7(data):
        data = [data.particle_id[i] for i in range(len(data)) if data.nhits[i] > 7]
        return data
    #Removing all the data where nhits is less then 7
    particle_id_with_nhits_over_7 = nhit_over_7(particles_zero_noice_sorted_unique)
    #Removing all the data where nhits is over then 7
    particle_id_with_nhits_lees_7 = FRBV(particles_zero_noice_sorted_unique , "particle_id" , particle_id_with_nhits_over_7).drop("index",axis = 1).reset_index().drop("index",axis = 1)
    #Removing all the data where a particle_id has more then 7 nhits.
    truth_zero_noice_nhits_lees_7 = FRBV(truth_zero_noice , "particle_id" , particle_id_with_nhits_over_7).drop("index",axis = 1).reset_index().drop("index",axis = 1)
    #Making a function that can make a list of the hit_ids that has a weight of 0.
    def weight_equle_0(data):
        data = [data.hit_id[i] for i in range(len(data)) if data.weight[i] == 0]
        return data
    #Using the weight_equle_0 function to make a list of hit_id´s that has a weight equle 0
    truth_weight_0_list = weight_equle_0(truth_zero_noice_nhits_lees_7)
    #Using the list of hit_id´s that has a weight equle 0, to remove the rows in truth that has that hit_id.
    truth_zero_noice_nhits_lees_7_weight_0 = FRBV(truth_zero_noice_nhits_lees_7,"hit_id",truth_weight_0_list).reset_index().drop("index",axis = 1)
    
    #Removing the data where the particle_id has less then 7 nhits.
    truth_zero_noice_nhits_over_7 = FRBV(truth_zero_noice , "particle_id" ,truth_zero_noice_nhits_lees_7.particle_id).drop("index",axis = 1).reset_index().drop("index",axis = 1)
    #Removing the data where the hit_id has less the 7 nhits.
    hits_zero_noice_nhits_lees_7 = FRBV(hits_zero_noice, "hit_id",truth_zero_noice_nhits_over_7["hit_id"]).drop("index",axis = 1).reset_index().drop("index",axis = 1)
    #Making a list of all the particle_ids that has over 3 nhits
    particles_id_over_3 = [particles.particle_id[i] for i in range(len(particles)) if particles.nhits[i] > 3]
    #Removing all the data in truth that has less the 3 nhits. Used for later in cells and hits.  
    truth_zero_noice_over_3 = FRBV(truth_zero_noice,"particle_id",particles_id_over_3).drop("index",axis = 1).reset_index().drop("index",axis = 1)
    #Removing all the data in hits that has over the 3 nhits.
    hits_zero_noice_nhits_lees_7_over_3_with_weight_0 = FRBV(hits_zero_noice_nhits_lees_7,"hit_id",truth_zero_noice_over_3.hit_id).reset_index().drop("index",axis = 1)
    #Making a list of hit_id that is not in truth but is in hits
    hit_id_in_hits_but_not_in_truth = FRBV(hits_zero_noice_nhits_lees_7_over_3_with_weight_0,"hit_id",truth_zero_noice_nhits_lees_7_weight_0.hit_id).reset_index().drop("index",axis = 1)
    #Using that hits not in truth, and then removing them from hits. 
    hits_zero_noice_nhits_lees_7_over_3_without_weight_0 = FRBV(hits_zero_noice_nhits_lees_7_over_3_with_weight_0,"hit_id",hit_id_in_hits_but_not_in_truth.hit_id).reset_index().drop("index",axis = 1)

    #Removing the data where the hit_id has less the 7 nhits.
    cells_zero_noice_nhits_lees_7 = FRBV(cells_zero_noice, "hit_id",truth_zero_noice_nhits_over_7["hit_id"]).drop("index",axis = 1).reset_index().drop("index",axis = 1)
    #Removing all the data in cells that has over the 3 nhits.
    cells_zero_noice_nhits_lees_7_over_3_with_weight_0 = FRBV(cells_zero_noice_nhits_lees_7,"hit_id",truth_zero_noice_over_3.hit_id).reset_index().drop("index",axis = 1)
    #Making a list of hit_id that is not in truth but is in cells
    hit_id_in_cells_but_not_in_truth = FRBV(cells_zero_noice_nhits_lees_7_over_3_with_weight_0,"hit_id",truth_zero_noice_nhits_lees_7_weight_0.hit_id).reset_index().drop("index",axis = 1)
    #Using that cells hit_id not in truth, and then removing them from cells.
    cells_zero_noice_nhits_lees_7_over_3_without_weight_0 = FRBV(cells_zero_noice_nhits_lees_7_over_3_with_weight_0,"hit_id",hit_id_in_cells_but_not_in_truth.hit_id).reset_index().drop("index",axis = 1)
    
    return cells_zero_noice_nhits_lees_7_over_3_without_weight_0 , hits_zero_noice_nhits_lees_7_over_3_without_weight_0 , particle_id_with_nhits_lees_7 , truth_zero_noice_nhits_lees_7_weight_0

In [6]:
Start = time.time()

cells , hits , particles, truth = func_cleaning_data(cells_train_100,hits_train_100,particles_train_100,truth_train_100)

end = time.time()
run_time = end - Start
print(round(run_time,4), "s")

1.3241 s


In [7]:
print(len(cells),len(hits),len(particles),len(truth))



56883 7265 1323 7265


In [16]:
print(hits)
print(truth)

      hit_id           x           y       z  volume_id  layer_id  module_id
0        206 -118.573997 -122.498001 -1502.0          7         2         15
1        498   72.551003  -94.188499 -1498.0          7         2         38
2        532   72.722801  -94.425003 -1502.0          7         2         40
3        552   44.314999  -28.215500 -1497.5          7         2         42
4        579   44.448200  -28.322701 -1502.5          7         2         44
...      ...         ...         ...     ...        ...       ...        ...
7260  120791  246.906006  919.896973  2952.5         18        12         70
7261  120808   85.988800  844.984985  2944.5         18        12         73
7262  120843 -337.717987  791.450989  2944.5         18        12         81
7263  120854 -477.261993  812.588989  2952.5         18        12         82
7264  120901 -772.278992  353.083008  2947.5         18        12         91

[7265 rows x 7 columns]
      hit_id         particle_id          tx       

In [8]:
Start = time.time()

hits_new = hits.merge(truth, how='left', on='hit_id')




#hits_new = hits_new.sort_values(by = ["particle_id","volume_id"],ascending=True)

#hits_new = hits_new[[ "hit_id","particle_id" ,"volume_id", "layer_id"]]

#print(hits_new.loc[hits_new['particle_id'] == 734116219916910592])


asd = []
for i in range(len(np.unique(hits_new['particle_id']))):
    asd.append(hits_new.loc[hits_new['particle_id'] == np.unique(hits_new['particle_id'])[i]])

asd = np.array(asd,dtype=object)
print(asd[0].particle_id,asd[0].x,asd[0].y,asd[0].z)


end = time.time()
run_time = end - Start
print(round(run_time,5), "s")

5318    4504424277872641
5355    4504424277872641
5971    4504424277872641
6598    4504424277872641
6599    4504424277872641
Name: particle_id, dtype: int64 5318   -454.404999
5355   -451.477997
5971   -620.114014
6598   -851.799011
6599   -844.419006
Name: x, dtype: float32 5318    213.024002
5355    212.059006
5971    220.647995
6598   -258.533997
6599   -282.459991
Name: y, dtype: float32 5318    -359.799988
5355    -360.799988
5971    -314.399994
6598   -1225.500000
6599   -1217.500000
Name: z, dtype: float32
0.86084 s


### Calling data cleaning data

In [9]:
# Start = time.time()

# for i in range(100):
#     cells_all_clean , hits_all_clean , particles_all_clean, truth_all_clean  = func_cleaning_data(cells_all[i],hits_all[i],particles_all[i],truth_all[i])

# end = time.time()
# run_time = end - Start
# print(round(run_time,4), "s")

In [10]:
Start = time.time()

def car_to_cyl_cood(x,y,z):
    r = np.sqrt(x**2 + y**2)
    phi = np.arctan2(y,x)
    z = z
    return r , phi , z 

r_hits , phi_hits , z_hits = car_to_cyl_cood(hits.x,hits.y,hits.z)
r_truth , phi_truth , z_truth = car_to_cyl_cood(truth.tx,truth.ty,truth.tz)


end = time.time()
run_time = end - Start
print(run_time, "s")

0.0029916763305664062 s


In [11]:
def edge_feat(r,phi,z):
    r = list(r)
    phi = list(phi)
    z = list(z)
    dphi = []
    z_0 = []
    delta_phi = []
    eta = [] # the pseudoangular
    delta_eta = []
    delta_z = []
    for i in range(len(phi)):
        eta.append(-np.log(abs(math.tan(phi[i])/2)))
    for i in range(0,len(r)-1): #i=1  , i=2
        for k in range(1,len(r)): #k=1-len(r) ,k=1-len(r)
            dp = phi[k] - phi[i]
            delta_phi.append(dp)
            delta_eta_values = eta[k] - eta[i]
            delta_eta.append(delta_eta_values)
            delta_z_values = z[k] - z[i]
            delta_z.append(delta_z_values)
            dr = r[k] - r[i]
            dpi = dp/dr
            z0 = z[i] - r[i]*((z[k] - z[i])/(r[k]-r[i]))
            if abs(dpi) < 0.0006 and abs(z0) < 200:
                dphi.append(dpi)
                z_0.append(z0)
        return dphi, z_0 , eta , delta_phi , delta_eta, delta_z


dphi, z_0 , eta , delta_phi , delta_eta, delta_z = edge_feat(r_truth,phi_truth,z_truth)

def delta_R(delta_eta,delta_phi):
    delta_R = []
    for i in range(len(delta_eta)):
        delta_R.append((delta_eta[i]**2 + delta_phi[i]**2)**(1/2))
    return delta_R

print(len(delta_R(delta_eta,delta_phi)))

def calc_eta(r, z):
    theta = np.arctan2(r, z)
    return -1. * np.log(np.tan(theta / 2.))

# def calc_eta2(phi):
#     return -1. * np.log(np.tan(phi / 2.))


print(min(calc_eta(r_truth, z_truth)),max(calc_eta(r_truth, z_truth)))
# print(min(calc_eta2(phi_truth)),max(calc_eta2(phi_truth)))
print(min(eta),max(eta))

7264
-4.427437782287598 4.306934356689453
-8.569178239140252 9.096890895613484


In [13]:
asd = []
for i in range(len(np.unique(hits_new['particle_id']))):
    asd.append(hits_new.loc[hits_new['particle_id'] == np.unique(hits_new['particle_id'])[i]])

asd = np.array(asd,dtype=object)
print(asd[0].particle_id,asd[0].x,asd[0].y,asd[0].z)

5318    4504424277872641
5355    4504424277872641
5971    4504424277872641
6598    4504424277872641
6599    4504424277872641
Name: particle_id, dtype: int64 5318   -454.404999
5355   -451.477997
5971   -620.114014
6598   -851.799011
6599   -844.419006
Name: x, dtype: float32 5318    213.024002
5355    212.059006
5971    220.647995
6598   -258.533997
6599   -282.459991
Name: y, dtype: float32 5318    -359.799988
5355    -360.799988
5971    -314.399994
6598   -1225.500000
6599   -1217.500000
Name: z, dtype: float32


In [14]:
asd[0]

Unnamed: 0,hit_id,x,y,z,volume_id,layer_id,module_id,particle_id,tx,ty,tz,tpx,tpy,tpz,weight
5318,86943,-454.404999,213.024002,-359.799988,13,6,541,4504424277872641,-454.397003,213.037003,-359.854004,0.18721,-0.058768,-0.051357,1.7e-05
5355,87286,-451.477997,212.059006,-360.799988,13,6,619,4504424277872641,-451.453003,212.097,-360.662994,0.186594,-0.059995,-0.051656,9e-06
5971,93476,-620.114014,220.647995,-314.399994,13,8,811,4504424277872641,-620.111023,220.654007,-314.825012,0.194471,0.03723,-0.051029,2.9e-05
6598,107111,-851.799011,-258.533997,-1225.5,16,12,5,4504424277872641,-854.073975,-259.233002,-1225.5,-0.085292,0.165477,-0.059998,1.7e-05
6599,107117,-844.419006,-282.459991,-1217.5,16,12,6,4504424277872641,-841.767029,-281.515015,-1217.5,-0.097869,0.162592,-0.057692,9e-06


In [12]:

def nodes_and_features(r)





SyntaxError: invalid syntax (Temp/ipykernel_7960/3761583642.py, line 1)

### Kigger på data

### Ploting data pre GNN

In [None]:
def plotting_data(data_rough,data_clean):
    XYZ_rough = [[],[],[]]
    XYZ_clean = [[],[],[]]
    for i, idx in enumerate(range(len(data_rough.x))):
        if abs(data_rough.z[i]) < 2000:
            XYZ_rough[0].append((data_rough.x[idx]))
            XYZ_rough[1].append((data_rough.y[idx]))
            XYZ_rough[2].append((data_rough.z[idx]))
    print(len(XYZ_rough[0]), len(XYZ_rough[1]), len(XYZ_rough[2]))

    for i, idx in enumerate(range(len(data_clean.x))):
        if abs(data_clean.z[i]) < 2000:
            XYZ_clean[0].append((data_clean.x[idx]))
            XYZ_clean[1].append((data_clean.y[idx]))
            XYZ_clean[2].append((data_clean.z[idx]))
    print(len(XYZ_clean[0]), len(XYZ_clean[1]), len(XYZ_clean[2]))
    return XYZ_rough , XYZ_clean

XYZ_rough , XYZ_clean = plotting_data(hits_train_100,hits)

In [None]:
zoom = 2000
figsize = 5
alpha = 0.35

fig = plt.figure(1, figsize = (figsize,figsize))
ax = plt.axes(projection='3d')
ax.scatter3D(XYZ_rough[0],XYZ_rough[1],XYZ_rough[2], c = XYZ_rough[0], alpha = alpha)
plt.xlim(-zoom,zoom)
plt.ylim(-zoom,zoom)
ax.set_zlim(-zoom,zoom)


fig = plt.figure(2, figsize = (figsize,figsize))
ax = plt.axes(projection='3d')
ax.scatter3D(XYZ_clean[0],XYZ_clean[1],XYZ_clean[2], c = XYZ_clean[0] , alpha = alpha)
plt.xlim(-zoom,zoom)
plt.ylim(-zoom,zoom)
ax.set_zlim(-zoom,zoom)

In [None]:
plt.figure(1)
plt.pie(particles.groupby('q')['vx'].count(), labels=['negative', 'positive'],autopct='%.0f%%',shadow=True, radius=1,textprops=dict(color="w"))
plt.title('Distribution of particle charges:',color = "white")
plt.show()

### Looking at the detector

In [None]:
x = data_detectors.cx
y = data_detectors.cy
z = data_detectors.cz
fig = plt.figure(5, figsize = (10,10))
ax = plt.axes(projection='3d')
ax.scatter3D(z , y ,x, c = y, alpha = 0.5)