# EM shower reconstruction at SND@LHC

1. __make sure the preprocessing has already been done__

2. __make sure `results` folder exists__

Reference (arXiv:2002.08722): https://arxiv.org/pdf/2002.08722.pdf

In [1]:
# save the results folder in an archive
# you can download it from the Jupyter file manager
# !tar chvfz archive_results.tar.gz results/*

results/bayesian_results/
results/bayesian_results/9X0_file/
results/bayesian_results/9X0_file/8_9X0_coordconv.pt
results/bayesian_results/9X0_file/4_9X0_coordconv.pt
results/bayesian_results/9X0_file/9_9X0_coordconv.pt
results/bayesian_results/9X0_file/0_9X0_coordconv.pt
results/bayesian_results/9X0_file/7_9X0_coordconv.pt
results/bayesian_results/9X0_file/1_9X0_coordconv.pt
results/bayesian_results/9X0_file/2_9X0_coordconv.pt
results/bayesian_results/9X0_file/6_9X0_coordconv.pt
results/bayesian_results/9X0_file/5_9X0_coordconv.pt
results/bayesian_results/9X0_file/3_9X0_coordconv.pt
results/bayesian_results/NN_performance.txt
results/bayesian_results/PredE_file/
results/bayesian_results/PredE_file/9_PredE_test.npy
results/bayesian_results/PredE_file/1_PredE_test.npy
results/bayesian_results/PredE_file/3_PredE_test.npy
results/bayesian_results/PredE_file/5_PredE_test.npy
results/bayesian_results/PredE_file/8_PredE_test.npy
results/bayesian_results/PredE_file/2_PredE_test.npy
results/ba

In [2]:
# imports from utils.py & net.py
from utils import DataPreprocess, Parameters
#from net import SNDNet, BNN, MyDataset, digitize_signal, digitize_signal_1d

# python
import numpy as np
import pandas as pd
import matplotlib as mpl
from matplotlib import pylab as plt
import time
from tqdm import tqdm
from IPython import display

# system
import os
import gc  # Gabage collector interface (to debug stuff)
import sys

# ml
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# dl
import torch
import torch.nn as nn

Welcome to JupyROOT 6.18/00


In [3]:
# Fail fast when CUDA is unavailable, then list the CUDA devices that are visible.
# The check is an explicit `if` rather than an `assert` inside a bare `except`:
# asserts are stripped when Python runs with -O, and a bare `except` would also
# intercept KeyboardInterrupt / SystemExit.
if not torch.cuda.is_available():
    raise RuntimeError("CUDA is not available")

n_devices = torch.cuda.device_count()
print("CUDA devices available:")

for i in range(n_devices):
    print("\t{}\twith CUDA capability {}".format(torch.cuda.get_device_name(device=i),
                                                 torch.cuda.get_device_capability(device=i)))

# Handle to the first CUDA device (index 0).
device = torch.device("cuda", 0)

CUDA devices available:
	Quadro RTX 4000	with CUDA capability (7, 5)
	Quadro RTX 4000	with CUDA capability (7, 5)


In [4]:
# Turn off matplotlib's interactive mode: the original author notes that
# interactive plotting misbehaves during long runs.
plt.ioff()

In [5]:
# Build the parameter object for the "SNDatLHC" setup, then pick the entry of
# its `snd_params` container that is selected by its own `configuration`
# attribute.
# NOTE(review): `Parameters` is imported from utils.py, which is not visible
# here — confirm what `configuration` is set to and what the entry contains.
DETECTOR_PARAMS = Parameters("SNDatLHC")
DETECTOR_CONFIG = DETECTOR_PARAMS.snd_params[DETECTOR_PARAMS.configuration]

# number of planes of the detector
#NB_PLANE = dict()

#NB_PLANE['scifi']   = len(DETECTOR_CONFIG['SciFi_tracker']        ['TT_POSITIONS'])
#NB_PLANE['up_mu']   = len(DETECTOR_CONFIG['Mu_tracker_upstream']  ['TT_POSITIONS'])
#NB_PLANE['down_mu'] = len(DETECTOR_CONFIG['Mu_tracker_downstream']['TT_POSITIONS'])

## Data processing

Here we load and process __pickle__ files. 

In [6]:
from src.process_pickle import *

In [7]:
# Input directories, keyed by sample name.
DATA_PATH = {
    'nuel':  "~/snd_data/nue",
    'numu':  "~/snd_data/numu",
    'nutau': "~/snd_data/nutau",
}

# TODO: read the number of events per file from the files themselves?
EVENTS_PER_FILE = 4000
# TODO: derive the file count from the directory? (original note: MAX=100)
FILES_NUM = 8

In [8]:
#scifi_arr, mu_arr, en_arr = load_dataframes(DETECTOR_PARAMS, 
#                                            DATA_PATH, EVENTS_PER_FILE, FILES_NUM)

In [9]:
#scifi_arr, mu_arr, en_arr = merge_events_arrays(scifi_arr, mu_arr, en_arr)

In [10]:
#en_arr = normalise_target_energy(en_arr)

## Data preparation

Here we prepare (load or, if needed, create) the datasets.

In [11]:
from src.operate_datasets import *

In [12]:
#create_dataset('true', DETECTOR_PARAMS, DATA_PATH, EVENTS_PER_FILE, FILES_NUM)

In [13]:
#create_dataset('sum', DETECTOR_PARAMS, DATA_PATH, EVENTS_PER_FILE, FILES_NUM)

In [14]:
#create_dataset('longitudal', DETECTOR_PARAMS, DATA_PATH, EVENTS_PER_FILE, FILES_NUM)

In [15]:
# create_dataset('projection', DETECTOR_PARAMS, DATA_PATH, EVENTS_PER_FILE, FILES_NUM)

In [16]:
# memory troubles!
# be very careful when using this
### create_dataset('plane', DETECTOR_PARAMS, DATA_PATH)

In [17]:
# move the new datasets to the right folder
# !mv *.npz ../snd_data/d_data/new_dataset/

mv: cannot stat ‘*.npz’: No such file or directory


## Models

## Run models

In [19]:
#!jupyter nbconvert --to notebook --inplace --execute regression_*.ipynb

In [20]:
#!jupyter nbconvert --to notebook --inplace --execute nn_*.ipynb --ExecutePreprocessor.timeout=180

In [21]:
#!jupyter nbconvert --to notebook --inplace --execute bnn_*.ipynb

## Compare metrics

In [22]:
from src.model_evaluation import *

In [23]:
def present_scores(df):
    """Return a copy of `df` with alphabetically sorted columns and 'Score' first.

    Parameters
    ----------
    df : pandas.DataFrame
        Table that must contain a 'Score' column.

    Returns
    -------
    pandas.DataFrame
        New frame: 'Score' in position 0, followed by the remaining columns in
        sorted order. The frame passed in is left untouched.

    Raises
    ------
    KeyError
        If `df` has no 'Score' column.
    """
    # reindex() builds a new frame, so the pop/insert below never touch `df`.
    ordered = df.reindex(columns=sorted(df.columns))

    score_values = ordered.pop('Score')
    ordered.insert(loc=0, column='Score', value=score_values)

    return ordered

In [24]:
# Display the result of collect_all_scores(), called with no arguments, after
# present_scores() has moved the 'Score' column to the front.
# NOTE(review): collect_all_scores is presumably provided by the
# `from src.model_evaluation import *` star import — confirm, and confirm
# which score set its default argument selects.
present_scores(collect_all_scores())

Unnamed: 0,Score,BNN-on-full-sum,BNN-on-plane-sums,L2-on-full-sum,L2-on-plane-sums,L2-on-projections,L2-on-true-hits,NN-on-full-sum,NN-on-plane-sums,NN-on-projections,NN-on-true-hits
0,explained_variance_score,-0.223233,-0.67441,0.013989,0.279462,0.352173,0.12725,0.041599,0.229696,0.41445,-0.678216
1,max_error,1.470716,7.644205,0.962614,1.035705,1.083276,0.99907,1.037722,2.581156,1.048289,9.208755
2,mean_absolute_error,0.149465,0.148487,0.140651,0.113108,0.107232,0.125568,0.127296,0.113745,0.101093,0.142938
3,mean_squared_error,0.047402,0.053915,0.031907,0.023052,0.02075,0.027922,0.031332,0.024644,0.018756,0.053997
4,median_absolute_error,0.097231,0.117582,0.127065,0.092548,0.087339,0.107848,0.099727,0.090105,0.081676,0.10243
5,r2_score,-0.481636,-0.685225,0.002681,0.279462,0.352172,0.12725,0.020647,0.229695,0.414425,-0.6878
6,avg_resolution,1.007727,2.125408,2.37508,1.498581,1.400614,1.872374,1.554206,1.391067,1.315607,1.504756
7,std_resolution,6.96644,7.877072,8.835667,5.833365,5.602714,7.319886,6.038526,5.307643,5.473247,5.846328


In [25]:
# Display the result of collect_all_scores(TEST_SCORES_DIR) with the 'Score'
# column moved to the front.
# NOTE(review): TEST_SCORES_DIR is not defined in the visible cells; it
# presumably arrives through a star import — confirm where it is set.
present_scores(collect_all_scores(TEST_SCORES_DIR))

Unnamed: 0,Score,BNN-on-full-sum,BNN-on-plane-sums,L2-on-full-sum,L2-on-plane-sums,L2-on-projections,L2-on-true-hits,NN-on-full-sum,NN-on-plane-sums,NN-on-projections,NN-on-true-hits
0,explained_variance_score,-0.379093,-0.231031,0.015207,0.277664,0.337756,0.130124,0.046824,0.206026,0.400336,-0.62679
1,max_error,1.799817,3.049016,0.90742,0.856899,0.805914,0.894357,0.943051,2.518444,0.772028,3.027048
2,mean_absolute_error,0.156494,0.14266,0.140498,0.113324,0.107791,0.125348,0.126676,0.114243,0.101947,0.142334
3,mean_squared_error,0.052074,0.03941,0.031751,0.023037,0.020905,0.027743,0.031105,0.025322,0.018928,0.052131
4,median_absolute_error,0.10311,0.117356,0.12673,0.092459,0.087961,0.107367,0.09845,0.089956,0.081414,0.10154
5,r2_score,-0.632817,-0.235732,0.004419,0.277664,0.337663,0.130113,0.024685,0.206012,0.400277,-0.634606
6,avg_resolution,1.011794,2.000125,2.247229,1.406165,1.324153,1.76953,1.46298,1.30933,1.25971,1.421161
7,std_resolution,5.333859,5.988438,6.652317,4.396246,4.061178,5.507832,4.691659,4.100527,4.007199,4.610855


In [18]:
# Support Vector Regression (SVR)
# NOTE(review): the experiment below is disabled by wrapping it in a
# triple-quoted string instead of `#` comments. Nothing inside it runs; the
# string is the cell's last expression, so the notebook echoes the whole text
# back as the cell output. As written, the disabled code fits, scores and
# predicts with the SVR on the training split only (X_train / y_train).

'''
full_X, full_y = load_dataset('~/snd_data/new_dataset/', 'longitudal')

X_train, y_train, _, _ = split_dataset(full_X, full_y)
# min_clip = 25
# X_train, y_train = clip_dataset(X_train, y_train, min_clip)


from sklearn import svm

reg_svr = svm.SVR(gamma='scale')
#reg_svr = svm.LinearSVR(max_iter=10**5)

reg_svr.fit(X_train, y_train)

score_svr = reg_svr.score(X_train, y_train)

print('SVM: ', score_svr)

y_pred_svr = reg_svr.predict(X_train)

X_sum = X_train.sum(axis=1).reshape(-1,1)
y_sum = y_train.reshape(-1,1)
y_pred_svr = y_pred_svr.reshape(-1,1)


plot_res_vs_energy(X_sum, y_sum, y_pred_svr) 
plot_res_hist(y_sum, y_pred_svr)
plot_2d_energy_hist(X_sum, y_sum, y_pred_svr)
get_scores(y_sum, y_pred_svr)
'''

"\nfull_X, full_y = load_dataset('~/snd_data/new_dataset/', 'longitudal')\n\nX_train, y_train, _, _ = split_dataset(full_X, full_y)\n# min_clip = 25\n# X_train, y_train = clip_dataset(X_train, y_train, min_clip)\n\n\nfrom sklearn import svm\n\nreg_svr = svm.SVR(gamma='scale')\n#reg_svr = svm.LinearSVR(max_iter=10**5)\n\nreg_svr.fit(X_train, y_train)\n\nscore_svr = reg_svr.score(X_train, y_train)\n\nprint('SVM: ', score_svr)\n\ny_pred_svr = reg_svr.predict(X_train)\n\nX_sum = X_train.sum(axis=1).reshape(-1,1)\ny_sum = y_train.reshape(-1,1)\ny_pred_svr = y_pred_svr.reshape(-1,1)\n\n\nplot_res_vs_energy(X_sum, y_sum, y_pred_svr) \nplot_res_hist(y_sum, y_pred_svr)\nplot_2d_energy_hist(X_sum, y_sum, y_pred_svr)\nget_scores(y_sum, y_pred_svr)\n"