In [1]:
import os
import h5py
import psutil
import sys
import time
import math
import warnings
import struct
import binascii
import pandas as pd
import numpy as np
import tensorflow as tf
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
%matplotlib inline

--------------------------------------------------------------------------
[[30000,1],0]: A high-performance Open MPI point-to-point messaging module
was unable to find any relevant network interfaces:

Module: OpenFabrics (openib)
  Host: compute201902

Another transport will be used instead, although this may result in
lower performance.

btl_base_warn_component_unused to 0.
--------------------------------------------------------------------------
2023-08-18 13:15:18.185355: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1


In [2]:
from datetime import datetime
from sklearn import preprocessing
from tqdm import tqdm,tqdm_notebook
from IPython.display import display, clear_output
from matplotlib.image import imread # read images
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KernelDensity
from scipy.signal import find_peaks
from numpy.polynomial.polynomial import polyfit
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
from tensorflow.keras.models import load_model,Sequential
from tensorflow.keras.layers import Dense,Conv2D,MaxPool2D,Flatten,Activation,concatenate
from tensorflow.keras.optimizers import Adam #optimizer
from tensorflow.keras.callbacks import EarlyStopping,TensorBoard
from tensorflow.python.client import device_lib
from numpy.polynomial.polynomial import polyfit
from sklearn.preprocessing import MinMaxScaler # normalize and scale data
from sklearn.metrics import mean_squared_error,mean_absolute_error,explained_variance_score,r2_score

In [3]:
# Path of the saved Keras model; note that it points at a single HDF5 (.h5) file
saved_model_dir = '/home/m962g264/wsu_Nova_Vertexer/output/h5_models/model_082023/Z_RHC_Model.h5'
# Restore the model object from that file
model = load_model(filepath=saved_model_dir)

2023-08-18 13:19:31.449064: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1
2023-08-18 13:19:31.676429: E tensorflow/stream_executor/cuda/cuda_driver.cc:314] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2023-08-18 13:19:31.676471: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (compute201902): /proc/driver/nvidia/version does not exist
2023-08-18 13:19:31.693130: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [4]:
# Build the directory path of the test data set
data_dir='/home/m962g264/research_repo/data-preprocess'
test_path=data_dir+'/ND_MC_Nominal_RHC_testdata/'
print('Test files validation path: \t{}'.format(test_path))

# Only the HDF5 files in the directory are data files; any other entry
# (sub-directory, log file, ...) must not be handed to h5py.
test_files = [n for n in os.listdir(test_path) if n.endswith(".h5")]

# Checking the size of the test set.
# The entry count of the 'run' dataset is taken from its shape, so the data
# itself is never read, and the context manager closes every file again
# instead of leaving one open handle per file behind.
test_idx=0
for h5_filename in test_files:
    with h5py.File(test_path+h5_filename,'r') as h5_file:
        test_idx=test_idx+h5_file['run'].shape[0]

print('Number of validation files:',len(test_files))
print('Number of validation events:',(test_idx))

Test files validation path: 	/home/m962g264/research_repo/data-preprocess/ND_MC_Nominal_RHC_testdata/
Number of validation files: 2000
Number of validation events: 72710


In [5]:
# Show which datasets the files provide, using the first test file as an example.
# The context manager closes the file again once the keys have been printed.
if not test_files:
    raise FileNotFoundError('No .h5 files found in {}'.format(test_path))
with h5py.File(test_path+test_files[0],'r') as f:
    print(list(f.keys()))

#for reading all test files
idx = 0
# truerecovtxx / truerecovtxy are not filled in this cell; they stay empty lists
test_cvnmap, truerecovtxx, truerecovtxy = ([] for i in range(3))
# Per-file pieces are collected in lists and joined once after the loop;
# calling np.append inside the loop would re-copy the growing array every time.
mode_per_file, iscc_per_file, vtxz_per_file = ([] for i in range(3))
n_test_files = len(test_files)  # hoisted: no directory listing per iteration

for h5_filename in test_files:
    # The path has to be joined with test_path; the bare file name would be
    # resolved against the current working directory.
    if os.path.isdir(test_path + h5_filename): #skipping directories in the files
        continue

    print('Processing... {} of {}'.format(idx, n_test_files), end="\r", flush=True)

    with h5py.File(test_path + h5_filename, 'r') as file:
        test_cvnmap.append(file['cvnmap'][:])
        mode_per_file.append(file['Mode'][:])
        iscc_per_file.append(file['isCC'][:])
        vtxz_per_file.append(file['TrueRecoVtxZ'][:])

    idx += 1

# The leading empty float64 array reproduces the dtype promotion of the former
# np.append([], ...) calls, so test_mode / test_iscc keep the same dtype.
test_mode = np.concatenate([np.empty(0)] + mode_per_file, axis=0)
test_iscc = np.concatenate([np.empty(0)] + iscc_per_file, axis=0)

# Convert to NumPy arrays
# The files hold different numbers of events, so the per-file arrays are ragged.
# They are stored one per slot in an object array; np.array() on a ragged list
# raises ValueError on NumPy >= 1.24. truerecovtxz[i] is still file i's array.
truerecovtxz = np.empty(len(vtxz_per_file), dtype=object)
for file_pos, file_vtxz in enumerate(vtxz_per_file):
    truerecovtxz[file_pos] = file_vtxz

print('Test files read successfully')

['E', 'Mode', 'PDG', 'TrueRecoVtxX', 'TrueRecoVtxY', 'TrueRecoVtxZ', 'cvnmap', 'cycle', 'event', 'firstplane', 'isCC', 'lastcellx', 'lastcelly', 'run', 'slice', 'subrun']
Test files read successfully


In [6]:
#Normalizing CNN processing
# Each loaded file's cvnmap block is normalised row by row (axis=1).
# Iterating over test_cvnmap itself keeps the loop tied to the data that was
# actually loaded instead of to a fresh listing of the directory.
normalized_per_file=[preprocessing.normalize(file_maps,axis=1) for file_maps in test_cvnmap]

# convert to np array
# One slot per file in an object array: the files differ in event count, and
# np.array() on such a ragged list raises ValueError on NumPy >= 1.24.
test_cvnmap_norm=np.empty(len(normalized_per_file),dtype=object)
for file_pos,file_maps in enumerate(normalized_per_file):
    test_cvnmap_norm[file_pos]=file_maps

#True vertex extraction for analysis
# Assumes every per-file array is indexed [event][0] -> true Z and
# [event][1] -> reco Z, exactly as the former per-event loop did.
# Slicing a whole column per file and concatenating once replaces one
# np.append call per event, which re-copied the full result array each time.
print('Processing...', end="\r", flush=True)
truevtxz=np.concatenate(
    [np.empty(0)]+[np.asarray(file_vtxz)[:,0].reshape(-1) for file_vtxz in truerecovtxz]
).astype(np.float64,copy=False)
recovtxz=np.concatenate(
    [np.empty(0)]+[np.asarray(file_vtxz)[:,1].reshape(-1) for file_vtxz in truerecovtxz]
).astype(np.float64,copy=False)

print('Testing preprocessing complete\n', end="\r", flush=True)


Testing preprocessing complete


In [7]:
# split normalized cvnmap into reshaped events with multi-views
# Three flat, per-event lists are produced:
#   test_cvnmap_norm_resh    - the full (2, 100, 80) block of every event
#   test_cvnmap_norm_resh_xz - index 0 of that block
#   test_cvnmap_norm_resh_yz - index 1 of that block
test_cvnmap_norm_resh, test_cvnmap_norm_resh_xz, test_cvnmap_norm_resh_yz = ([] for i in range(3))

# Loop over the normalised data itself; the file count is taken once from the
# data instead of re-listing the directory on every iteration.
n_norm_files = len(test_cvnmap_norm)

for file_pos, file_maps in enumerate(test_cvnmap_norm):
    print('Processing tests cvnmap file {} of {}'.format(file_pos + 1, n_norm_files), end="\r", flush=True)

    for event_pos in range(file_maps.shape[0]):
        event_views = file_maps[event_pos].reshape(2, 100, 80)
        test_cvnmap_norm_resh.append(event_views)
        test_cvnmap_norm_resh_xz.append(event_views[0])
        test_cvnmap_norm_resh_yz.append(event_views[1])

print('\ncvnmap processing complete')

Processing tests cvnmap file 2000 of 2000
cvnmap processing complete


**Convert the per-view event lists to NumPy arrays for model input**

In [8]:
# xz views only: turn the per-event list into a single array
test_cvnmap_norm_resh_xz=np.asarray(test_cvnmap_norm_resh_xz)

In [9]:
# yz views only: turn the per-event list into a single array
test_cvnmap_norm_resh_yz=np.asarray(test_cvnmap_norm_resh_yz)

In [10]:
# Give both views a trailing axis of length 1 so the CNN sees a single colour channel.
# Resulting layout: batch_size, width, height, color_channels
test_cvnmap_norm_resh_xz=np.reshape(test_cvnmap_norm_resh_xz,test_cvnmap_norm_resh_xz.shape[:1]+(100,80,1))
test_cvnmap_norm_resh_yz=np.reshape(test_cvnmap_norm_resh_yz,test_cvnmap_norm_resh_yz.shape[:1]+(100,80,1))

In [11]:
# Run the loaded model on all test events; the two inputs are passed in the order [xz, yz]
full_predictions=model.predict(x=[test_cvnmap_norm_resh_xz,test_cvnmap_norm_resh_yz])

In [12]:
#Saving full Prediction CSV file
# np.asarray accepts both the raw model output and the Series this cell stores
# back into full_predictions, so the cell can be re-run without failing on
# Series.reshape. Reshaping to len(truevtxz) still fails loudly when the number
# of predictions does not match the number of true vertices.
predictions_flat=np.asarray(full_predictions).reshape(len(truevtxz),)
full_predictions=pd.Series(predictions_flat)

full_reco_df=pd.DataFrame(recovtxz,columns=['Reco Z'])
# Build the three-column table in one step; 'columns' pins the column order.
full_true_df=pd.DataFrame(
    {'True Z':truevtxz,'Model Predictions':predictions_flat,'Reco Z':recovtxz},
    columns=['True Z','Model Predictions','Reco Z'],
)

# np.savetxt writes the values only (no header row, no index column); the
# columns in the file are, in order: True Z, Model Predictions, Reco Z.
np.savetxt("/home/m962g264/wsu_Nova_Vertexer/output/csv_files/z_modelPred_RHC_abdul.csv", full_true_df, delimiter=",")