In [23]:
import random
import h5py
import numpy as np
import tqdm
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d

In [24]:
file = h5py.File('output_digi_HDF_Mg22_Ne20pp_8MeV.h5', 'r')
#print(file["Event_[3]"][:][:])

In [25]:
sample_size = 128
keys = list(file.keys())
length = len(keys)

In [26]:
file_name = 'Mg22_size' + str(sample_size) + '.h5'
new = h5py.File(file_name, 'a')
for n in tqdm.tqdm(range(length)):
    name = keys[n]
    event = file[name]
    ev_len = len(event)
    event_data = np.zeros((ev_len, 12), float)
    #converting event into an array
    for i,e in enumerate(event):
        instant = np.array(list(e))
        event_data[i] = np.array(instant)    
    new_event = np.zeros((sample_size, 13), float)
    #making new array for normalized event
    if ev_len == sample_size:
        new_event[:,:-1] = event_data[:,:]
    else:        #length2 != sample_size:
        for i in range(sample_size):
            row = random.randint(0, ev_len - 1)
            new_event[i,:-1] = event_data[row,:]
    unique_point_ids = np.unique(event_data[:,5])    #array of unqiue particles IDs
    new_event[0,-1] = unique_point_ids.size - 1    #number of unique particles-- i.e. number of tracks-- starting at 0
    new.create_dataset(name, data = new_event)

100%|██████████| 10000/10000 [08:29<00:00, 19.62it/s]


In [27]:
print(new['Event_[3]'].shape)
print(new['Event_[3]'][:][:])

(128, 13)
[[ 2.20870667e+01  3.25545006e+01  5.13600000e+02 ...  4.00000000e+00
   2.00000000e+00  3.00000000e+00]
 [ 7.36236668e+00 -7.05056143e+00  5.23200000e+02 ...  1.00000000e+00
   1.00000000e+00  0.00000000e+00]
 [-1.33514404e-05  1.13012161e+01  5.90400000e+02 ...  1.00000000e+00
   1.00000000e+00  0.00000000e+00]
 ...
 [ 4.90825081e+00  2.83038445e+01  5.48800000e+02 ...  4.00000000e+00
   2.00000000e+00  0.00000000e+00]
 [-2.45412970e+00  1.55518732e+01  5.80800000e+02 ...  4.00000000e+00
   2.00000000e+00  0.00000000e+00]
 [ 2.45413303e+00 -1.84533787e+01  1.68000000e+02 ...  1.00000000e+00
   1.00000000e+00  0.00000000e+00]]


In [28]:
#setting aside a test set from the all events dataset
name = 'Mg22_size' + str(sample_size)
whole = h5py.File(name + '.h5','r')
test = h5py.File(name + '_test.h5', 'a')
rest = h5py.File(name + '_rest.h5', 'a')
keys = list(whole.keys())
length = len(keys)
test_len = int(0.2*length)
test_set_indices = np.random.choice(range(length), test_len, replace=False)
for i in tqdm.tqdm(range(length)):
    event = whole[keys[i]]
    if np.isin(i, test_set_indices, assume_unique=True):
        test.create_dataset(keys[i], data = event)
    else:
        rest.create_dataset(keys[i], data = event)

100%|██████████| 10000/10000 [00:07<00:00, 1363.14it/s]


In [29]:
#splits remaining events data into training and validation sets
name = 'Mg22_size' + str(sample_size)
whole = h5py.File(name + '_rest.h5','r')
train = h5py.File(name + '_train.h5', 'a')
val = h5py.File(name + '_val.h5', 'a')
keys = list(whole.keys())
length = len(keys)
val_len = int(0.25*length)     #20% of 80%
val_set_indices = np.random.choice(range(length), val_len, replace=False)
for i in tqdm.tqdm(range(length)):
    event = whole[keys[i]]
    if np.isin(i, val_set_indices, assume_unique=True):
        val.create_dataset(keys[i], data = event)
    else:
        train.create_dataset(keys[i], data = event)

100%|██████████| 8000/8000 [00:05<00:00, 1359.61it/s]


In [41]:
print(len(list(train.keys())))
print(len(list(test.keys())))
print(len(list(val.keys())))

6000
2000
2000


In [30]:
data = h5py.File('Mg22_size128.h5', 'a')
keys = list(data.keys())
#print(keys)
num_tracks = np.zeros(len(keys),int)
for i in range(len(keys)):
    event = data[keys[i]]
    num_tracks[i] = int(event[0,12])
print(np.unique(num_tracks, return_counts=True))

(array([0, 1, 2, 3, 4, 5]), array([5002,  104, 2554, 2319,   16,    5]))


In [None]:
file = h5py.File('output_digi_HDF_Mg22_Ne20pp_8MeV.h5', 'r')
keys = list(file.keys())
length = len(keys)
num_tracks = np.zeros(length, int)
for n in tqdm.tqdm(range(length)):
    name = keys[n]
    event = file[name]
    ev_len = len(event)
    event_data = np.zeros((ev_len, 12), float)
    #converting event into an array
    for i,e in enumerate(event):
        instant = np.array(list(e))
        event_data[i] = np.array(instant)    
    num_tracks[n] = np.unique(event_data[:,5]).size - 1
print(np.unique(num_tracks, return_counts=True))

In [None]:
#creating data sets with fewer beam events