In [6]:
# LOAD PREDICTIONS
import numpy as np
import awkward as ak

# Set epoch and batch size
epoch = 99
BATCH_SIZE = 100  # Adjust this to your actual batch size

# Base directory for all delta files
model_file_path = "/data/mjovanovic/cell_particle_deposit_learning/delta_train/tr_100_val_10_tst_5_delta_1_track_1_n_3_pi0_lr_1e-2_BS_100_no_tnets_add_min_dist"

# Paths for labels and predictions
labels_path = model_file_path + "/tests/labels.npy"
preds_path = model_file_path + "/tests/preds_" + str(epoch) + ".npy"

# Load labels and predictions
labels_unmasked = ak.Array(np.load(labels_path, allow_pickle=True))
preds_unmasked = ak.Array(np.load(preds_path, allow_pickle=True))

# Filter labels and predictions based on some condition
labels = labels_unmasked[labels_unmasked[:,:,0] != -1]
preds = preds_unmasked[labels_unmasked[:,:,0] != -1]

# LOAD DATASET FROM PREPROCESSED TEST DATA FILES
len_file = 6000  # Assuming this is defined or known
i_low, i_high = 0, 4  # Assuming these are the range of file indices
delta_dir = "/data/mjovanovic/cell_particle_deposit_learning/delta/delta_processed_test_files/" 
files_name = "delta_full"
file_names = list(map(lambda i: delta_dir + files_name + "_len_" + str(len_file) + "_i_" + str(i) + ".npy", np.arange(i_low, i_high + 1)))
dataset = "delta"
# Initialize counts for event filtering
num_events_saved = 0
num_events_filtered = 0

# Load and filter data from files
for file_i, preprocessed_file_name in enumerate(file_names):
    events_arr = np.load(preprocessed_file_name, allow_pickle=True).item()
    num_events = len(events_arr["eventNumber"])
    print(f"Loaded {num_events} events from file {file_i + 1}/{len(file_names)}")

    # Event filtering logic
    for event_idx in range(num_events):
        num_tracks = events_arr["nTrack"][event_idx]
        decay_group = events_arr["decay_group"][event_idx]
        cluster_cell_ID = events_arr["cluster_cell_ID"][event_idx]

        # Modify your include flags and decay group dictionary as needed
        include_delta_p_pi0 = True
        include_delta_n_pipm = True
        include_delta_n_pi0 = True
        include_delta_p_pipm = True
        decay_group_dict = {"delta+_p": 1, "delta+_n": 2, "delta-": 3, "delta0_n": 4, "delta0_p": 5, "delta++": 6}
        niche_case = "1_track_1_n_3_pi0"
        delta_n_pipm_count = 0  # Reset for each file, or define logic to persist this count

        # Event filtering conditions
        if len(cluster_cell_ID) and \
            ((dataset == "delta" and ((num_tracks == 1 and (include_delta_p_pi0 and decay_group == decay_group_dict["delta+_p"]) or 
            (include_delta_n_pipm and decay_group in [decay_group_dict["delta+_n"], decay_group_dict["delta-"]]))) or 
            (num_tracks == 0 and include_delta_n_pi0 and decay_group == decay_group_dict["delta0_n"]) or 
            (num_tracks == 2 and include_delta_p_pipm and decay_group in [decay_group_dict["delta0_p"], decay_group_dict["delta++"]]))) or (dataset == "rho" and num_tracks == 1):

            # Niche case handling
            if niche_case == "1_track_1_n_3_pi0" and decay_group in [decay_group_dict["delta+_n"], decay_group_dict["delta-"]]:
                if delta_n_pipm_count == 0:
                    delta_n_pipm_count += 1
                else:
                    delta_n_pipm_count = (delta_n_pipm_count + 1) % 6
                    num_events_filtered += 1
                    continue  # Skip this event

            # Event passed all filters
            num_events_saved += 1
        else:
            num_events_filtered += 1  # Event didn't pass the filters

# Apply batch size filter at the end if needed
if BATCH_SIZE:
    num_events_saved = (num_events_saved // BATCH_SIZE) * BATCH_SIZE

# Output the results
print("num events (predictions)", len(preds_unmasked))
print("num events (loaded data):", num_events_saved)
print("num events filtered out:", num_events_filtered)


Loaded 5520 events from file 1/5
Loaded 5568 events from file 2/5
Loaded 5551 events from file 3/5
Loaded 5556 events from file 4/5
Loaded 5561 events from file 5/5
num events (predictions) 4000
num events (loaded data): 9600
num events filtered out: 18140


In [7]:
# Filter to batch size multiple number of samples/files
BS_multiple_num_samples = (num_events_saved // BATCH_SIZE) * BATCH_SIZE
print("BS_multiple_num_samples:", BS_multiple_num_samples)
print("num_events_saved:", num_events_saved)

# Check predictions vs data load
print("num events (predictions)", len(preds_unmasked))
print("num events (loaded data):", num_events_saved)

BS_multiple_num_samples: 9600
num_events_saved: 9600
num events (predictions) 4000
num events (loaded data): 9600


In [9]:
# Print unfiltered features
print("Unfiltered Features:")
print(features_unmasked)

# Print a few entries if the array is too large
print("First 5 Unfiltered Features:")
print(features_unmasked[:5])

# Print unfiltered labels
print("Filtered Labels:")
print(labels)

print("Predictions:")
print(preds)




Unfiltered Features:
[{eventNumber: 698367, nCluster: 3, truthPartE: [115, ...], ...}, ..., {...}]
First 5 Unfiltered Features:
[{eventNumber: 698367, nCluster: 3, truthPartE: [115, ...], ...}, ..., {...}]
Filtered Labels:
[[0, 0, 0, 1], [0, 0, 0, 1], [0, 0, 0, 1], ..., [1, 0, 0, 0], [1, 0, 0, 0]]
Predictions:
[[0, 0, 0, 1], [0.00106, 0, 0, 0.999], [...], ..., [1, 0, 0, 0], [1, 0, 0, 0]]


In [3]:
for i in range(min(5, len(features_unmasked))):
    print(f"Event {i}:")
    print(f"  eventNumber: {features_unmasked[i]['eventNumber']}")
    print(f"  nCluster: {features_unmasked[i]['nCluster']}")
    print(f"  truthPartE (first 3 elements): {features_unmasked[i]['truthPartE'][:3]}")
    # ... add more fields if needed
    print()


Event 0:
  eventNumber: 698367
  nCluster: 3
  truthPartE (first 3 elements): [115, 90.1, 24.7]

Event 1:
  eventNumber: 698368
  nCluster: 2
  truthPartE (first 3 elements): [285, 217, 67.5]

Event 2:
  eventNumber: 698370
  nCluster: 3
  truthPartE (first 3 elements): [11.2, 10.1, 1.03]

Event 3:
  eventNumber: 698371
  nCluster: 1
  truthPartE (first 3 elements): [10.9, 10.1, 0.785]

Event 4:
  eventNumber: 698366
  nCluster: 8
  truthPartE (first 3 elements): [539, 372, 168]



In [4]:
for field in features_unmasked[0].fields:
    print(field)

eventNumber
nCluster
truthPartE
truthPartPt
cluster_E
cluster_E_LCCalib
cluster_EM_PROBABILITY
cluster_HAD_WEIGHT
truthPartPdgId
cluster_ENG_CALIB_TOT
cluster_Eta
cluster_cell_ID
cluster_cell_E
cluster_cell_hitsE_EM
cluster_cell_hitsE_nonEM
cluster_Pt
trackPhi_EMB1
trackPhi_EMB2
trackPhi_EMB3
trackPhi_EME1
trackPhi_EME2
trackPhi_EME3
trackPhi_HEC0
trackPhi_HEC1
trackPhi_HEC2
trackPhi_HEC3
trackPhi_TileBar0
trackPhi_TileBar1
trackPhi_TileBar2
trackPhi_TileGap1
trackPhi_TileGap2
trackPhi_TileGap3
trackPhi_TileExt0
trackPhi_TileExt1
trackPhi_TileExt2
trackEta_EMB1
trackEta_EMB2
trackEta_EMB3
trackEta_EME1
trackEta_EME2
trackEta_EME3
trackEta_HEC0
trackEta_HEC1
trackEta_HEC2
trackEta_HEC3
trackEta_TileBar0
trackEta_TileBar1
trackEta_TileBar2
trackEta_TileGap1
trackEta_TileGap2
trackEta_TileGap3
trackEta_TileExt0
trackEta_TileExt1
trackEta_TileExt2
nTrack
trackMass
trackEta
trackPhi
trackPt
trackP
cluster_cell_hitsTruthIndex
cluster_cell_hitsTruthE
truthPartEta
truthPartPhi
clus_idx
clus_em

In [20]:
print("Cluster cell E length: ",len(features_unmasked[0]["cluster_cell_E"]))
print("Cluster cell ID length: ",len(features_unmasked[0]["cluster_cell_ID"]))
print("Cluster cell ID hitsTruthE: ",len(features_unmasked[0]["cluster_cell_hitsTruthE"]))

print(len(labels))
print(len(preds))
print(len(features_unmasked["cluster_cell_E"]))
print(len(features_unmasked[0]["cluster_cell_E"]))

flattened_cluster_cell_E = ak.flatten(features_unmasked["cluster_cell_E"], axis=None)
print("Unrolled features_unmasked length: ", len(flattened_cluster_cell_E))

Cluster cell E length:  187
Cluster cell ID length:  187
Cluster cell ID hitsTruthE:  187
786512
786512
5530
187
Unrolled features_unmasked length:  1128625


In [17]:
#for idx in range(len(features_unmasked["cluster_cell_E"])):
for i in range(len(features_unmasked[0]["cluster_cell_E"])):
    print("--", i, features_unmasked[0]["cluster_cell_ID"][i], features_unmasked[0]["cluster_cell_hitsTruthE"][i])



-- 0 763376172 [9.29]
-- 1 763376680 []
-- 2 763376684 [6.47]
-- 3 763377198 [0.299]
-- 4 765563436 [1.2]
-- 5 765563438 [0.574]
-- 6 765564460 [5.72]
-- 7 765564970 [0.0201]
-- 8 765564974 [2.25]
-- 9 765565484 [41.6]
-- 10 765565486 [0.0025, 0.802]
-- 11 765565996 [81]
-- 12 765565998 [3.66]
-- 13 765566508 [380]
-- 14 765567018 [0.814]
-- 15 765567020 [2.35e+03]
-- 16 765567022 [2.01]
-- 17 765567530 [0.467]
-- 18 765567532 [540]
-- 19 765567534 [2.05]
-- 20 765568040 [2.1]
-- 21 765568044 [176]
-- 22 765568046 [49]
-- 23 765568554 [0.223]
-- 24 765568556 [66.8]
-- 25 765568558 [9.7]
-- 26 765569062 []
-- 27 765569064 [7]
-- 28 765569066 [4.91]
-- 29 765569068 [61.1]
-- 30 765569574 []
-- 31 765569576 [51.4]
-- 32 765569578 [71.7]
-- 33 765569580 [45.5]
-- 34 765569582 [3.67]
-- 35 765570086 []
-- 36 765570088 [19.2]
-- 37 765570090 [56.7]
-- 38 765570092 [27.7]
-- 39 765570600 [4.31]
-- 40 765570602 [16.8]
-- 41 765570604 [20.4]
-- 42 765571116 [30.9]
-- 43 765571118 [12.6]
-- 44 7