In [None]:
import numpy as np
import glob
import os
from models.JetPointNet import PointNetSegmentation


def load_data_from_npz(npz_file):
    """Load the four per-point arrays stored in one ``.npz`` archive.

    Parameters
    ----------
    npz_file : str or path-like
        Path to an archive containing the keys ``feats``, ``frac_labels``,
        ``tot_labels`` and ``tot_truth_e``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(feats, frac_labels, tot_labels, tot_truth_e)`` in that order.

    Raises
    ------
    KeyError
        If any of the four expected keys is missing from the archive.

    Notes
    -----
    NOTE(review): earlier comments gave the shapes as (num_samples, 859, 6)
    for ``feats`` and (num_samples, 859) for the other three arrays, but the
    analysis cell reads feature column 6, which needs at least 7 features.
    Confirm the real shapes against the files.
    """
    # np.load on an .npz returns a lazy NpzFile that keeps the archive open;
    # the context manager closes it once the arrays have been read into memory.
    with np.load(npz_file) as data:
        feats = data['feats']
        frac_labels = data['frac_labels']
        tot_labels = data['tot_labels']
        # Per the original note: true total energy deposited by particles into each cell.
        tot_truth_e = data['tot_truth_e']
    return feats, frac_labels, tot_labels, tot_truth_e

# Setup
# Hide all CUDA devices so inference runs on CPU.
# NOTE(review): this is set after the model module has been imported; it only
# takes effect if the framework has not initialised its GPU devices yet -- confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = ""  # Disable GPU
model_path = "saved_model/PointNetModel.keras"

# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
TEST_DIR = '/data/mjovanovic/jets/processed_files/2000_events_w_fixed_hits/SavedNpz/test'

# NOTE(review): num_points=278 differs from the 859 points per sample mentioned in
# the loader's notes -- confirm this matches the saved weights and the test data.
model = PointNetSegmentation(num_points=278, num_classes=1)
model.load_weights(model_path)

# glob returns matches in arbitrary order; sort so files (and the per-sample log
# printed during evaluation) are processed in the same order on every run.
npz_files = sorted(glob.glob(os.path.join(TEST_DIR, '*.npz')))



In [9]:
import matplotlib.pyplot as plt
import numpy as np

# Data collections
# NOTE(review): num_points_list is never filled in this cell; kept in case another cell reads it.
num_points_list = []
accuracies_list = []  # one energy-weighted accuracy per sample, across all files

# Lists to store total true energy and focused particle p_t for later binning
total_true_energy_samples = []
focused_particle_pts = []

# Minimum number of samples per bin to be included in the output
min_samples_per_bin = 10

# Process each file
for npz_file in npz_files:
    # tot_labels is part of the loader's return value but is not used by this metric.
    feats, frac_labels, tot_labels, tot_truth_e = load_data_from_npz(npz_file)
    segmentation_logits = model.predict(feats)
    # Drop the trailing length-1 axis (presumably leaving one score per point -- TODO confirm).
    segmentation_logits = np.squeeze(segmentation_logits, axis=-1)

    for sample_idx in range(len(feats)):
        sample_features = feats[sample_idx]
        tot_true_energy = tot_truth_e[sample_idx]

        # Only points whose feature column 6 equals 1 take part in the metric
        # (presumably a flag marking real energy-carrying cells -- TODO confirm).
        energy_point_indices = sample_features[:, 6] == 1

        # Binarise predictions at 0 (assumes the model emits raw logits -- TODO confirm)
        # and truth at a fraction of 0.5.
        predicted_classes = np.where(segmentation_logits[sample_idx] > 0, 1, 0)
        true_classes = np.where(frac_labels[sample_idx] > 0.5, 1, 0)

        energies_filtered = tot_true_energy[energy_point_indices]
        predicted_classes_filtered = predicted_classes[energy_point_indices]
        true_classes_filtered = true_classes[energy_point_indices]

        # Energy-weighted accuracy: share of |true energy| sitting in correctly classified points.
        correct_predictions = predicted_classes_filtered == true_classes_filtered
        correct_energy = np.sum(np.abs(energies_filtered[correct_predictions]))
        total_energy = np.sum(np.abs(energies_filtered))

        # The small epsilon avoids division by zero; a sample with no selected
        # energy therefore scores 0 rather than raising or producing NaN.
        energy_weighted_accuracy = correct_energy / (total_energy + 1e-5)

        # Collect total true energy and particle p_t for this sample
        # (the sum here is signed, unlike the absolute values used in the accuracy above).
        total_true_energy_sample = np.sum(energies_filtered)
        # NOTE(review): reads feature column 4 of the first point as the focused particle's p_t -- confirm.
        focused_particle_pt = sample_features[0, 4]

        total_true_energy_samples.append(total_true_energy_sample)
        focused_particle_pts.append(focused_particle_pt)
        accuracies_list.append(energy_weighted_accuracy)

        print("Sample Index: ", sample_idx)
        print("Total number of cells in sample: ", np.sum(energy_point_indices))
        print("Predicted Count: ", np.sum(predicted_classes_filtered))
        print("True Count: ", np.sum(true_classes_filtered))
        print("Energy Weighted Accuracy: ", energy_weighted_accuracy)
        print()

# Creating bins based on the full range of collected data.
# 21 edges define 20 equal-width intervals; each interval is keyed by its left edge.
energy_bins = np.linspace(min(total_true_energy_samples), max(total_true_energy_samples), num=21)
pt_bins = np.linspace(min(focused_particle_pts), max(focused_particle_pts), num=21)

# Dictionaries to accumulate data for the new bins.
# The final (upper) edge is kept as a key for compatibility but does not start an
# interval of its own, so its list stays empty.
energy_acc = {bin_edge: [] for bin_edge in energy_bins}
pt_acc = {bin_edge: [] for bin_edge in pt_bins}

# Index of the last real interval (edges[-2] .. edges[-1]).
last_energy_bin = len(energy_bins) - 2
last_pt_bin = len(pt_bins) - 2

# Accumulate data into bins
for i in range(len(accuracies_list)):
    # np.digitize uses half-open intervals [left, right), so a value equal to the
    # maximum falls past the final edge; clamp it into the last real interval
    # instead of a degenerate extra bin that holds only the maximum.
    energy_bin_index = min(np.digitize(total_true_energy_samples[i], energy_bins) - 1, last_energy_bin)
    pt_bin_index = min(np.digitize(focused_particle_pts[i], pt_bins) - 1, last_pt_bin)

    if energy_bin_index >= 0:
        energy_acc[energy_bins[energy_bin_index]].append(accuracies_list[i])
    if pt_bin_index >= 0:
        pt_acc[pt_bins[pt_bin_index]].append(accuracies_list[i])

# Report the mean accuracy of every bin holding at least min_samples_per_bin samples;
# sparser bins are skipped. Each line is labelled with the bin's edge value.
print("Mean Weighted Accuracies by Total True Energy Bin:")
for bin_edge in energy_bins:
    if len(energy_acc[bin_edge]) < min_samples_per_bin:
        continue
    print(f"Energy {bin_edge}: {np.mean(energy_acc[bin_edge])}")

print("Mean Weighted Accuracies by Focused Particle p_t Bin:")
for bin_edge in pt_bins:
    if len(pt_acc[bin_edge]) < min_samples_per_bin:
        continue
    print(f"Particle p_t {bin_edge}: {np.mean(pt_acc[bin_edge])}")

# Unbinned mean over every evaluated sample
mean_weighted_accuracy = np.mean(accuracies_list)
print("\nOverall Mean Weighted Accuracy: ", mean_weighted_accuracy)


Sample Index:  0
Total number of cells in sample:  60
Predicted Count:  19
True Count:  29
Energy Weighted Accuracy:  0.4547001812466637

Sample Index:  1
Total number of cells in sample:  47
Predicted Count:  11
True Count:  15
Energy Weighted Accuracy:  0.5395569752178329

Sample Index:  2
Total number of cells in sample:  92
Predicted Count:  0
True Count:  83
Energy Weighted Accuracy:  0.16633492801869276

Sample Index:  3
Total number of cells in sample:  105
Predicted Count:  33
True Count:  65
Energy Weighted Accuracy:  0.557076987977972

Sample Index:  4
Total number of cells in sample:  39
Predicted Count:  39
True Count:  36
Energy Weighted Accuracy:  0.9927638961289836

Sample Index:  5
Total number of cells in sample:  56
Predicted Count:  0
True Count:  1
Energy Weighted Accuracy:  0.9994037543583838

Sample Index:  6
Total number of cells in sample:  95
Predicted Count:  17
True Count:  11
Energy Weighted Accuracy:  0.5338024870370822

Sample Index:  7
Total number of cel