In [3]:
import uproot

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import awkward as ak
from pathlib import Path
from concurrent.futures import ProcessPoolExecutor, as_completed
from tqdm import tqdm


from search_worker import process_single_file

# Comparing reconstruction with raw data.

Testing on proton candidates: events that pass the mass cut (600-1600 MeV) and have exactly one reconstructed track.  

In [4]:
# Common root of the local data tree; every input path below hangs off it.
_DATA_ROOT = "/Users/user/data/research/proton-deuteron"

# CSV of selected proton events (presumably the one-track selection, going by the file name).
ONE_TRACK_PROTON_EVENTS = _DATA_ROOT + "/csv/selectedprotons_onetrack_eventdisplay.csv"
# Directory of raw proton ROOT files; only the mass cut has been applied to these.
ALL_RAW_PROTONS = _DATA_ROOT + "/rawprotons/proton_extracted_root"
# CSV holding the reconstruction quantities loaded later into `reco_df`.
RECO = _DATA_ROOT + "/csv/plottingdata.csv"

In [5]:
class EventSearchViewer():
    """
    Find events listed in a DataFrame inside a directory of ROOT files and
    draw their collection-plane event displays.

    Typical use: construct with the target events and the ROOT directory,
    call ``search_files()`` once, then ``list_matches()``, ``show_event(i)``
    or ``save_all_images()``.

    Attributes:
        df: DataFrame with (at least) ``run``, ``subrun`` and ``event`` columns.
        root_files_dir: ``Path`` of the directory scanned for ``*.root`` files.
        matched_events: list of match records filled by ``search_files()``.
            Each record is a dict; this class reads the keys ``run``,
            ``subrun``, ``event``, ``file_path``, ``event_index_in_file`` and
            ``filename`` from it.
    """

    # Raw channel number that maps to collection-plane wire index 0.
    COLLECTION_FIRST_CHANNEL = 240
    # Number of wires (rows of the fixed canvas) on the collection plane.
    COLLECTION_NUM_WIRES = 240

    def __init__(self, df, root_files_dir):
        """
        Args:
            df: DataFrame of target events (``run``, ``subrun``, ``event``).
            root_files_dir: directory (str or Path) containing the ROOT files.
        """
        self.df = df
        self.root_files_dir = Path(root_files_dir)
        self.matched_events = []

    def search_files(self, max_workers=None):
        """
        Search all ROOT files in parallel for events that match df.

        Args:
            max_workers: forwarded to ``ProcessPoolExecutor``; ``None`` lets
                the executor pick its default.

        Returns:
            The list of match records sorted by (run, subrun, event). The same
            list is stored on ``self.matched_events``.

        Raises:
            Any exception raised by a worker is re-raised here by
            ``future.result()``.
        """
        print("Optimizing target event list...")
        # A set of (run, subrun, event) tuples gives O(1) membership tests in
        # the workers.
        target_events_set = set(zip(
            self.df['run'].astype(int),
            self.df['subrun'].astype(int),
            self.df['event'].astype(int)
        ))

        root_files = list(self.root_files_dir.glob("*.root"))
        print(f"Found {len(root_files)} ROOT files. Starting parallel search...")

        matched = []
        with ProcessPoolExecutor(max_workers=max_workers) as executor:
            # process_single_file comes from the search_worker module
            # (presumably so it can be pickled for the worker processes).
            futures = [executor.submit(process_single_file, path, target_events_set) for path in root_files]

            for future in tqdm(as_completed(futures), total=len(root_files), desc="Searching Files"):
                result = future.result()
                if result:
                    matched.extend(result)

        print(f"\nSearch complete! Found {len(matched)} matching events.")
        self.matched_events = sorted(matched, key=lambda x: (x['run'], x['subrun'], x['event']))
        return self.matched_events

    @classmethod
    def _build_collection_plane(cls, adc_data, channel_map):
        """
        Place per-channel ADC waveforms on a fixed collection-plane canvas.

        Args:
            adc_data: ADC samples for every channel of one event, either flat
                (channel-major) or already shaped (channels, ticks).
            channel_map: channel number of each row of ``adc_data``; must be
                non-empty.

        Returns:
            Array of shape (num_ticks, COLLECTION_NUM_WIRES): time tick along
            rows, wire index along columns. Wires with no data stay at zero.

        Raises:
            ValueError: if the ADC sample count is not a multiple of the
                number of channels.
        """
        adc_data = np.asarray(adc_data)
        channel_map = np.asarray(channel_map)

        num_channels = len(channel_map)
        # .size (not len) so an already-2D ADC array is handled as well.
        num_ticks = adc_data.size // num_channels
        adc_data_2d = adc_data.reshape((num_channels, num_ticks))

        first = cls.COLLECTION_FIRST_CHANNEL
        collection_plane = np.zeros((cls.COLLECTION_NUM_WIRES, num_ticks))

        # Keep only channels that belong to the collection plane and shift
        # them so that channel `first` lands on wire index 0.
        on_plane = (channel_map >= first) & (channel_map < first + cls.COLLECTION_NUM_WIRES)
        wire_index = (channel_map[on_plane] - first).astype(np.intp)
        collection_plane[wire_index] = adc_data_2d[on_plane]

        # Transpose for plotting (Time vs. Wire)
        return collection_plane.T

    def _get_event_image_data(self, match_index):
        """
        Internal helper to fetch and process data for plotting using the correct
        physical mapping of channels to a fixed canvas.

        Args:
            match_index: index into ``self.matched_events``.

        Returns:
            ``(image, title)`` where ``image`` is the (ticks, wires) array, or
            ``(None, None)`` if the index is out of range, the event holds no
            data, or reading/processing failed (the reason is printed).
        """
        if not (0 <= match_index < len(self.matched_events)):
            print(f"Match index {match_index} out of range.")
            return None, None

        event_info = self.matched_events[match_index]
        try:
            with uproot.open(event_info['file_path']) as root_file:
                tree = root_file["ana/raw"]
                event_start = event_info['event_index_in_file']
                # Read exactly one entry: the matched event.
                event_data = tree.arrays(
                    ["raw_rawadc", "raw_channel"],
                    entry_start=event_start, entry_stop=event_start + 1, library="ak"
                )

                if len(event_data) == 0:
                    print(f"Warning: No data found in file for event at index {match_index}.")
                    return None, None

                # Use ak.to_numpy for explicit conversion
                adc_data = ak.to_numpy(event_data["raw_rawadc"][0])
                channel_map = ak.to_numpy(event_data["raw_channel"][0])

                if len(channel_map) == 0:
                    print(f"Warning: Event at index {match_index} has no channel data.")
                    return None, None

                image = self._build_collection_plane(adc_data, channel_map)
                title = f"Collection Plane - Run {event_info['run']}, Subrun {event_info['subrun']}, Event {event_info['event']}"
                return image, title

        except Exception as e:
            # Deliberate best-effort: one unreadable event must not stop a
            # batch; report it and let the caller skip.
            print(f"Error processing data for event at index {match_index}: {e}")
            return None, None

    def _draw_event_display(self, image_data, title):
        """Draw the heatmap for one event on a new figure and return the figure."""
        fig, ax = plt.subplots(figsize=(10, 5))
        sns.heatmap(image_data, cmap="viridis", cbar_kws={'label': 'ADC Counts'}, ax=ax)
        ax.set_title(title)
        # Correctly label the axes
        ax.set_xlabel(f"Collection Plane Wire Number (0-{self.COLLECTION_NUM_WIRES - 1})")
        ax.set_ylabel("Time Tick")
        # heatmap draws row 0 at the top; flip so time increases upwards.
        ax.invert_yaxis()
        fig.tight_layout()
        return fig

    def show_event(self, match_index=0):
        """Shows the collection plane event display for a specific matched event."""
        image_data, title = self._get_event_image_data(match_index)

        if image_data is None:
            print(f"Could not generate plot for event at index {match_index}.")
            return

        print(f"Displaying: {title}")
        self._draw_event_display(image_data, title)
        plt.show()

    def save_all_images(self, output_dir="event_images"):
        """
        Generates and saves a PNG image for every matched event.

        Events whose PNG already exists in ``output_dir`` are skipped without
        touching the ROOT file, so an interrupted run can be resumed cheaply.

        Args:
            output_dir: directory for the PNG files; created if missing.
        """
        if not self.matched_events:
            print("No matches found. Run .search_files() first.")
            return

        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)
        print(f"Saving {len(self.matched_events)} images to '{output_path.resolve()}'...")

        for i, event_info in enumerate(tqdm(self.matched_events, desc="Saving Images")):
            filename = f"run_{event_info['run']}_subrun_{event_info['subrun']}_event_{event_info['event']}.png"
            full_path = output_path / filename

            # Check before reading: decoding the event is the expensive part.
            if full_path.exists():
                continue  # Skip if already saved

            image_data, title = self._get_event_image_data(i)

            if image_data is None:
                continue

            fig = self._draw_event_display(image_data, title)
            try:
                fig.savefig(full_path, dpi=90, bbox_inches='tight')
            finally:
                # Always release the figure, even if saving failed, so a long
                # batch does not accumulate open figures.
                plt.close(fig)

        print("\nFinished saving all images.")

    def list_matches(self):
        """List all matched events"""
        if not self.matched_events:
            print("No matches found. Run .search_files() first.")
            return

        print("\nMatched events found:")
        for i, event in enumerate(self.matched_events):
            print(f"{i}: Run {event['run']}, Subrun {event['subrun']}, Event {event['event']} (from {event['filename']})")
        print(f"\nTotal: {len(self.matched_events)} matches")

In [6]:
# Load the reconstruction table from the CSV at RECO into a DataFrame.
# NOTE(review): the rendered table has an "Unnamed: 0" column, which usually
# means the CSV was written with its index; consider index_col=0 after
# confirming nothing downstream relies on that column.
reco_df = pd.read_csv(RECO)

In [7]:
# Show the loaded reconstruction table as the cell output.
reco_df

Unnamed: 0,run,subrun,event,track_idx,trkvtxx,trkvtxy,trkvtxz,trkendx,trkendy,trkendz,trklength,wctrk_XFaceCoor,wctrk_YFaceCoor,wctrk_momentum,wctrk_theta,wctrk_phi
0,8675,54,5807,0,19.64471,5.93438,3.40710,14.36033,4.18944,31.65193,28.86125,18.45386,5.46417,845.25305,0.07892,-3.13656
1,8675,54,5821,0,22.14739,6.86254,3.35288,19.86873,7.10694,30.00206,26.77493,22.24053,7.68748,786.46625,0.07488,-3.10346
2,8675,54,5844,0,21.10393,2.34666,3.17951,19.45838,2.40517,11.50735,8.49927,22.95857,-0.91929,695.66016,0.06052,-3.12150
3,8675,68,7061,0,16.81808,-0.93871,3.23513,16.00233,-3.97466,23.08302,20.37756,18.36346,-0.34143,801.40570,0.08476,-3.03038
4,8675,88,9222,0,26.47159,-5.76675,2.65268,26.13903,-5.70048,8.23611,5.59422,24.87471,-5.91387,658.67987,0.06195,-2.84671
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7631,9781,103,16997,0,18.72320,0.93223,3.74945,15.86716,0.09138,25.89450,22.35578,19.96970,0.82272,849.26324,0.08109,-3.08616
7632,9781,109,17945,0,17.92384,-8.18403,3.10379,14.58660,-10.27835,24.98164,22.24012,17.62879,-5.06135,768.64062,0.07746,-2.87973
7633,9781,109,17974,0,19.55410,3.15253,3.18123,18.51553,3.68878,8.57229,5.52662,20.07806,0.57546,676.74304,0.06135,3.11469
7634,9781,112,18441,0,22.34751,-7.74492,3.35788,19.09847,-7.69374,34.85216,31.67954,21.13656,-5.71262,848.36969,0.07988,-2.92997
