In [10]:
import pandas as pd 
import os
import h5py
import numpy as np

# Load the spreadsheet that lists every recording session
recording_summary = "/Volumes/scratch/andrew.shelton/NPUltra_data/raw_npultra_data/NPUltra_recording_summary.xlsx"
recording_summary_table = pd.read_excel(recording_summary)

# Keep only rows whose 'experiment' is 'NPUltra_psychedelics' and whose
# 'uploaded to CO' column is 'yes'
is_psychedelics = recording_summary_table['experiment'] == 'NPUltra_psychedelics'
is_uploaded = recording_summary_table['uploaded to CO'] == 'yes'
filtered_sessions = recording_summary_table[is_psychedelics & is_uploaded]

# NOTE: the preview below is computed but its value is discarded, because it is
# not the last expression of the cell.
filtered_sessions.head()

# Session identifiers from the 'session' column; truncated to the first entry
# while testing
session_list = filtered_sessions['session'].tolist()[:1]

# Root folder that holds one sub-folder per recording session
base_path = "/Volumes/scratch/andrew.shelton/NPUltra_data/raw_npultra_data/"

# Reset before searching so that a value left over from an earlier run of this
# cell can never be picked up by the code that opens the file afterwards.
hdf5_file_path = None

# `session` stays the integer index into session_list (as before);
# `session_name` is the corresponding entry.
for session, session_name in enumerate(session_list):
    session_folder = f"{base_path}{session_name}"

    # The stimulus files are looked up in <session_folder>/behavior/
    units_table_path = os.path.join(session_folder, "behavior")
    if not os.path.isdir(units_table_path):
        # Session folder or its behavior sub-folder is missing: nothing to load
        continue

    # Candidate files are named RFMapping*.hdf5. os.listdir returns entries in
    # arbitrary order, so sort to make the choice of file reproducible.
    hdf5_files = sorted(
        f for f in os.listdir(units_table_path)
        if f.endswith('.hdf5') and f.startswith('RFMapping')
    )
    if hdf5_files:
        hdf5_file = hdf5_files[0]
        hdf5_file_path = os.path.join(units_table_path, hdf5_file)

# Fail here with an explicit message instead of a NameError further down
if hdf5_file_path is None:
    raise FileNotFoundError(
        f"No RFMapping*.hdf5 file found in the behavior folder of any session in {session_list}"
    )

# Containers filled in while walking the file hierarchy
all_keys = []       # every path visited, groups and datasets alike
dataset_info = {}   # path -> {"type", "shape", "dtype", "level"}
group_info = {}     # path -> {"type", "level"}


def explore_group(group, level=0, parent_path=""):
    """Recursively print and record the members of an HDF5 group.

    Every member's slash-separated path is appended to ``all_keys``. Groups are
    recorded in ``group_info`` and descended into; datasets are recorded in
    ``dataset_info`` together with their shape and dtype.

    Args:
        group: h5py group (or file) whose members are listed.
        level: Nesting depth, used for print indentation and stored per entry.
        parent_path: Path of ``group`` relative to the root; empty at the root.
    """
    pad = "  " * level
    for name in group:
        if parent_path:
            path = f"{parent_path}/{name}"
        else:
            path = name
        all_keys.append(path)

        node = group[name]
        if isinstance(node, h5py.Group):
            print(f"{pad}{name}/ (Group)")
            group_info[path] = {"type": "group", "level": level}
            explore_group(node, level + 1, path)
            continue

        if isinstance(node, h5py.Dataset):
            print(f"{pad}{name} (Dataset): shape={node.shape}, dtype={node.dtype}")
            dataset_info[path] = {
                "type": "dataset",
                "shape": node.shape,
                "dtype": str(node.dtype),
                "level": level,
            }


# Open the file read-only, list its top-level members, then walk everything
with h5py.File(hdf5_file_path, 'r') as f:
    root_keys = list(f.keys())
    print("Root level keys:", root_keys)
    explore_group(f)

# Summary of what was collected during the walk
print(f"\nTotal keys found: {len(all_keys)}")
print(f"Dataset keys: {list(dataset_info)}")
print(f"Group keys: {list(group_info)}")

Root level keys: ['acquisitionSignalLine', 'amNoiseFreq', 'behavNidaqDevice', 'behavNidaqDeviceSerialNum', 'computerName', 'configPath', 'deltaWheelPos', 'digitalSolenoidTrigger', 'diodeBoxPosition', 'diodeBoxSize', 'drawDiodeBox', 'frameIntervals', 'frameRate', 'frameSignalLine', 'fullFieldContrast', 'galvoChannels', 'gammaErrorPolicy', 'githubTaskScript', 'gratingContrast', 'gratingEdge', 'gratingEdgeBlurWidth', 'gratingOri', 'gratingSF', 'gratingSize', 'gratingTF', 'gridX', 'gridY', 'interStimFrames', 'lastFrame', 'lickDetectorFrames', 'lickFrames', 'lickLine', 'manualRewardFrames', 'maxBlocks', 'maxFrames', 'maxTrials', 'maxWheelAngleChange', 'microphoneCh', 'microphoneData', 'minWheelAngleChange', 'monBackgroundColor', 'monDistance', 'monGamma', 'monSizePix', 'monWidth', 'networkNidaqDevices', 'optoChannels', 'optoNidaqDevice', 'optoNidaqDeviceSerialNum', 'optoParamsPath', 'optoSampleRate', 'pixelsPerDeg', 'rewardFrames', 'rewardLine', 'rewardSize', 'rewardSoundDeviceOpenTime', 'r