In [169]:
import json
import pandas as pd
import os
import copy

In [178]:
# Read the recording-session summary spreadsheet into a DataFrame
recording_summary = "/Volumes/scratch/andrew.shelton/NPUltra_data/raw_npultra_data/NPUltra_recording_summary.xlsx"
recording_summary_table = pd.read_excel(recording_summary)

# Keep only the rows that belong to the psychedelics experiment AND are marked
# as uploaded; each condition is named so the filter reads as a sentence.
is_psychedelics = recording_summary_table['experiment'].eq('NPUltra_psychedelics')
is_uploaded = recording_summary_table['uploaded to CO'].eq('yes')
filtered_sessions = recording_summary_table.loc[is_psychedelics & is_uploaded]

# Preview the first rows of the selection
filtered_sessions.head()

Unnamed: 0,session,subject_id,sex,DOB,DOD,age,rec_date,recording_age,num_recordings,recording_rig,genotype,.csv,metadata,uploaded to CO,sorted,flags,data_dir,experiment,notes
67,2024-05-14_714527,714527,F,2023-11-30,2024-05-22,174,2024-05-14,166,1.0,NP.1,Sim1-Cre;Ai32,yes,yes,yes,yes,,\\allen\aind\scratch\andrew.shelton\NPUltra_da...,NPUltra_psychedelics,
68,2024-05-15_714527,714527,F,2023-11-30,2024-05-22,165,2024-05-15,167,1.0,NP.1,Sim1-Cre;Ai32,yes,yes,yes,yes,,\\allen\aind\scratch\andrew.shelton\NPUltra_da...,NPUltra_psychedelics,
69,2024-05-16_714789,714789,F,2023-12-01,2024-05-24,173,2024-05-16,167,1.0,NP.1,Sim1-Cre;Ai32,yes,yes,yes,yes,,\\allen\aind\scratch\andrew.shelton\NPUltra_da...,NPUltra_psychedelics,
70,2024-05-17_714789,714789,F,2023-12-01,2024-05-24,175,2024-05-17,168,1.0,NP.1,Sim1-Cre;Ai32,yes,yes,yes,yes,,\\allen\aind\scratch\andrew.shelton\NPUltra_da...,NPUltra_psychedelics,
71,2024-06-04_717033,717033,M,2023-12-18,2024-06-05,170,2024-06-04,169,1.0,NP.1,Tlx3-Cre;Ai167,yes,yes,yes,yes,,\\allen\aind\scratch\andrew.shelton\NPUltra_da...,NPUltra_psychedelics,


In [179]:
# Load session metadata

# Sessions of interest, taken from the 'session' column of the filtered table
session_list = filtered_sessions['session'].tolist()
session_list = session_list[:1] # For testing, only take the first session

# Root folder that holds one sub-folder per session (same for every iteration)
base_path = "/Volumes/scratch/andrew.shelton/NPUltra_data/raw_npultra_data/"

# NOTE: session_metadata / rig_metadata are overwritten on every iteration, so
# after the loop they hold the metadata of the LAST session that was loaded.
for session in session_list:
    session_folder = f"{base_path}{session}"

    # session.json and rig.json live in the session's metadata/ sub-folder
    metadata_path = os.path.join(session_folder, "metadata")
    session_json_path = os.path.join(metadata_path, "session.json")
    rig_json_path = os.path.join(metadata_path, "rig.json")

    # Both files are required. Report what is missing instead of skipping
    # silently, otherwise the problem only shows up later as a NameError on
    # session_metadata / rig_metadata.
    missing_files = [path for path in (session_json_path, rig_json_path)
                     if not os.path.exists(path)]
    if missing_files:
        print(f"Skipping session {session}: missing {missing_files}")
        continue

    with open(session_json_path, 'r') as f:
        session_metadata = json.load(f)
    with open(rig_json_path, 'r') as f:
        rig_metadata = json.load(f)

    # Print the loaded metadata
    print("Session Metadata:", session_metadata)
    print("Rig Metadata:", rig_metadata)


Session Metadata: {'describedBy': 'https://raw.githubusercontent.com/AllenNeuralDynamics/aind-data-schema/main/src/aind_data_schema/core/session.py', 'schema_version': '0.2.1', 'protocol_id': '121-01-026-20', 'experimenter_full_name': ['Andrew Shelton'], 'session_start_time': '2024-05-14T10:25:34.114677+00:00', 'session_end_time': '2024-05-14T13:22:08.190834+00:00', 'session_type': 'A study on the effect of psilocybin on neocortical L5 pyramidal neurons using NP UHD (Switchable) probes', 'iacuc_protocol': '2104', 'rig_id': '325_NP1_Ultra_20240506', 'calibrations': [], 'maintenance': [], 'subject_id': '714527', 'animal_weight_prior': 24.8, 'animal_weight_post': None, 'weight_unit': 'gram', 'data_streams': [{'stream_start_time': '2024-05-14T10:25:34.114677+00:00', 'stream_end_time': '2024-05-14T13:22:08.190834+00:00', 'daq_names': ['Sync'], 'stim_modalities': 'Behavior'}, {'stream_start_time': '2024-05-14T10:25:34.114677+00:00', 'stream_end_time': '2024-05-14T13:22:08.190834+00:00', 'daq

In [65]:
def deep_merge(obj1, obj2, preserve_keys=None):
    """Recursively merge two JSON-like dicts into a new, independent dict.

    Keys found in only one input are carried over unchanged. For a key found
    in both inputs:

    * if the key is in ``preserve_keys``, both values must be equal, otherwise
      ``ValueError`` is raised;
    * two dicts are merged recursively (``preserve_keys`` applies at every depth);
    * two lists are concatenated and de-duplicated, keeping first-seen order;
    * anything else (scalars, mismatched types) resolves to the value from ``obj2``.

    Args:
        obj1: First dict. Its keys come first in the result.
        obj2: Second dict. Wins scalar conflicts; its extra keys are appended.
        preserve_keys: Optional collection of key names whose values must agree
            whenever the key is present in both inputs.

    Returns:
        A new dict. Values are deep-copied, so mutating the result never
        changes ``obj1`` or ``obj2``.

    Raises:
        ValueError: If a preserved key has different values in the two inputs.
    """
    if preserve_keys is None:
        preserve_keys = set()

    result = {}

    # obj1's keys first, then keys only obj2 has: a set union would iterate in
    # arbitrary order and make the serialized output differ from run to run.
    ordered_keys = list(obj1) + [key for key in obj2 if key not in obj1]
    for key in ordered_keys:
        if key in obj1 and key in obj2:
            val1 = obj1[key]
            val2 = obj2[key]
            if key in preserve_keys:
                if val1 != val2:
                    raise ValueError(f"Conflicting values for preserved key '{key}': {val1} vs {val2}")
                result[key] = copy.deepcopy(val1)
            elif isinstance(val1, dict) and isinstance(val2, dict):
                result[key] = deep_merge(val1, val2, preserve_keys)
            elif isinstance(val1, list) and isinstance(val2, list):
                # De-duplicate by equality rather than via set(): list items in
                # JSON are often dicts (unhashable), and set() loses the order.
                merged_items = []
                for item in val1 + val2:
                    if item not in merged_items:
                        merged_items.append(copy.deepcopy(item))
                result[key] = merged_items
            else:
                # Non-preserved scalar (or type mismatch): the second input wins
                result[key] = copy.deepcopy(val2)
        elif key in obj1:
            result[key] = copy.deepcopy(obj1[key])
        else:
            result[key] = copy.deepcopy(obj2[key])

    return result
    

In [None]:
# Create an unlinked copy of session_metadata; every fix below is applied to
# the copy so the metadata loaded from disk stays exactly as it was read.
modified_session_json = copy.deepcopy(session_metadata)

# Session['data_streams'] fixes
# Merge the two behavior data streams. The inputs are taken from the copy (not
# from session_metadata) so the in-place edits to `merged` below can never
# reach the original metadata, and re-running this cell gives the same result.
source_streams = modified_session_json['data_streams']
merged = deep_merge(source_streams[0], source_streams[1],
                    preserve_keys={"stream_start_time", "stream_end_time"})
# fix the camera names to match rig
merged['camera_names'] = ['Face forward', 'Body', 'Eye', 'Probe Camera']
# drop 'stim_modalities'; the Behavior modality is recorded under 'stream_modalities' instead
merged.pop('stim_modalities', None)
merged['stream_modalities'].append({'name': 'Behavior', 'abbreviation': 'behavior'})

# create new data stream for ecephys, spanning the same time window as the merged stream
ecephys_data_stream = {
    "stream_modalities": [{'name': 'Extracellular electrophysiology', 'abbreviation': 'ecephys'}],
    "stream_start_time": merged['stream_start_time'],
    "stream_end_time": merged['stream_end_time']
}
# update data streams object
modified_session_json['data_streams'] = [merged, ecephys_data_stream]

# Session['stimulus_epoch'] fixes

# Parameters removed from both software[0].parameters and script.parameters
TASK_PARAMETERS_TO_DROP = ('taskScript', 'taskControl', 'taskUtils')
# Fields moved from light_source_config to software[0].parameters (OptoTagging epochs)
OPTO_FIELDS_TO_MOVE = ('laser_duration', 'optotagging_locations_bregmaX',
                       'optotagging_locations_bregmaY', 'probes_targeted')
# old field name -> new field name for RFMapping stimulus_parameters
RF_FIELD_RENAMES = {
    'duration_sec': 'trial_duration',
    'orientations_deg': 'orientations',
    'size_deg': 'stimulus_size',
    'spatial_frequency_cycles_per_deg': 'spatial_frequency_cycles',
    'temporal_frequency_cycles_per_sec': 'temporal_frequency_cycles',
}

for epoch in modified_session_json['stimulus_epochs']:
    software_parameters = epoch['software'][0]['parameters']
    script_parameters = epoch['script']['parameters']

    # fix parameters: pop() instead of del so an epoch that lacks one of the
    # keys does not abort the whole cell
    for parameter in TASK_PARAMETERS_TO_DROP:
        software_parameters.pop(parameter, None)
        script_parameters.pop(parameter, None)

    stimulus_name = epoch['stimulus_name']

    if stimulus_name == 'OptoTagging':
        epoch['stimulus_modalities'] = ['Optogenetics']
        # move laser_duration, optotagging_locations_bregmaX, optotagging_locations_bregmaY,
        # probes_targeted from the light_source_config object to software.parameters
        if 'light_source_config' in epoch:
            light_source_config = epoch['light_source_config']
            for field in OPTO_FIELDS_TO_MOVE:
                # pop() copies and removes in one step; a missing field becomes
                # None (as before) instead of raising KeyError on removal
                software_parameters[field] = light_source_config.pop(field, None)

    elif stimulus_name == 'RFMapping':
        stimulus_parameters = epoch['stimulus_parameters']
        stimulus_parameters['type'] = 'Sin'
        # change field names (a missing source field yields None)
        for old_name, new_name in RF_FIELD_RENAMES.items():
            stimulus_parameters[new_name] = stimulus_parameters.pop(old_name, None)

        # add new fields for units
        stimulus_parameters.update({
            'trial_duration_unit': 'seconds',
            'orientations_unit': 'degrees',
            'stimulus_size_unit': 'degrees',
            'spatial_frequency_cycles_unit': 'cycles per degree',
            # the value comes from 'temporal_frequency_cycles_per_sec', i.e. it
            # is a rate, so the unit is cycles per second rather than seconds
            'temporal_frequency_cycles_unit': 'cycles per second',
        })

    elif stimulus_name == 'Spontaneous':
        epoch['notes'] = 'low-luminance black screen'

In [None]:
# RIG fixes

# Create an unlinked copy of rig_metadata so the loaded original stays untouched
modified_rig_json = copy.deepcopy(rig_metadata)

# Change laser name to match session.json
# wavelength -> name used for that laser; other wavelengths keep their name
LASER_NAME_BY_WAVELENGTH = {488: 'laser_488', 633: 'laser_633'}
for light_source in modified_rig_json['light_sources']:
    # only Laser entries are inspected for a wavelength
    if light_source['device_type'] != 'Laser':
        continue
    laser_name = LASER_NAME_BY_WAVELENGTH.get(light_source['wavelength'])
    if laser_name is not None:
        light_source['name'] = laser_name

modified_rig_json['origin'] = 'Bregma'
# Create 3 objects for the rig axes, copy from OpenScope metadata.
# Stored as a list: a dict keyed by 0/1/2 would be written by json.dump as an
# object with string keys "0", "1", "2" instead of a JSON array of axes.
# NOTE(review): the aind-data-schema rig model appears to expect a list of
# three axes here - confirm against the schema version in use.
modified_rig_json['rig_axes'] = [
    {'direction': 'layers on the Mouse Sagittal Plane, Positive direction is towards the nose of the mouse',
     'name': 'X'},
    {'direction': 'positive pointing UP opposite the direction from the force of gravity',
     'name': 'Y'},
    {'direction': 'defined by the right hand rule and the other two axis',
     'name': 'Z'},
]

In [198]:
# Write the modified rig and session metadata out as new JSON files
output_path = '/Volumes/scratch/suyee.lee/'

output_rig_json_path = os.path.join(output_path, "modified_rig.json")
output_session_json_path = os.path.join(output_path, "modified_session.json")

# (destination, content) pairs, written in order: rig first, then session
for destination, content in (
    (output_rig_json_path, modified_rig_json),
    (output_session_json_path, modified_session_json),
):
    with open(destination, 'w') as f:
        json.dump(content, f, indent=4)