In [28]:
!pip install pynwb



In [30]:
!pip install nlb_tools



In [67]:
using_collab = False

In [71]:
import sys
import numpy as np
import pandas as pd
if using_collab:
    from google.colab import drive
    drive.mount('/content/gdrive/')
    sys.path.append('/content/gdrive/MyDrive/MLNBD/Project')

### DATA LOADING AND CREATING CSVS (IMPLEMENT ONLY ONCE)

In [119]:
from pynwb import NWBHDF5IO

# Define paths
if using_collab:
    file_paths = {
        "train": "/content/gdrive/MyDrive/MLNBD/Project/sub-Jenkins_ses-small_desc-train_behavior+ecephys.nwb",
        "test": "/content/gdrive/MyDrive/MLNBD/Project/sub-Jenkins_ses-small_desc-test_ecephys (3).nwb"
    }
else:
    file_paths = {
        "train": 'Original_Data/sub-Jenkins_ses-small_desc-train_behavior+ecephys.nwb',
        "test": 'Original_Data/sub-Jenkins_ses-small_desc-test_ecephys.nwb'
    }

# Function to open and explore an NWB file
def explore_nwb(file_path):
    with NWBHDF5IO(file_path, 'r') as io:
        nwbfile = io.read()
        print(nwbfile)

for file_type, file_path in file_paths.items():
    print(f"Exploring {file_type} file:")
    explore_nwb(file_path)
    print("\n" + "="*50 + "\n")

Exploring train file:


  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


root pynwb.file.NWBFile at 0x4502785152
Fields:
  devices: {
    electrode_array_M1 <class 'pynwb.device.Device'>,
    electrode_array_PMd <class 'pynwb.device.Device'>
  }
  electrode_groups: {
    electrode_group_M1 <class 'pynwb.ecephys.ElectrodeGroup'>,
    electrode_group_PMd <class 'pynwb.ecephys.ElectrodeGroup'>
  }
  electrodes: electrodes <class 'hdmf.common.table.DynamicTable'>
  experiment_description: Center-out delayed reaching task with maze barriers.
  experimenter: ['Mark M. Churchland' 'Matthew T. Kaufman']
  file_create_date: [datetime.datetime(2021, 9, 1, 22, 14, 12, 620183, tzinfo=tzoffset(None, -14400))]
  identifier: 76ff7eba-0b93-11ec-a90d-3da92aaa0e59
  institution: Stanford University
  intervals: {
    trials <class 'pynwb.epoch.TimeIntervals'>
  }
  keywords: <StrDataset for HDF5 dataset "keywords": shape (5,), type "|O">
  lab: Shenoy
  processing: {
    behavior <class 'pynwb.base.ProcessingModule'>
  }
  related_publications: ['https://doi.org/10.1016/j.ne

In [134]:
from pynwb import NWBHDF5IO
import numpy as np
import os

# Define paths for train and test files (change if Google Colab)
if using_collab:
    file_paths = {
        "train": "/content/gdrive/MyDrive/MLNBD/Project/sub-Jenkins_ses-small_desc-train_behavior+ecephys.nwb",
        "test": "/content/gdrive/MyDrive/MLNBD/Project/sub-Jenkins_ses-small_desc-test_ecephys (3).nwb"
    }
else:
    file_paths = {
        "train": 'Original_Data/sub-Jenkins_ses-small_desc-train_behavior+ecephys.nwb',
        "test": 'Original_Data/sub-Jenkins_ses-small_desc-test_ecephys.nwb'
    }

# Create a directory for processed data if it doesn't exist
os.makedirs("processed_data/behavior_data", exist_ok=True)

# Define function to process NWB file
def process_nwb_file(file_path, file_type):
    with NWBHDF5IO(file_path, 'r') as io:
        nwbfile = io.read()

        # Display basic metadata
        print(f"Processing {file_type} file")
        print("Session Description:", nwbfile.session_description)
        print("Session ID:", nwbfile.session_id)
        print("Session Start Time:", nwbfile.session_start_time)
        print("Experiment Description:", nwbfile.experiment_description)
        print("Experimenter(s):", nwbfile.experimenter)
        print("Institution:", nwbfile.institution)
        print("Lab:", nwbfile.lab)
        print("File Creation Date:", nwbfile.file_create_date)
        print("Identifier:", nwbfile.identifier)
        print("Related Publications:", nwbfile.related_publications)
        print("Keywords:", nwbfile.keywords)

        # Process subject information
        subject = nwbfile.subject
        if subject:
            print("Subject Information:")
            print("  Sex:", subject.sex)
            print("  Species:", subject.species)
            print("  Subject ID:", subject.subject_id)

        # Process devices
        print("\nDevices:")
        for name, device in nwbfile.devices.items():
            print(f"  {name}: {device}")

        # Process electrode groups
        print("\nElectrode Groups:")
        for name, group in nwbfile.electrode_groups.items():
            print(f"  {name}: {group}")

        # Save electrodes table
        electrodes = nwbfile.electrodes
        if electrodes:
            electrodes_df = electrodes.to_dataframe()
            electrodes_df.to_csv(f'processed_data/{file_type}_Electrode_table.csv', index=False)

        # Save trials table
        trials = nwbfile.trials
        if trials:
            trials_df = trials.to_dataframe()
            trials_df.to_csv(f'processed_data/{file_type}_Trials.csv', index=False)

        # Save units table
        units = nwbfile.units
        if units:
            units_df = units.to_dataframe()
            units_df.to_csv(f'processed_data/{file_type}_Units.csv', index=False)

        # Save time intervals
        for name, interval in nwbfile.intervals.items():
            interval_df = interval.to_dataframe()
            interval_df.to_csv(f'processed_data/{file_type}_{name}_Intervals.csv', index=False)

        # Process behavior data in processing modules
        print("\nProcessing Modules:")
        for module_name, module in nwbfile.processing.items():
            if module_name == "behavior":
                for data_interface_name, data_interface in module.data_interfaces.items():
                    if hasattr(data_interface, 'to_dataframe'):
                        df = data_interface.to_dataframe()
                        df.to_csv(f'processed_data/behavior_data/{file_type}_{module_name}_{data_interface_name}.csv', index=False)
                    else:
                        interface = nwbfile.processing[module_name].data_interfaces[data_interface_name]
                        data = interface.data[:]
                        timestamps = interface.timestamps[:]
                        np.save(f"processed_data/behavior_data/{file_type}_{data_interface_name}_data.npy", data)
                        np.save(f"processed_data/behavior_data/{file_type}_{data_interface_name}_timestamps.npy", timestamps)

        # Process acquisition data
        print("\nAcquisition Data:")
        for name, acquisition in nwbfile.acquisition.items():
            if hasattr(acquisition, 'data'):
                data = acquisition.data[:]
                np.save(f"processed_data/behavior_data/{file_type}_{name}_acquisition_data.npy", data)

        # Process stimulus data
        print("\nStimulus Data:")
        for name, stimulus in nwbfile.stimulus.items():
            if hasattr(stimulus, 'data'):
                data = stimulus.data[:]
                np.save(f"processed_data/behavior_data/{file_type}_{name}_stimulus_data.npy", data)

# Run the function for both train and test files
for file_type, file_path in file_paths.items():
    process_nwb_file(file_path, file_type)

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Processing train file
Session Description: Data from monkey Jenkins performing center-out delayed reaching task. This file contains trials from the full session on 2009-09-28 that can be used for training models for the Neural Latents Benchmark.
Session ID: small
Session Start Time: 2009-09-28 00:00:00-07:00
Experiment Description: Center-out delayed reaching task with maze barriers.
Experimenter(s): ('Mark M. Churchland', 'Matthew T. Kaufman')
Institution: Stanford University
Lab: Shenoy
File Creation Date: [datetime.datetime(2021, 9, 1, 22, 14, 12, 620183, tzinfo=tzoffset(None, -14400))]
Identifier: 76ff7eba-0b93-11ec-a90d-3da92aaa0e59
Related Publications: ('https://doi.org/10.1016/j.neuron.2010.09.015',)
Keywords: <StrDataset for HDF5 dataset "keywords": shape (5,), type "|O">
Subject Information:
  Sex: M
  Species: Macaca mulatta
  Subject ID: Jenkins

Devices:
  electrode_array_M1: electrode_array_M1 pynwb.device.Device at 0x6096341520
Fields:
  description: 96-electrode Utah ar

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."



Processing Modules:

Acquisition Data:

Stimulus Data:


In [136]:
from pynwb import NWBHDF5IO
import numpy as np
import os

# Define paths for train and test files
if using_collab:
    file_paths = {
        "train": "/content/gdrive/MyDrive/MLNBD/Project/sub-Jenkins_ses-small_desc-train_behavior+ecephys.nwb",
        "test": "/content/gdrive/MyDrive/MLNBD/Project/sub-Jenkins_ses-small_desc-test_ecephys (3).nwb"
    }
else:
    file_paths = {
        "train": 'Original_Data/sub-Jenkins_ses-small_desc-train_behavior+ecephys.nwb',
        "test": 'Original_Data/sub-Jenkins_ses-small_desc-test_ecephys.nwb'
    }

# Create a directory for processed data if it doesn't exist
os.makedirs("processed_data/behaviour_data", exist_ok=True)

# Define function to process NWB file
def process_nwb_file(file_path, file_type):
    with NWBHDF5IO(file_path, 'r') as io:
        nwbfile = io.read()

        # Display basic metadata
        print(f"Processing {file_type} file")
        print("Session Description:", nwbfile.session_description)
        print("Session ID:", nwbfile.session_id)
        print("Session Start Time:", nwbfile.session_start_time)
        print("Experiment Description:", nwbfile.experiment_description)
        print("Experimenter(s):", nwbfile.experimenter)
        print("Institution:", nwbfile.institution)
        print("Lab:", nwbfile.lab)
        print("File Creation Date:", nwbfile.file_create_date)
        print("Identifier:", nwbfile.identifier)
        print("Related Publications:", nwbfile.related_publications)
        print("Keywords:", nwbfile.keywords)

        # Uncommented portions to process all needed sections

        # Process subject information
        subject = nwbfile.subject
        if subject:
            print("Subject Information:")
            print("  Sex:", subject.sex)
            print("  Species:", subject.species)
            print("  Subject ID:", subject.subject_id)

        # Process devices
        print("\nDevices:")
        for name, device in nwbfile.devices.items():
            print(f"  {name}: {device}")

        # Process electrode groups
        print("\nElectrode Groups:")
        for name, group in nwbfile.electrode_groups.items():
            print(f"  {name}: {group}")

        # Save electrodes table
        electrodes = nwbfile.electrodes
        if electrodes:
            electrodes_df = electrodes.to_dataframe()
            electrodes_df.to_csv(f'processed_data/behavior_data/{file_type}_Electrode_table.csv', index=False)

        # Save trials table
        trials = nwbfile.trials
        if trials:
            trials_df = trials.to_dataframe()
            trials_df.to_csv(f'processed_data/behavior_data/{file_type}_Trials.csv', index=False)

        # Save units table
        units = nwbfile.units
        if units:
            units_df = units.to_dataframe()
            units_df.to_csv(f'processed_data/behavior_data/{file_type}_Units.csv', index=False)

        # Save time intervals
        for name, interval in nwbfile.intervals.items():
            interval_df = interval.to_dataframe()
            interval_df.to_csv(f'processed_data/behavior_data/{file_type}_{name}_Intervals.csv', index=False)

        # Process behavior data in processing modules
        print("\nProcessing Modules:")
        for module_name, module in nwbfile.processing.items():
            if module_name == "behavior":
                for data_interface_name, data_interface in module.data_interfaces.items():
                    if hasattr(data_interface, 'to_dataframe'):
                        df = data_interface.to_dataframe()
                        df.to_csv(f'processed_data/behavior_data/{file_type}_{module_name}_{data_interface_name}.csv', index=False)
                    else:
                        interface = nwbfile.processing[module_name].data_interfaces[data_interface_name]
                        data = interface.data[:]
                        timestamps = interface.timestamps[:]
                        np.save(f"processed_data/behavior_data/{file_type}_{data_interface_name}_data.npy", data)
                        np.save(f"processed_data/behavior_data/{file_type}_{data_interface_name}_timestamps.npy", timestamps)

        # Process acquisition data
        print("\nAcquisition Data:")
        for name, acquisition in nwbfile.acquisition.items():
            if hasattr(acquisition, 'data'):
                data = acquisition.data[:]
                np.save(f"processed_data/behavior_data/{file_type}_{name}_acquisition_data.npy", data)

        # Process stimulus data
        print("\nStimulus Data:")
        for name, stimulus in nwbfile.stimulus.items():
            if hasattr(stimulus, 'data'):
                data = stimulus.data[:]
                np.save(f"processed_data/behavior_data/{file_type}_{name}_stimulus_data.npy", data)

# Run the function for both train and test files
for file_type, file_path in file_paths.items():
    process_nwb_file(file_path, file_type)

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


Processing train file
Session Description: Data from monkey Jenkins performing center-out delayed reaching task. This file contains trials from the full session on 2009-09-28 that can be used for training models for the Neural Latents Benchmark.
Session ID: small
Session Start Time: 2009-09-28 00:00:00-07:00
Experiment Description: Center-out delayed reaching task with maze barriers.
Experimenter(s): ('Mark M. Churchland', 'Matthew T. Kaufman')
Institution: Stanford University
Lab: Shenoy
File Creation Date: [datetime.datetime(2021, 9, 1, 22, 14, 12, 620183, tzinfo=tzoffset(None, -14400))]
Identifier: 76ff7eba-0b93-11ec-a90d-3da92aaa0e59
Related Publications: ('https://doi.org/10.1016/j.neuron.2010.09.015',)
Keywords: <StrDataset for HDF5 dataset "keywords": shape (5,), type "|O">
Subject Information:
  Sex: M
  Species: Macaca mulatta
  Subject ID: Jenkins

Devices:
  electrode_array_M1: electrode_array_M1 pynwb.device.Device at 0x6096927056
Fields:
  description: 96-electrode Utah ar

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."



Processing Modules:

Acquisition Data:

Stimulus Data:


In [138]:
from pynwb import NWBHDF5IO

# Define paths for train and test files
if using_collab:
    file_paths = {
        "train": "/content/gdrive/MyDrive/MLNBD/Project/sub-Jenkins_ses-small_desc-train_behavior+ecephys.nwb",
        "test": "/content/gdrive/MyDrive/MLNBD/Project/sub-Jenkins_ses-small_desc-test_ecephys (3).nwb"
    }
else:
    file_paths = {
        "train": 'Original_Data/sub-Jenkins_ses-small_desc-train_behavior+ecephys.nwb',
        "test": 'Original_Data/sub-Jenkins_ses-small_desc-test_ecephys.nwb'
    }

# Define function to access and print behavior and electrode data
def extract_behavior_and_electrodes(nwb_file_path, file_type):
    with NWBHDF5IO(nwb_file_path, 'r') as io:
        nwbfile = io.read()

        print(f"\n--- {file_type.capitalize()} File Data ---")

        # Access behavior data in the 'behavior' module
        behavior_module = nwbfile.processing.get('behavior')
        if behavior_module is not None:
            # Access cursor position data
            cursor_pos = behavior_module.get_data_interface('cursor_pos')
            if cursor_pos:
                cursor_data = cursor_pos.data[:]
                cursor_time = cursor_pos.timestamps[:]
                print("\nCursor Position Data:", cursor_data[:10])  # Print first 10 for brevity
                print("Cursor Position Timestamps:", cursor_time[:10])  # Print first 10 for brevity

            # Access hand position data
            hand_pos = behavior_module.get_data_interface('hand_pos')
            if hand_pos:
                hand_data = hand_pos.data[:]
                hand_time = hand_pos.timestamps[:]
                print("\nHand Position Data:", hand_data[:10])  # Print first 10 for brevity
                print("Hand Position Timestamps:", hand_time[:10])  # Print first 10 for brevity

        # Access electrodes data
        electrodes_table = nwbfile.electrodes
        if electrodes_table is not None:
            # Extract 'x', 'y', and 'z' coordinates from electrodes table
            electrode_x = electrodes_table['x'][:]
            electrode_y = electrodes_table['y'][:]
            electrode_z = electrodes_table['z'][:]

            print("\nElectrode X Coordinates:", electrode_x[:10])  # Print first 10 for brevity
            print("Electrode Y Coordinates:", electrode_y[:10])  # Print first 10 for brevity
            print("Electrode Z Coordinates:", electrode_z[:10])  # Print first 10 for brevity

        print("\n" + "="*50 + "\n")

# Run the function for both train and test files
for file_type, file_path in file_paths.items():
    extract_behavior_and_electrodes(file_path, file_type)

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."



--- Train File Data ---

Cursor Position Data: [[-4.1         3.        ]
 [-4.07350645  3.00747767]
 [-4.04892157  3.01283136]
 [-4.02782931  3.0141464 ]
 [-4.01134637  3.01006292]
 [-4.          3.        ]
 [-3.99315545  2.98027996]
 [-3.99086671  2.95749284]
 [-3.99214892  2.93467865]
 [-3.99567857  2.91475739]]
Cursor Position Timestamps: [0.    0.001 0.002 0.003 0.004 0.005 0.006 0.007 0.008 0.009]

Hand Position Data: [[ -4.06939146 -31.88889289]
 [ -4.06694074 -31.90072274]
 [ -4.0645211  -31.91298044]
 [ -4.0621682  -31.92550114]
 [ -4.05982209 -31.93811223]
 [ -4.05750185 -31.95082761]
 [ -4.05533624 -31.96355149]
 [ -4.0531941  -31.97622613]
 [ -4.05118168 -31.9888123 ]
 [ -4.04944275 -32.00130926]]
Hand Position Timestamps: [0.    0.001 0.002 0.003 0.004 0.005 0.006 0.007 0.008 0.009]

Electrode X Coordinates: [nan nan nan nan nan nan nan nan nan nan]
Electrode Y Coordinates: [nan nan nan nan nan nan nan nan nan nan]
Electrode Z Coordinates: [nan nan nan nan nan nan nan na

### Exploratory Data Analysis

In [125]:
from nlb_tools.nwb_interface import NWBDataset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [126]:
dataset_train = NWBDataset("Original_Data/sub-Jenkins_ses-small_desc-train_behavior+ecephys.nwb", split_heldout=False)
dataset_test = NWBDataset("Original_Data/sub-Jenkins_ses-small_desc-test_ecephys.nwb", split_heldout=False)

  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."


In [127]:
dataset_train.trial_info

  return method()


Unnamed: 0,trial_id,start_time,end_time,trial_type,trial_version,maze_id,success,target_on_time,go_cue_time,move_onset_time,rt,delay,num_targets,target_pos,num_barriers,barrier_pos,active_target,split
0,0,0 days 00:00:00,0 days 00:00:03.421000,11,1,76,True,0 days 00:00:00.848000,0 days 00:00:01.413000,0 days 00:00:02.171000,758,565,1,"[[-118, -83]]",9,"[[-33, 47, 37, 6], [-77, 48, 61, 11], [-64, -2...",0,val
1,1,0 days 00:00:03.500000,0 days 00:00:05.631000,12,0,77,True,0 days 00:00:04.176000,0 days 00:00:04.324000,0 days 00:00:04.580000,256,148,1,"[[-77, 82]]",0,[],0,train
2,2,0 days 00:00:05.700000,0 days 00:00:08.536000,4,1,4,True,0 days 00:00:06.359000,0 days 00:00:06.540000,0 days 00:00:06.910000,370,181,1,"[[-100, 35]]",9,"[[100, 38, 63, 14], [-146, 90, 54, 10], [51, 1...",0,train
3,3,0 days 00:00:08.600000,0 days 00:00:11.746000,8,2,10,True,0 days 00:00:09.361000,0 days 00:00:10.259000,0 days 00:00:10.607000,348,898,3,"[[-92, 81], [-91, -70], [117, 15]]",9,"[[-119, 48, 72, 8], [-47, 68, 9, 20], [-36, -8...",1,val
4,4,0 days 00:00:11.800000,0 days 00:00:14.706000,7,2,8,True,0 days 00:00:12.593000,0 days 00:00:13.225000,0 days 00:00:13.550000,325,632,3,"[[-92, 81], [-91, -70], [117, 15]]",9,"[[-119, 48, 72, 8], [-47, 68, 9, 20], [-36, -8...",0,train
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,95,0 days 00:04:38.600000,0 days 00:04:42.021000,11,2,76,True,0 days 00:04:39.552000,0 days 00:04:40.500000,0 days 00:04:40.837000,337,948,3,"[[133, -81], [-118, -83], [-77, 82]]",9,"[[-33, 47, 37, 6], [-77, 48, 61, 11], [-64, -2...",1,val
96,96,0 days 00:04:42.100000,0 days 00:04:44.466000,4,0,4,True,0 days 00:04:42.963000,0 days 00:04:43.161000,0 days 00:04:43.427000,266,198,1,"[[-100, 35]]",0,[],0,train
97,97,0 days 00:04:44.500000,0 days 00:04:47.376000,3,2,3,True,0 days 00:04:45.410000,0 days 00:04:45.892000,0 days 00:04:46.242000,350,482,3,"[[-100, 35], [125, -64], [104, 80]]",9,"[[100, 38, 63, 14], [-146, 90, 54, 10], [51, 1...",1,val
98,98,0 days 00:04:47.400000,0 days 00:04:50.736000,3,1,3,True,0 days 00:04:48.323000,0 days 00:04:49.238000,0 days 00:04:49.545000,307,915,1,"[[125, -64]]",9,"[[100, 38, 63, 14], [-146, 90, 54, 10], [51, 1...",0,val


In [132]:
from google.colab import drive
drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google.colab'