#  Loading Data

### NWB

#### Running a conversion
First we run one with only the raw data:
* Intan raw data.
* Behavioral data.
* Stimulus data.

In [None]:
from pathlib import Path

from dicarlo_lab_to_nwb.conversion.convert_session import convert_session_to_nwb
from dicarlo_lab_to_nwb.conversion.data_locator import (
    locate_intan_file_path,
    locate_mworks_processed_file_path,
)

# Root folder of the shared data; fail early with a readable message if it is missing.
data_folder = Path("/media/heberto/One Touch/DiCarlo-CN-data-share")
assert data_folder.is_dir(), f"Data directory not found: {data_folder}"
stimuli_folder = data_folder.joinpath("StimulusSets", "RSVP-domain_transfer", "images")

# Options forwarded unchanged to `convert_session_to_nwb` below.
stub_test = True
verbose = True

# Identifies the session to convert; the same dictionary is used to locate the files.
session_metadata = {
    "image_set_name": "domain-transfer-2023",
    "session_date": "20230215",
    "session_time": "161322",
    "subject": "pico",
}

# These two functions are where the data organization structure is encoded.
intan_file_path = locate_intan_file_path(**session_metadata, data_folder=data_folder)
mworks_processed_file_path = locate_mworks_processed_file_path(**session_metadata, data_folder=data_folder)

# Conversion with the raw data only (no optional add-ons are requested here).
convert_session_to_nwb(
    session_metadata=session_metadata,
    intan_file_path=intan_file_path,
    mworks_processed_file_path=mworks_processed_file_path,
    stimuli_folder=stimuli_folder,
    verbose=verbose,
    stub_test=stub_test,
)

We can also include the threshold crossings and the PSTH data by running the same conversion script with enhanced options.

In [None]:
from dicarlo_lab_to_nwb.conversion.convert_session import convert_session_to_nwb
from dicarlo_lab_to_nwb.conversion.data_locator import (
    locate_intan_file_path,
    locate_mworks_processed_file_path,
)

# Root folder of the shared data; fail early with a readable message if it is missing.
data_folder = Path("/media/heberto/One Touch/DiCarlo-CN-data-share")
assert data_folder.is_dir(), f"Data directory not found: {data_folder}"
stimuli_folder = data_folder.joinpath("StimulusSets", "RSVP-domain_transfer", "images")

# Options forwarded unchanged to `convert_session_to_nwb` below.
stub_test = True
verbose = True

# Identifies the session to convert; the same dictionary is used to locate the files.
session_metadata = {
    "image_set_name": "domain-transfer-2023",
    "session_date": "20230215",
    "session_time": "161322",
    "subject": "pico",
}

# These two functions are where the data organization structure is encoded.
intan_file_path = locate_intan_file_path(**session_metadata, data_folder=data_folder)
mworks_processed_file_path = locate_mworks_processed_file_path(**session_metadata, data_folder=data_folder)

# NOTE(review): the spelling `thresholindg_pipeline_kwargs` matches the keyword used in the
# call below; presumably it mirrors the function's parameter name - confirm before renaming.
thresholindg_pipeline_kwargs = {
    "f_notch": 60.0,  # Frequency for the notch filter
    "bandwidth": 10.0,  # Bandwidth for the notch filter
    "f_low": 300.0,  # Low cutoff frequency for the bandpass filter
    "f_high": 6000.0,  # High cutoff frequency for the bandpass filter
    "noise_threshold": 3,  # Threshold for detection in the thresholding algorithm
}

# Ten bins starting 200 ms before the stimulus and spanning 400 ms
psth_kwargs = {
    "bins_span_milliseconds": 400,
    "num_bins": 10,
    "milliseconds_from_event_to_first_bin": -200.0,
}

# Same conversion as before, now also requesting thresholding events and PSTHs.
convert_session_to_nwb(
    session_metadata=session_metadata,
    intan_file_path=intan_file_path,
    mworks_processed_file_path=mworks_processed_file_path,
    stimuli_folder=stimuli_folder,
    add_thresholding_events=True,
    thresholindg_pipeline_kwargs=thresholindg_pipeline_kwargs,
    add_psth=True,
    psth_kwargs=psth_kwargs,
    verbose=verbose,
    stub_test=stub_test,
)

#### Loading an NWBFile
After running the script in `conversion.convert_session.py`, the produced NWB file can be loaded like this:

In [None]:
from pathlib import Path

from pynwb import NWBHDF5IO

# Change this for the location of the NWB files in your system
stub_file = False
nwb_folder_path = Path.home() / "conversion_nwb"

# Stub conversions are looked up in a dedicated sub-folder, which must already exist.
if stub_file:
    nwb_folder_path /= "nwb_stub"
    assert nwb_folder_path.is_dir()

nwbfile_path = nwb_folder_path.joinpath("pico_20230214_140610.nwb")
assert nwbfile_path.is_file(), f"{nwbfile_path} does not exist"

# The handle `io` is not closed in this cell; the following cells keep reading from `nwbfile`.
io = NWBHDF5IO(nwbfile_path, mode="r")
nwbfile = io.read()
nwbfile

#### Trials Table

In [None]:
# List the column names of the trials table once converted to a DataFrame.
nwbfile.trials.to_dataframe().columns

In [None]:
# Subset of trial columns to display.
columns = [
    "start_time",
    "stop_time",
    "stimulus_presented",
    "fixation_correct",
    "stimuli_block_index",
]

# Show five randomly sampled trials restricted to the columns above.
nwbfile.trials.to_dataframe()[columns].sample(n=5)

#### Calculate compression ratio

In [None]:
# Dataset object backing the "ElectricalSeries" acquisition.
hdf5_dataset = nwbfile.acquisition["ElectricalSeries"].data
# `nbytes` is reported below as the uncompressed size; 1024 ** 3 converts bytes to GiB.
size_uncompressed_GiB = hdf5_dataset.nbytes / 1024 ** 3
print(f"Size of the uncompressed ElectricalSeries: {size_uncompressed_GiB:.2f} GiB")

In [None]:
# The storage size of the dataset is reported below as the compressed size, in GiB.
size_compressed_GiB = hdf5_dataset.id.get_storage_size() / 1024 ** 3
print(f"Size of the compressed ElectricalSeries: {size_compressed_GiB:.2f} GiB")

In [None]:
# Ratio of uncompressed bytes to stored bytes (values above 1 mean the data shrank).
compression_ratio = hdf5_dataset.nbytes / hdf5_dataset.id.get_storage_size() 
print(f"Compression ratio: {compression_ratio:.2f}")

In [None]:
# Fraction of the uncompressed size removed by compression (0 = nothing saved, 1 = everything).
space_saving = 1 - size_compressed_GiB / size_uncompressed_GiB
# The label ends in "%", so the fraction is scaled to a percentage before printing;
# `space_saving` itself stays a fraction.
print(f"Space saving: {space_saving * 100:.2f} %")

#### PSTHs Binned Aligned Spikes

In [None]:
# Display the "BinnedAlignedSpikesStimulusID0" object stored in the "ecephys" processing module.
nwbfile.processing["ecephys"]["BinnedAlignedSpikesStimulusID0"]

In [None]:
# Read the full `data` array of that object.
nwbfile.processing["ecephys"]["BinnedAlignedSpikesStimulusID0"].data[:]

In [None]:
# Read the full `event_timestamps` array of that object.
nwbfile.processing["ecephys"]["BinnedAlignedSpikesStimulusID0"].event_timestamps[:]

#### Display stimuli

In [None]:
import matplotlib.pyplot as plt

# Pick the image stored under "im0" in the "stimuli" container and read its data.
an_image = nwbfile.stimulus["stimuli"].images["im0"]
an_image_data = an_image.data[:]

plt.imshow(an_image_data)

#### Electrode Table 

In [None]:
# Subset of electrode columns to display (this rebinds `columns` from the trials cell).
columns = [
    "channel_name",
    "probe",
    "rel_x",
    "rel_y",
    "electrode_impedance_magnitude",
    "electrode_impedance_phase",
]
# Show five randomly sampled electrodes restricted to the columns above.
nwbfile.electrodes.to_dataframe()[columns].sample(n=5)

#### Units Table

In [None]:
# Show five randomly sampled rows of the units table.
nwbfile.units.to_dataframe().sample(n=5)

### Amplifiers

This is an example of how to load data.

In [None]:
from pathlib import Path

from dicarlo_lab_to_nwb.conversion.data_locator import locate_intan_file_path
from spikeinterface.extractors import IntanRecordingExtractor

# Session to load.
data_folder = Path("/media/heberto/One Touch/DiCarlo-CN-data-share")
image_set_name = "domain-transfer-2023"
subject = "pico"
session_date, session_time = "20230214", "140610"

intan_file_path = locate_intan_file_path(
    data_folder=data_folder,
    subject=subject,
    image_set_name=image_set_name,
    session_date=session_date,
    session_time=session_time,
)

# First attempt: integrity checks are left enabled (`ignore_integrity_checks=False`).
recording = IntanRecordingExtractor(
    file_path=intan_file_path,
    stream_name="RHD2000 amplifier channel",
    ignore_integrity_checks=False,
    all_annotations=True,
)
recording

This particular example has timestamp discontinuities. To load the data regardless, we set the parameter `ignore_integrity_checks=True`.

In [None]:
from spikeinterface.extractors import IntanRecordingExtractor

# Same stream as in the previous cell, now with the integrity checks disabled.
recording = IntanRecordingExtractor(
    file_path=intan_file_path,  # presumably the .rhd file - the original note sat on the next-but-one line
    stream_name="RHD2000 amplifier channel",
    all_annotations=True,
    ignore_integrity_checks=True,
)
recording

### Auxiliary input

In [None]:
# Load the "RHD2000 auxiliary input channel" stream from the same Intan file.
recording_auxiliary_input = IntanRecordingExtractor(
    file_path=intan_file_path,
    stream_name="RHD2000 auxiliary input channel",
    all_annotations=True,
    ignore_integrity_checks=True,
)

recording_auxiliary_input

### ADC input

In [None]:
# Load the "USB board ADC input channel" stream from the same Intan file.
recording_adc_input = IntanRecordingExtractor(
    file_path=intan_file_path,
    stream_name="USB board ADC input channel",
    all_annotations=True,
    ignore_integrity_checks=True,
)

recording_adc_input

### Digital channel 
Requires neo version from github https://github.com/NeuralEnsemble/python-neo/

In [None]:
# Load the "USB board digital input channel" stream from the same Intan file.
recording_digital = IntanRecordingExtractor(
    file_path=intan_file_path,
    stream_name="USB board digital input channel",
    all_annotations=True,
    ignore_integrity_checks=True,
)

recording_digital

## Loading the probe

In [None]:
# `Path` is imported here so this section can be run on its own, like the other
# section-opening cells of this notebook.
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from probeinterface.plotting import plot_probe
from spikeinterface.extractors import IntanRecordingExtractor

from dicarlo_lab_to_nwb.conversion.data_locator import locate_intan_file_path
from dicarlo_lab_to_nwb.conversion.probe import build_probe_group

# Session to load.
data_folder = Path("/media/heberto/One Touch/DiCarlo-CN-data-share")
image_set_name = "domain-transfer-2023"
subject = "pico"
session_date = "20230214"
session_time = "140610"

intan_file_path = locate_intan_file_path(
    data_folder=data_folder,
    image_set_name=image_set_name,
    subject=subject,
    session_date=session_date,
    session_time=session_time,
)

stream_name = "RHD2000 amplifier channel"
recording = IntanRecordingExtractor(
    file_path=intan_file_path,
    stream_name=stream_name,
    ignore_integrity_checks=True,
    all_annotations=True,
)

# Build the probe group from the recording.
probe_group = build_probe_group(recording=recording)

fig = plt.figure(figsize=(16, 8))
ax = fig.add_subplot(111)

# Plot the first probe and label each contact with the id of the recording channel
# that `device_channel_indices` points to.
probe = probe_group.probes[0]
channel_ids = recording.get_channel_ids()
corresponding_channel_ids = [channel_ids[i] for i in probe.device_channel_indices]

text_on_contact = np.asarray(corresponding_channel_ids)

plot_probe(probe=probe, ax=ax, with_contact_id=True, text_on_contact=text_on_contact)

In [None]:
from probeinterface.plotting import plot_probe_group

# Single axes on a 16 x 8 figure.
fig = plt.figure(figsize=(16, 8))
ax = fig.add_subplot(1, 1, 1)

# Draw every probe of the group on that one axes, without contact ids.
plot_probe_group(probe_group, with_contact_id=False, same_axes=True, ax=ax)


# Sorting Pipeline

To run a sorting pipeline we need a recording with a geometry attached.

In [None]:
# `Path` is imported here so this section can be run on its own, like the other
# section-opening cells of this notebook.
from pathlib import Path

from spikeinterface.extractors import IntanRecordingExtractor
from spikeinterface.sorters import run_sorter_by_property

from dicarlo_lab_to_nwb.conversion.data_locator import locate_intan_file_path
from dicarlo_lab_to_nwb.conversion.probe import attach_probe_to_recording

# Session to sort.
data_folder = Path("/media/heberto/One Touch/DiCarlo-CN-data-share")
image_set_name = "domain-transfer-2023"
subject = "pico"
session_date = "20230214"
session_time = "140610"

intan_file_path = locate_intan_file_path(
    data_folder=data_folder,
    image_set_name=image_set_name,
    subject=subject,
    session_date=session_date,
    session_time=session_time,
)

stream_name = "RHD2000 amplifier channel"
recording = IntanRecordingExtractor(
    file_path=intan_file_path,
    stream_name=stream_name,
    ignore_integrity_checks=True,
    all_annotations=True,
)

# The return value is not used, so the probe is presumably attached to `recording` in place.
attach_probe_to_recording(recording=recording)
recording

Most sorters have been designed with high-density probes in mind. They will work with a single-channel probe, but the results may not be as good, as some units might be suppressed by the spatial regularization.

Because of this we performed sorting in two ways so you can compare the results:

1. We do one sorting per probe
2. We do one sorting per channel to avoid interference of the spatial regularization



## Performing a sorting per probe

In [None]:
from spikeinterface.core import load_extractor

# Folder holding the saved sorting result; set `overwrite = True` to force a new run.
sorting_folder = Path("./sorting_done")
overwrite = False

if sorting_folder.exists() and not overwrite:
    # Reuse the previously saved result instead of sorting again.
    sorting = load_extractor(sorting_folder)
else:
    # One sorter run per group of channels sharing the same "probe" property.
    # NOTE(review): the working folder "./sorting_folder_probe" is not cleaned up here;
    # confirm whether it must be removed manually before a re-run.
    sorting = run_sorter_by_property(
        sorter_name="kilosort2",
        recording=recording,
        folder="./sorting_folder_probe",
        grouping_property="probe",
        docker_image=True,
    )

    # Forward `overwrite` so a forced re-run can replace an existing saved result
    # instead of failing because `sorting_folder` already exists.
    sorting.save(folder=sorting_folder, overwrite=overwrite)
    

In [None]:
# Display the sorting object produced (or loaded) by the previous cell.
sorting

In [None]:
from spikeinterface.core import create_sorting_analyzer


# Pair the sorting with the recording it was computed from.
sorting_analyzer = create_sorting_analyzer(sorting=sorting, recording=recording)




## Performing a sorting per channel

In [None]:
# One sorter run per group of channels sharing the same "channel_names" property.
# NOTE(review): the tridesclous cell further down passes the same `folder`;
# confirm the two runs are not meant to coexist.
sorting = run_sorter_by_property(
    sorter_name="kilosort3",
    recording=recording,
    folder="./sorting_folder_per_channel",
    grouping_property="channel_names",
    docker_image=True,
)


In [None]:
from spikeinterface.sorters import available_sorters

# List the sorter names that can be passed as `sorter_name`.
available_sorters()

In [None]:
from spikeinterface.core import load_extractor

# Folder holding the saved per-channel sorting; set `overwrite = True` to force a new run.
sorting_folder = Path("./sorting_done_per_channel")
overwrite = False

if sorting_folder.exists() and not overwrite:
    # Reuse the previously saved result instead of sorting again.
    sorting = load_extractor(sorting_folder)
else:
    # One sorter run per group of channels sharing the same "channel_names" property.
    # NOTE(review): the working folder "./sorting_folder_per_channel" is not cleaned up here;
    # confirm whether it must be removed manually before a re-run.
    sorting = run_sorter_by_property(
        sorter_name="tridesclous",
        recording=recording,
        folder="./sorting_folder_per_channel",
        grouping_property="channel_names",
        docker_image=True,
    )

    # Forward `overwrite` so a forced re-run can replace an existing saved result
    # instead of failing because `sorting_folder` already exists.
    sorting.save(folder=sorting_folder, overwrite=overwrite)
    


# Peak Detection Pipeline

## Artificial data

In [None]:
import spikeinterface.widgets as sw

from spikeinterface.core.generate import generate_ground_truth_recording


# Synthetic recording with its ground-truth sorting: 4 channels, 1 unit, durations=[1], fixed seed.
recording, sorting = generate_ground_truth_recording(num_channels=4, num_units=1, durations=[1], seed=0)


# Plot the traces and the spike raster over the (0, 1) time range.
w_ts = sw.plot_traces(recording, time_range=(0, 1))
w_rs = sw.plot_rasters(sorting, time_range=(0, 1))

In [None]:
import numpy as np
from dicarlo_lab_to_nwb.conversion.pipeline import thresholding_pipeline


# Single job, with a progress bar, processing chunks of duration 1.0.
job_kwargs = dict(n_jobs=1, progress_bar=True, chunk_duration=1.0)
noise_threshold = 3  # The number of standard deviations for peak detection

# Run the thresholding pipeline on the synthetic recording with its remaining defaults.
spike_times_per_channel = thresholding_pipeline(
    recording=recording,
    noise_threshold=noise_threshold,
    job_kwargs=job_kwargs,
    verbose=True,
)

In [None]:
# Ground-truth spike train of unit 0, returned as times rather than sample indices.
sorting.get_unit_spike_train(0, return_times=True)

In [None]:
# First detected time for key 0, scaled by 1000 (presumably seconds to milliseconds - confirm).
spike_times_per_channel[0][0] * 1000.0

## Intan Recording data

In [None]:
from pathlib import Path

import spikeinterface.widgets as sw
from spikeinterface.extractors import IntanRecordingExtractor

from dicarlo_lab_to_nwb.conversion.data_locator import locate_intan_file_path

# Session to load.
data_folder = Path("/media/heberto/One Touch/DiCarlo-CN-data-share")
image_set_name = "domain-transfer-2023"
subject = "pico"
session_date, session_time = "20230214", "140610"

intan_file_path = locate_intan_file_path(
    data_folder=data_folder,
    subject=subject,
    image_set_name=image_set_name,
    session_date=session_date,
    session_time=session_time,
)

recording = IntanRecordingExtractor(
    file_path=intan_file_path,
    stream_name="RHD2000 amplifier channel",
    ignore_integrity_checks=True,
    all_annotations=True,
)

# If you want to select only one channel: keep the first channel id and plot its scaled traces.
channel_ids = recording.get_channel_ids()[:1]
single_channel_recording = recording.select_channels(channel_ids=channel_ids)
w_ts = sw.plot_traces(single_channel_recording, return_scaled=True, time_range=(0, 1))


#### Preprocess

In [None]:
from dicarlo_lab_to_nwb.conversion.pipeline import DiCarloBandPass, DiCarloNotch


# Filter settings for the notch and band-pass stages.
f_notch = 60  # Hz
bandwidth = 10
f_low = 300.0
f_high = 6000.0

vectorized = True
# The notch filter is applied first; the band-pass filter then runs on its output.
notched_recording = DiCarloNotch(single_channel_recording, f_notch=f_notch, bandwidth=bandwidth, vectorized=vectorized)
preprocessed_recording = DiCarloBandPass(notched_recording, f_low=f_low, f_high=f_high, vectorized=vectorized)

# For this instance each array has 96 channels, 400 micrometers apart
w_ts = sw.plot_traces(preprocessed_recording, time_range=(0, 1), return_scaled=True)

#### Run the peak detection on a short portion of the data

In [None]:
from dicarlo_lab_to_nwb.conversion.pipeline import thresholding_pipeline

noise_threshold = 3  # The number of standard deviations for peak detection

# Bounds of the short portion of the recording to analyze.
start_time = 0
end_time = 10.0

# Note: this rebinds `preprocessed_recording` to the sliced version.
preprocessed_recording = preprocessed_recording.time_slice(start_time=start_time, end_time=end_time)

# Only the recording and the threshold are given; every other option keeps its default.
spike_times_per_channel = thresholding_pipeline(
    recording=preprocessed_recording,
    noise_threshold=noise_threshold,
)

spike_times_per_channel

#### Everything can be wrapped up in a couple of lines

In [None]:
# `Path` is imported here so this cell can be run on its own, like the other
# self-contained cells of this notebook.
from pathlib import Path

from spikeinterface.extractors import IntanRecordingExtractor

from dicarlo_lab_to_nwb.conversion.data_locator import locate_intan_file_path
from dicarlo_lab_to_nwb.conversion.pipeline import thresholding_pipeline

# Session to process.
data_folder = Path("/media/heberto/One Touch/DiCarlo-CN-data-share")
image_set_name = "domain-transfer-2023"
subject = "pico"
session_date = "20230214"
session_time = "140610"

intan_file_path = locate_intan_file_path(
    data_folder=data_folder,
    image_set_name=image_set_name,
    subject=subject,
    session_date=session_date,
    session_time=session_time,
)

stream_name = "RHD2000 amplifier channel"
recording = IntanRecordingExtractor(
    file_path=intan_file_path,
    stream_name=stream_name,
    ignore_integrity_checks=True,
    all_annotations=True,
)

# Parameters of the pipeline
f_notch = 60  # Hz
bandwidth = 10
f_low = 300.0
f_high = 6000.0
noise_threshold = 3  # The number of standard deviations for peak detection

# With `stub_test` enabled only the first 5 minutes (60.0 * 5 seconds) are processed.
stub_test = False
if stub_test:
    recording = recording.time_slice(start_time=0, end_time=60.0 * 5)

spike_times_per_channel = thresholding_pipeline(
    recording=recording,
    f_notch=f_notch,
    bandwidth=bandwidth,
    f_low=f_low,
    f_high=f_high,
    noise_threshold=noise_threshold,
    verbose=True,
)

The output of the pipeline is a dictionary whose keys are the channel ids and the values are the times (in seconds) at which the threshold was crossed.

In [None]:
# Threshold-crossing times for the channel with id "A-000".
spike_times_per_channel["A-000"]

#### The algorithm can be run from NWB

In [None]:
from pathlib import Path

from spikeinterface.extractors import NwbRecordingExtractor

from dicarlo_lab_to_nwb.conversion.pipeline import thresholding_pipeline

# Change this for the location of the NWB files in your system
nwb_folder_path = Path.home() / "conversion_nwb"
nwbfile_path = nwb_folder_path.joinpath("pico_20230214_140610.nwb")

# The recording is read from the NWB file instead of the Intan file.
recording = NwbRecordingExtractor(file_path=nwbfile_path)

# Parameters of the pipeline
f_notch = 60  # Hz
bandwidth = 10
f_low, f_high = 300.0, 6000.0
noise_threshold = 3  # The number of standard deviations for peak detection

# With `stub_test` enabled only the first 5 minutes (60.0 * 5 seconds) are processed.
stub_test = False
if stub_test:
    recording = recording.time_slice(start_time=0, end_time=60.0 * 5)

spike_times_per_channel = thresholding_pipeline(
    recording=recording,
    noise_threshold=noise_threshold,
    f_notch=f_notch,
    bandwidth=bandwidth,
    f_low=f_low,
    f_high=f_high,
    verbose=True,
)


# Calculating PSTH

In [None]:
%load_ext autoreload
%autoreload 
from pathlib import Path

from spikeinterface.extractors import IntanRecordingExtractor
from dicarlo_lab_to_nwb.conversion.pipeline import thresholding_pipeline
from dicarlo_lab_to_nwb.conversion.data_locator import locate_intan_file_path
from dicarlo_lab_to_nwb.conversion.probe import attach_probe_to_recording

data_folder = Path("/media/heberto/One Touch/DiCarlo-CN-data-share")
image_set_name = "domain-transfer-2023"
subject = "pico"
session_date = "20230214"
session_time = "140610"

# Parameters of the pipeline
f_notch = 60  # Hz
bandwidth = 10
f_low = 300.0
f_high = 6000.0
noise_threshold = 3  # The number of standard deviations for peak detection

intan_file_path = locate_intan_file_path(
    data_folder=data_folder,
    image_set_name=image_set_name,
    subject=subject,
    session_date=session_date,
    session_time=session_time,
)


stream_name = "RHD2000 amplifier channel"
recording = IntanRecordingExtractor(
    file_path=intan_file_path,
    stream_name=stream_name,
    ignore_integrity_checks=True,
    all_annotations=True,
)


attach_probe_to_recording(recording=recording)
chunk_duration = 10.0  # 10 seconds
job_kwargs = dict(n_jobs=-1, progress_bar=True, chunk_duration=chunk_duration)
verbose = True 

dict_of_recordings = recording.split_by(property="probe", outputs="dict")
dict_of_spikes_times_per_channel = {}

for probe_name, recording in dict_of_recordings.items():
    spikes_times_per_channel = thresholding_pipeline(
        recording=recording,
        f_notch=f_notch,
        bandwidth=bandwidth,
        f_low=f_low,
        f_high=f_high,
        noise_threshold=noise_threshold,
        job_kwargs=job_kwargs,
        verbose=verbose
    )
    
    dict_of_spikes_times_per_channel[probe_name] = spikes_times_per_channel

# We merge all the dictionaries
dict_of_spikes_times = {key: value for d in dict_of_spikes_times_per_channel.values() for key, value in d.items()}

In [None]:
# numpy is imported here because this section re-imports everything else it needs and
# `np` is used just below.
import numpy as np
import pandas as pd
from dicarlo_lab_to_nwb.conversion.data_locator import locate_mworks_processed_file_path

mworks_processed_file_path = locate_mworks_processed_file_path(
    data_folder=data_folder,
    image_set_name=image_set_name,
    subject=subject,
    session_date=session_date,
    session_time=session_time,
)


mworks_processed_file_path = Path(mworks_processed_file_path)
dtype = {"stimulus_presented": np.uint32, "fixation_correct": bool}
mwkorks_df = pd.read_csv(mworks_processed_file_path, dtype=dtype)

# The "samp_on_us" column is divided by 1e6 to express the presentation times in seconds.
ground_truth_time_column = "samp_on_us"
stimuli_presentation_times_seconds = mwkorks_df[ground_truth_time_column] / 1e6
stimuli_presentation_id = mwkorks_df["stimulus_presented"]

# Sorted stimulus ids give the stimulus axis a reproducible order.
# NOTE(review): the original comment here read "Sort the stimuli by file-name"; the code
# sorts by stimulus id - confirm that this is the intended order.
stimuli_ids = stimuli_presentation_id.unique()
stimuli_ids_sorted = sorted(stimuli_ids)

# Presentation times (seconds) grouped by stimulus id, keyed in sorted-id order.
stimuli_presentation_times_dict = {
    stimulus_id: stimuli_presentation_times_seconds[stimuli_presentation_id == stimulus_id].values
    for stimulus_id in stimuli_ids_sorted
}

# Be sure that the list is sorted by unit/site name.
unit_ids = list(dict_of_spikes_times.keys())
unit_ids_sorted = sorted(unit_ids)
spike_times_list = [dict_of_spikes_times[unit_id] for unit_id in unit_ids_sorted]

In [None]:
from dicarlo_lab_to_nwb.conversion.psth import calculate_event_psth

# Ten bins spanning 400 ms in total, so each bin is 40 ms wide.
number_of_bins = 10
bins_span_milliseconds = 400.0
bin_width_in_milliseconds = bins_span_milliseconds / number_of_bins
# This means the first bin starts 200 ms before the image presentation
milliseconds_from_event_to_first_bin = -200.0
# Largest number of presentations of any single stimulus; passed as `number_of_events` below.
max_repetitions = stimuli_presentation_id.value_counts().max()

# Let's calculate the PSTH for a single stimulus
a_stimuli = stimuli_ids_sorted[0]
stimulus_presentation_times = stimuli_presentation_times_dict[a_stimuli]
psth_per_stimuli = calculate_event_psth(
    spike_times_list=spike_times_list,
    event_times_seconds=stimulus_presentation_times,
    bin_width_in_milliseconds=bin_width_in_milliseconds,
    number_of_bins = number_of_bins,
    milliseconds_from_event_to_first_bin=milliseconds_from_event_to_first_bin,
    number_of_events=max_repetitions,
)

# Show the entry at index 1 along the first axis of the result.
psth_per_stimuli[1, ...]

In [None]:
from dicarlo_lab_to_nwb.conversion.psth import calculate_event_psth_numpy_naive

# Same stimulus and same arguments as the previous cell, using the naive numpy implementation.
stimulus_presentation_times = stimuli_presentation_times_dict[a_stimuli]
psth_per_stimuli = calculate_event_psth_numpy_naive(
    spike_times_list=spike_times_list,
    event_times_seconds=stimulus_presentation_times,
    bin_width_in_milliseconds=bin_width_in_milliseconds,
    number_of_bins = number_of_bins,
    milliseconds_from_event_to_first_bin=milliseconds_from_event_to_first_bin,
    number_of_events=max_repetitions,
)

# Show the entry at index 1 along the first axis of the result.
psth_per_stimuli[1, ...]

### Aggregate PSTH for all stimuli in session

In [None]:
from tqdm.auto import tqdm
from dicarlo_lab_to_nwb.conversion.psth import calculate_event_psth
import time

number_of_units = len(spike_times_list)
number_of_stimuli = len(stimuli_presentation_times_dict)

# Output array of shape (units, stimuli, repetitions, bins), pre-filled with NaN.
session_psth = np.full(
    shape=(number_of_units, number_of_stimuli, max_repetitions, number_of_bins), fill_value=np.nan
)
desc = "Calculating PSTH for stimuli"

time_start = time.time()
# One PSTH per stimulus, written into that stimulus' slot along the second axis.
for stimulus_index, stimuli_id in enumerate(tqdm(stimuli_ids_sorted, desc=desc, unit=" stimuli processed")):
    stimulus_presentation_times = stimuli_presentation_times_dict[stimuli_id]
    psth_per_stimuli = calculate_event_psth(
        spike_times_list=spike_times_list,
        event_times_seconds=stimulus_presentation_times, 
        bin_width_in_milliseconds=bin_width_in_milliseconds,
        number_of_bins=number_of_bins,
        milliseconds_from_event_to_first_bin=milliseconds_from_event_to_first_bin,
        number_of_events=max_repetitions,
    )
    session_psth[:, stimulus_index, :, :] = psth_per_stimuli
    
# Keep a reference to this result; it is compared against the naive implementation later.
session_psth_numba = session_psth

time_stop = time.time()
time_numba = time_stop - time_start
print(f"Time elapsed: {time_numba:2.2f} seconds")

In [None]:
# Print the array shape next to the sizes it was built from.
print(f"session_psth.shape: {session_psth.shape}, {number_of_stimuli=}, {max_repetitions=}, {number_of_bins=}, {number_of_units=}")

To transform the data into the DiCarlo lab format a simple transpose suffices

In [None]:
# Move the first axis to the end: (units, stimuli, repetitions, bins) -> (stimuli, repetitions, bins, units).
session_psth.transpose((1, 2, 3, 0)).shape

To compare to a baseline this is the naive numpy implementation

In [None]:
import time

from tqdm.auto import tqdm
from dicarlo_lab_to_nwb.conversion.psth import calculate_event_psth_numpy_naive

number_of_units = len(spike_times_list)
number_of_stimuli = len(stimuli_presentation_times_dict)

# Fresh output array of shape (units, stimuli, repetitions, bins), pre-filled with NaN.
session_psth = np.full(
    shape=(number_of_units, number_of_stimuli, max_repetitions, number_of_bins), fill_value=np.nan
)

# The timer must be stored in `time_start`, the name used for the subtraction below;
# otherwise the elapsed time would be measured from the start of an earlier cell.
time_start = time.time()
# The dictionary was built from the sorted stimulus ids, so iterating its values visits
# the stimuli in the same order as the other implementation.
stimuli_times_iterator = tqdm(stimuli_presentation_times_dict.values(), desc="Processing Stimuli")
for stimulus_index, stimulus_times in enumerate(stimuli_times_iterator):
    psth_per_stimuli = calculate_event_psth_numpy_naive(
        spike_times_list=spike_times_list,
        event_times_seconds=stimulus_times,
        bin_width_in_milliseconds=bin_width_in_milliseconds,
        number_of_bins=number_of_bins,
        milliseconds_from_event_to_first_bin=milliseconds_from_event_to_first_bin,
        number_of_events=max_repetitions,
    )
    session_psth[:, stimulus_index, :, :] = psth_per_stimuli

# Keep a reference to this result for the comparison with the other implementation.
session_psth_naive = session_psth

time_stop = time.time()
time_naive = time_stop - time_start
print(f"Time elapsed: {time_naive:2.2f} seconds")
time_naive_minutes = time_naive / 60.0
print(f"Time elapsed: {time_naive_minutes:2.2f} minutes")

In [None]:
# Check that both implementations agree, treating NaN entries in the same position as equal.
np.allclose(session_psth_numba, session_psth_naive, equal_nan=True)

On my local machine this is ~10x faster than the naive numpy implementation

### Calculating PSTH from NWBFile

In [None]:
from pathlib import Path
from pynwb import NWBHDF5IO

# Folder where the converted NWB files are expected.
nwb_folder_path = Path.home() / "conversion_nwb"   

nwbfile_path = nwb_folder_path / "pico_20230214_140610.nwb"
assert nwbfile_path.is_file(), f"{nwbfile_path} does not exist"


# The handle `io` is not closed in this cell; the next cell reads from `nwbfile`.
io = NWBHDF5IO(nwbfile_path, mode="r")
nwbfile = io.read()

In [None]:
from dicarlo_lab_to_nwb.conversion.psth import build_psth_from_nwbfile

# Ten bins spanning 400 ms in total, so each bin is 40 ms wide.
number_of_bins = 10
bins_span_milliseconds = 400.0
bin_width_in_milliseconds = bins_span_milliseconds / number_of_bins
# This means the first bin starts 200 ms before the image presentation
milliseconds_from_event_to_first_bin = -200.0  

# Returns two objects, unpacked here as the PSTHs and the stimuli presentation times.
psth_dict, stimuli_presentation_times_dict = build_psth_from_nwbfile(
    nwbfile=nwbfile,
    bin_width_in_milliseconds=bin_width_in_milliseconds,
    number_of_bins=number_of_bins,
    milliseconds_from_event_to_first_bin=milliseconds_from_event_to_first_bin,
)

# Aggregating PSTHs from multiple sessions

In [None]:
from pathlib import Path
from pynwb import NWBHDF5IO
import numpy as np

# Change this for the location of the NWB files in your system
nwb_folder_path = Path.home() / "conversion_nwb"

# Here we could filter for a type of experiment using the naming convention, the folder structure
# or the metadata in the file.
# The names are sorted because `iterdir` yields entries in arbitrary order; sorting keeps the
# order in which sessions are concatenated reproducible.
file_names_available = sorted(
    path.name for path in nwb_folder_path.iterdir() if path.is_file() and path.suffix == ".nwb"
)

# The file handles are left open: the PSTH data of each file is read further down.
nwbfile_list = []
for file_name in file_names_available:
    nwbfile_file_path = nwb_folder_path / file_name
    io = NWBHDF5IO(nwbfile_file_path, "r")
    nwbfile = io.read()
    nwbfile_list.append(nwbfile)


def is_binned_spikes(interface):
    """Return True when `interface` has data type "BinnedAlignedSpikes"."""
    return interface.data_type == "BinnedAlignedSpikes"


# We have a list per experimental session: one {interface name: data} dictionary per file.
psth_dict_list = []
for nwbfile in nwbfile_list:
    interfaces = nwbfile.processing["ecephys"].data_interfaces.values()
    valid_interfaces = [interface for interface in interfaces if is_binned_spikes(interface)]
    psth_dict = {interface.name: interface.data for interface in valid_interfaces}
    psth_dict_list.append(psth_dict)


# Every stimulus name seen in any session. Sorted (lexicographically) so that row `i` of
# `aggregated_psth` always corresponds to `all_stimuli[i]`; plain set iteration order is
# not reproducible between runs.
all_stimuli = sorted(set().union(*(psth_dict.keys() for psth_dict in psth_dict_list)))
if not all_stimuli:
    raise ValueError(f"No BinnedAlignedSpikes data found in the NWB files of {nwb_folder_path}")

# Aggregate psth per stimuli over NWBFiles: sessions are concatenated along axis 1 (repetitions).
psth_per_stimuli_dict = {}
for stimuli_id in all_stimuli:
    stimuli_psth_list = (psth_dict.get(stimuli_id, None) for psth_dict in psth_dict_list)
    stimuli_psth_list = [psth for psth in stimuli_psth_list if psth is not None]
    stimuli_psth_aggregated = np.concatenate(stimuli_psth_list, axis=1)
    psth_per_stimuli_dict[stimuli_id] = stimuli_psth_aggregated

# Calculate the max number of repetitions for any stimuli
max_repetitions = max(psth.shape[1] for psth in psth_per_stimuli_dict.values())
# Units (axis 0) and bins (axis 2) are read from the first aggregated PSTH.
first_psth = next(iter(psth_per_stimuli_dict.values()))
num_units = first_psth.shape[0]
num_bins = first_psth.shape[2]
di_carlo_shape = (len(all_stimuli), max_repetitions, num_bins, num_units)

# Coerce to di carlo format shape and fill with nan; stimuli with fewer repetitions than
# `max_repetitions` keep NaN in the unused repetition slots.
aggregated_psth = np.full(shape=di_carlo_shape, fill_value=np.nan)
for stimuli_index, stimuli_psth in enumerate(psth_per_stimuli_dict.values()):
    # (units, repetitions, bins) -> (repetitions, bins, units)
    psth_di_carlo = stimuli_psth.transpose(1, 2, 0)
    events_per_stimuli = stimuli_psth.shape[1]
    aggregated_psth[stimuli_index, :events_per_stimuli, ...] = psth_di_carlo

aggregated_psth.shape
