This notebook creates the data necessary for both evaluation and selection of waveforms to sort in the current version of DeepSpikeSort (ultimately, it will be used only for evaluation).

In [None]:
import os

import numpy as np
import pandas as pd

from one.api import ONE

Install the latest version of SpikeInterface as recommended in the **From source** section [here](https://spikeinterface.readthedocs.io/en/latest/get_started/installation.html).

In [None]:
import spikeinterface.full as si
from spikeinterface.sortingcomponents.peak_detection import detect_peaks

print(f"SpikeInterface version: {si.__version__}")

Import the local DeepSpikeSort helper functions.

In [None]:
from deepspikesort.create_dataset import process_peaks
from deepspikesort import preprocessing


## 1. Read recording session

For this project, we will be using session [sub-CSHL049](https://dandiarchive.org/dandiset/000409/draft/files?location=sub-CSHL049&page=1) of the [IBL Brain Wide Map Dataset](https://dandiarchive.org/dandiset/000409/draft). 

In [7]:
# Root folder that holds every artifact produced for this session.
data_folder = "../data/sub-CSHL049"

# Create it up front; a no-op when the folder is already there.
os.makedirs(data_folder, exist_ok=True)

To obtain this data, we stream it through the ONE API using the session identifier (`eid`) listed in the [metadata](https://api.dandiarchive.org/api/dandisets/000409/versions/draft/assets/7e4fa468-349c-44a9-a482-26898682eed1/).

In [None]:
# Connect to the public IBL OpenAlyx server. The credentials below are the
# read-only ones published in the IBL documentation, not a private secret.
one = ONE(
    base_url="https://openalyx.internationalbrainlab.org",
    username="intbrainlab",
    password="international",
    silent=True
)

# Session identifier (eid) of the recording used throughout this notebook.
eid = "c99d53e6-c317-4c53-99ba-070b26673ac4"

# `eid2pid` returns two parallel lists: probe insertion ids and probe labels.
# Select the insertion by label rather than by position so that the sorting
# loaded from `pid` belongs to the same probe as the 'probe00.ap' stream read
# in the recording cell; the order of the returned lists is not guaranteed.
probe_label = "probe00"
pids, probe_labels = one.eid2pid(eid)
probe_labels = list(probe_labels or [])
if probe_label not in probe_labels:
    raise ValueError(
        f"Session {eid} has no insertion labelled '{probe_label}' "
        f"(available: {probe_labels})"
    )
pid = pids[probe_labels.index(probe_label)]

Using SpikeInterface, we can read and save the data to disk. 

In [None]:
# All SpikeInterface objects saved to disk (recording, sorting, analyzer)
# live under this sub-folder of the session data folder.
extractors_folder = os.path.join(data_folder, "extractors")

# Create it if needed; existing content is left untouched.
os.makedirs(extractors_folder, exist_ok=True)

### Recording

In [None]:
# Load the preprocessed recording from disk when it has already been saved;
# otherwise stream the raw AP band, preprocess it and cache the result.
preprocessed_folder = os.path.join(extractors_folder, "preprocessed")

if not os.path.exists(preprocessed_folder):
    # Folder handed to SpikeInterface as `cache_folder` for the streamed data.
    # NOTE(review): the location under `data_folder` is a choice made here,
    # the notebook did not define one - adjust if another cache is preferred.
    one_folder = os.path.join(data_folder, "one_cache")
    os.makedirs(one_folder, exist_ok=True)

    # `spikeinterface.full` exposes the extractors and preprocessing functions
    # directly on `si`, so no extra `se` / `spre` aliases are required.
    recording = si.read_ibl_recording(eid=eid, stream_name='probe00.ap', cache_folder=one_folder)

    # Preprocess the recording: band-pass filter, then global median reference
    recording_f = si.bandpass_filter(recording, freq_min=300, freq_max=6000)
    recording_cmr = si.common_reference(recording_f, reference='global', operator='median')

    # Save the preprocessed recording to disk
    job_kwargs = dict(n_jobs=10, chunk_duration="1s", progress_bar=True)
    recording_cmr.save(folder=preprocessed_folder, **job_kwargs)
else:
    recording_cmr = si.load_extractor(preprocessed_folder)

recording_cmr

In [9]:
# Channel table for the probe, cached as a .npy file next to the session data.
channel_locations_file = os.path.join(data_folder, "channel_locations.npy")

if os.path.exists(channel_locations_file):
    # Reuse the table saved by a previous run.
    channel_locations = np.load(channel_locations_file)
else:
    # First run: extract the table from the preprocessed recording and cache it.
    channel_locations = preprocessing.extract_channels(recording_cmr)
    np.save(channel_locations_file, channel_locations)

display(pd.DataFrame(channel_locations))

Unnamed: 0,channel_index,channel_location_x,channel_location_y
0,0,16,0
1,1,48,0
2,2,0,20
3,3,32,20
4,4,16,40
...,...,...,...
379,379,32,3780
380,380,16,3800
381,381,48,3800
382,382,0,3820


### Sorting

In [None]:
# Sorting for the selected probe insertion, cached on disk.
sorting_folder = os.path.join(extractors_folder, "sorting")

if os.path.exists(sorting_folder):
    # A saved copy exists: load it instead of querying IBL again.
    sorting = si.load_extractor(sorting_folder)
else:
    # Read the sorting for insertion `pid` from IBL and save it for later runs.
    sorting = si.read_ibl_sorting(pid)
    sorting.save(folder=sorting_folder)

sorting

### Sorting Analyzer

In [None]:
# Sorting analyzer pairing the sorting with the preprocessed recording.
analyzer_folder = os.path.join(extractors_folder, "analyzer")

if os.path.exists(analyzer_folder):
    # Reuse the analyzer saved by a previous run.
    analyzer = si.load_sorting_analyzer(analyzer_folder)
else:
    # Build the analyzer in memory first; it is written to disk only after
    # the extensions below have been computed.
    analyzer = si.create_sorting_analyzer(
        sorting=sorting,
        recording=recording_cmr,
        format="memory",
    )

    # Extensions to compute, each with its own parameters.
    compute_dict = dict(
        random_spikes={'method': 'uniform'},
        waveforms={'ms_before': 1.0, 'ms_after': 2.0},
        templates={'operators': ["average", "median", "std"]},
    )
    job_kwargs = {"n_jobs": 10, "chunk_duration": "1s", "progress_bar": True}
    analyzer.compute(compute_dict, **job_kwargs)

    # Persist the computed analyzer as a binary folder.
    analyzer.save_as(folder=analyzer_folder, format="binary_folder")

analyzer

---

## 2. Detect peaks

In [None]:
# Detect negative peaks on the preprocessed recording, filter them, and cache
# the filtered result so the expensive detection only runs once.
# Derive the folder from `data_folder` like every other cell instead of
# repeating the session path by hand.
peaks_folder = os.path.join(data_folder, "peaks")
peaks_file = os.path.join(peaks_folder, "peaks.npy")

if os.path.exists(peaks_file):
    peaks_filtered = np.load(peaks_file)
else:
    os.makedirs(peaks_folder, exist_ok=True)

    job_kwargs = dict(chunk_duration='1s', n_jobs=10, progress_bar=True)

    # `detect_peaks` comes from spikeinterface.sortingcomponents.peak_detection
    # and is imported in the SpikeInterface import cell at the top.
    peaks = detect_peaks(
        recording_cmr,
        method='locally_exclusive',
        peak_sign='neg',
        detect_threshold=6,
        radius_um=100,
        **job_kwargs
    )

    # Project-specific filtering of the raw detections.
    peaks_filtered = process_peaks.filter_peaks(recording_cmr, peaks)

    np.save(peaks_file, peaks_filtered)

display(pd.DataFrame(peaks_filtered))

Unnamed: 0,sample_index,channel_index,amplitude
0,93,326,-27.0
1,147,348,-40.0
2,177,337,-67.0
3,207,6,-54.0
4,269,330,-34.0
...,...,...,...
3260855,125189311,222,-36.0
3260856,125189392,273,-24.0
3260857,125189402,89,-37.0
3260858,125189402,269,-21.0


## 3. Match peaks to spikes

In [None]:
# Build (or reload) the table of sorted spikes for this session.
spikes_folder = os.path.join(data_folder, "spikes")
os.makedirs(spikes_folder, exist_ok=True)

spikes_file = os.path.join(spikes_folder, "spikes.npy")

if not os.path.exists(spikes_file):
    # Pass the channel table built in the recording section. The original
    # argument `channels` was never defined in this notebook and raised a
    # NameError on a fresh kernel.
    spikes = preprocessing.extract_spikes(sorting, analyzer, channel_locations)
    np.save(spikes_file, spikes)
else:
    spikes = np.load(spikes_file)

display(pd.DataFrame(spikes))

Unnamed: 0,spike_index,sample_index,channel_index,channel_location_x,channel_location_y,unit_index
0,0,472,341,48.0,3400.0,271
1,1,511,361,48.0,3600.0,306
2,2,606,354,0.0,3540.0,297
3,3,680,361,48.0,3600.0,306
4,4,715,325,48.0,3240.0,235
...,...,...,...,...,...,...
4604408,4604408,125188816,21,48.0,200.0,26
4604409,4604409,125188838,155,32.0,1540.0,105
4604410,4604410,125188912,325,48.0,3240.0,237
4604411,4604411,125188967,326,0.0,3260.0,239


In [None]:
# Detected peaks matched against the sorted spikes, cached next to the peaks.
peaks_matched_file = os.path.join(peaks_folder, "peaks_matched.npy")

if not os.path.exists(peaks_matched_file):
    # First run: match the filtered peaks to the spikes and cache the result.
    peaks_matched = process_peaks.match_peaks(peaks_filtered, spikes)
    np.save(peaks_matched_file, peaks_matched)
else:
    # Reuse the matches saved by a previous run.
    peaks_matched = np.load(peaks_matched_file)

display(pd.DataFrame(peaks_matched))

Unnamed: 0,peak_index,time,channel_index,channel_location_x,channel_location_y,amplitude,unit_index
0,0,93,326,0,3260,-27,-1
1,1,147,348,16,3480,-40,-1
2,2,177,337,48,3360,-67,-1
3,3,207,6,0,60,-54,-1
4,4,269,330,0,3300,-34,-1
...,...,...,...,...,...,...,...
3260855,3260855,125189311,222,0,2220,-36,-1
3260856,3260856,125189392,273,48,2720,-24,-1
3260857,3260857,125189402,89,48,880,-37,-1
3260858,3260858,125189402,269,48,2680,-21,-1
