# Load small dandiset

author: laquitainesteeve@gmail.com

purpose: create a small demo dataset (Neuropixels, evoked, 20 kHz)

# Setup 

Activate the `dandi` virtual environment (envs/dandi.yml), then register it as a Jupyter kernel:

```bash
python -m ipykernel install --user --name dandi --display-name "dandi"
```

In [1]:
%%time 

# import python packages
import numpy as np
from dandi.dandiapi import DandiAPIClient
import spikeinterface.extractors as se
import spikeinterface.sorters as ss
import spikeinterface
from pynwb.file import NWBFile, Subject
from pynwb import NWBHDF5IO
import uuid
from datetime import datetime
from dateutil.tz import tzlocal
print("spikeinterface", spikeinterface.__version__)
from time import time

  from .autonotebook import tqdm as notebook_tqdm


ImportError: cannot import name 'SortingAnalyzer' from 'spikeinterface' (/home/jovyan/steevelaquitaine/spikebias/envs/dandi/lib/python3.10/site-packages/spikeinterface/__init__.py)

## Custom functions

In [2]:
def get_memory_size(recording):
    """Compute the raw data size of a RecordingExtractor.

    The size is (number of channels) x (number of frames) x (bytes per
    sample of the recording dtype), divided by 1024**3.

    Args:
        recording: object exposing ``get_num_channels()``,
            ``get_num_frames()`` and ``get_dtype()``.

    Returns:
        float: size of the traces in units of 1024**3 bytes.
    """
    n_samples = recording.get_num_channels() * recording.get_num_frames()
    bytes_per_sample = np.dtype(recording.get_dtype()).itemsize
    # 1024**3 bytes per unit (binary gigabyte)
    return n_samples * bytes_per_sample / (1024**3)

## Load dandiset

In [3]:
%%time

# Asset to load from the dandiset (npx, evoked, 20 kHz)
dandiset_id = '001250'
filepath = 'sub-002-fitted/sub-002-fitted_ecephys.nwb'

# Resolve the asset's S3 URL through the DANDI API ('draft' version)
with DandiAPIClient() as client:
    dandiset = client.get_dandiset(dandiset_id, 'draft')
    asset = dandiset.get_asset_by_path(filepath)
    s3_path = asset.get_content_url(follow_redirects=1, strip_query=True)
print(f"s3_path: {s3_path}")

# Open the Recording and Sorting extractors on the remote NWB file;
# both read the same file with stream_mode="remfile"
stream_kwargs = dict(file_path=s3_path, stream_mode="remfile")
Recording = se.NwbRecordingExtractor(**stream_kwargs)
Sorting = se.NwbSortingExtractor(**stream_kwargs)

# report
for extractor in (Recording, Sorting):
    print('\n', extractor)

s3_path: https://dandiarchive.s3.amazonaws.com/blobs/9d6/6ed/9d66ed40-af31-43aa-b4ba-246d2206dcad

 NwbRecordingExtractor: 384 channels - 20.0kHz - 1 segments - 72,359,964 samples 
                       3,618.00s (1.00 hours) - float32 dtype - 103.51 GiB
  file_path: https://dandiarchive.s3.amazonaws.com/blobs/9d6/6ed/9d66ed40-af31-43aa-b4ba-246d2206dcad

 NwbSortingExtractor: 1836 units - 1 segments - 20.0kHz
  file_path: https://dandiarchive.s3.amazonaws.com/blobs/9d6/6ed/9d66ed40-af31-43aa-b4ba-246d2206dcad
CPU times: user 174 ms, sys: 8.7 ms, total: 183 ms
Wall time: 1.61 s


In [5]:
%%time 

# Re-wire probe because it is not always recognized by kilosort4.
# NOTE: set_probe() is not in-place by default (in_place=False): it returns a
# new recording with the probe attached. The result must be re-assigned,
# otherwise `Recording` is left without the probe.
probe = Recording.get_probe()
Recording = Recording.set_probe(probe)

# unit-test: the layer annotation must still be available after re-wiring,
# because the channel selection further down relies on it
assert "layers" in Recording.get_property_keys(), "RecordingExtractor should contain layer property"

CPU times: user 19.6 ms, sys: 7.91 ms, total: 27.5 ms
Wall time: 176 ms


  warn("There is no Probe attached to this recording. Creating a dummy one with contact positions")


## Make the small dataset

In [44]:
# select layer 5, 6 (most of the activity)
selected_layers = ['L5', 'L6']
channel_ids = Recording.channel_ids
channel_ids = channel_ids[np.isin(Recording.get_property('layers'), selected_layers)]
SmallRecording = Recording.channel_slice(channel_ids=channel_ids)
print("\nRecording:", SmallRecording)

# select the first minute (~550 MB for the selected float32 channels)
duration_sec = 60
sampling_rate = Recording.get_sampling_frequency()
start_frame = 0
# the sampling frequency is a float: frame indices must be cast to int
end_frame = int(round(sampling_rate * duration_sec))
SmallRecording = SmallRecording.frame_slice(start_frame=start_frame, end_frame=end_frame)
SmallSorting = Sorting.frame_slice(start_frame=start_frame, end_frame=end_frame)

print("\nRecording:", SmallRecording)
print("\nSorting:", SmallSorting)

# unit-test
# - layers
# - max spike times lower than number of frames

# compare as sets so the check depends neither on the order of
# `selected_layers` nor on both sides having the same length
kept_layers = set(np.unique(SmallRecording.get_property('layers')).tolist())
assert kept_layers == set(selected_layers), "layers are not correct"

# latest spike over ALL units: take the max within each unit, then across units.
# Units without spikes in the slice are skipped; if no unit has any spike the
# default (-1) makes the check pass trivially.
max_spike_time = max(
    (
        int(spike_train.max())
        for spike_train in (
            SmallSorting.get_unit_spike_train(unit_id=unit)
            for unit in SmallSorting.get_unit_ids()
        )
        if len(spike_train) > 0
    ),
    default=-1,
)
assert max_spike_time < end_frame, "max spike timestamp should be lower that the number of frames"

# Write [TODO]


Recording: ChannelSliceRecording: 120 channels - 20.0kHz - 1 segments - 72,359,964 samples 
                       3,618.00s (1.00 hours) - float32 dtype - 32.35 GiB

Recording: FrameSliceRecording: 120 channels - 20.0kHz - 1 segments - 1,200,000 samples 
                     60.00s (1.00 minutes) - float32 dtype - 549.32 MiB

Sorting: FrameSliceSorting: 1836 units - 1 segments - 20.0kHz
