[`spikesorters_docker`](https://github.com/catalystneuro/spikesorters_docker/) is a test repo for trying out dockerized environments for spike sorters within spikeinterface, using the [`hither`](https://github.com/flatironinstitute/hither) module. If this works, it would alleviate the headaches of installing different spike sorters that come from dependency issues or missing Matlab licenses.

### "Tutorial" script from github page

In [1]:
import spikeextractors as se
import spikesorters_docker as ss

# Toy recording created with dumpable=True; the second return value is not needed here.
rec, _ = se.example_datasets.toy_example(dumpable=True)

# Sort it with klusta, asking the wrapper to run inside a Docker container.
klusta_options = dict(output_folder="klusta_docker", use_docker=True)
ss.run_klusta(rec, **klusta_options)

# Show which Docker image is used by default for each sorter.
print(ss.default_docker_images)

09:52:25 [I] klustakwik KlustaKwik2 version git-7d8e9fa2-dirty
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if isinstance(start_frame, (float, np.float)):
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if isinstance(end_frame, (float, np.float)) and np.isfinite(end_frame):
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  elif isinstance(v, (np.int, np.int32, np.int64)):
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  elif isinstance(v, (np.float, np.float32, np.float64)):


Running in docker image docker://spikeinterface/klusta:0.1.0
HITHER JOBS: 1 pending; 0 queued; 0 running; 0 finished; 0 errored; 0 cache hits
Elapsed time for manage-pending-jobs: 15.561959981918335 sec
HITHER JOBS: 0 pending; 0 queued; 0 running; 1 finished; 0 errored; 0 cache hits
{'klusta': 'docker://spikeinterface/klusta:0.1.0', 'mountainsort4': 'docker://spikeinterface/mountainsort4:0.1.0', 'herdingspikes': 'docker://spikeinterface/herdingspikes:0.1.0'}


In [1]:
from pathlib import Path
import hither as hi  # should ultimately be hither2!
import time
import numpy as np
import spikesorters as ss
import spikeextractors as se
from spikesorters_docker.default_docker_images import default_docker_images

11:49:47 [I] klustakwik KlustaKwik2 version git-7d8e9fa2-dirty


### run_sorter function from spikesorters_docker

In [2]:
def run_sorter(sorter_name, recording, output_folder, delete_output_folder=False,
               grouping_property=None, parallel=False, verbose=False, raise_error=True, n_jobs=-1,
               joblib_backend='loky', use_docker=True, container=None,
               **params):
    """Run a spike sorter through a hither function bound to a Docker image.

    The recording is first dumped to ``<output_folder>/recording_input.json``.
    When ``use_docker`` is true, a hither function is defined at call time that
    reloads that JSON, calls ``ss.run_sorter`` and dumps the result to
    ``<output_folder>/sorting_output.json``; that file is then loaded and returned.

    Parameters
    ----------
    sorter_name : str
        Name of the sorter; also the key looked up in ``default_docker_images``
        when no ``container`` is given.
    recording : object
        Must provide ``dump_to_json(path)``.
    output_folder : str or Path
        Folder holding the exchanged JSON files; also forwarded to ``ss.run_sorter``.
    delete_output_folder, grouping_property, parallel, verbose, raise_error, n_jobs, joblib_backend
        Forwarded unchanged to ``ss.run_sorter``.
    use_docker : bool
        If false, nothing is sorted: a message is printed and ``None`` is returned
        (the recording has still been dumped at that point).
    container : str or None
        Docker image to run in. ``None`` selects ``default_docker_images[sorter_name]``.
    **params
        Extra sorter parameters, forwarded unchanged to ``ss.run_sorter``.

    Returns
    -------
    The extractor loaded from ``sorting_output.json``, or ``None`` when
    ``use_docker`` is false.

    Raises
    ------
    AssertionError
        If ``use_docker`` is true, ``container`` is ``None`` and ``sorter_name``
        has no entry in ``default_docker_images``.
    """
    output_folder = Path(output_folder)
    recording_json = output_folder / "recording_input.json"
    sorting_json = output_folder / "sorting_output.json"

    # dump recording
    recording.dump_to_json(recording_json)

    if not use_docker:
        print('Standard sorting is turned off, sorry!')
        return None

    # Honour an explicitly requested image; previously `docker_image` was left
    # unbound whenever `container` was passed.
    if container is None:
        assert sorter_name in default_docker_images, f"Default docker image for {sorter_name} not found"
        docker_image = default_docker_images[sorter_name]
    else:
        docker_image = container

    print(f"Running in docker image {docker_image}")

    # define hither function with container at run time
    # NOTE(review): another cell in this notebook passes `container=` to `hi.function`
    # and runs the job inside `hi.Config(container=True)`; confirm which form the
    # installed hither version needs for the job to really execute in Docker.
    @hi.function('run_sorter_docker_with_container', '0.1.0', image=docker_image)
    def run_sorter_docker_with_container(
            recording_json, sorter_name, **kwargs
    ):
        recording = se.load_extractor_from_json(recording_json)
        # run sorter
        t_start = time.time()
        sorting = ss.run_sorter(sorter_name, recording, **kwargs)
        t_stop = time.time()
        print(f'{sorter_name} run time {np.round(t_stop - t_start)}s')
        output_folder = Path(kwargs['output_folder'])
        sorting.dump_to_json(output_folder / 'sorting_output.json')

    sorting_job = run_sorter_docker_with_container.run(recording_json=recording_json, sorter_name=sorter_name,
                                                       output_folder=output_folder,
                                                       delete_output_folder=delete_output_folder,
                                                       grouping_property=grouping_property, parallel=parallel,
                                                       verbose=verbose, raise_error=raise_error, n_jobs=n_jobs,
                                                       joblib_backend=joblib_backend,
                                                       **params)
    # Block until the job is done, then read back what it wrote.
    sorting_job.wait()
    return se.load_extractor_from_json(sorting_json)

  and should_run_async(code)


Get cached data ready for spike sorting

In [2]:
import os

# Folder containing the example data. The default is the original author's local
# path; set the EXAMPLE_DATA_DIR environment variable to run on another machine.
dir_name = os.environ.get(
    'EXAMPLE_DATA_DIR',
    r'/mnt/d/freelance-work/catalyst-neuro/hussaini-lab-to-nwb/example_data_raw',
)
base_filename = 'axona_raw_5s'
filename = os.path.join(dir_name, base_filename)
print(filename)

/mnt/d/freelance-work/catalyst-neuro/hussaini-lab-to-nwb/example_data_raw/axona_raw_5s


  and should_run_async(code)


In [3]:
# Load the recording extractor stored as 'cached_data_preproc.pkl' in the data folder
# (presumably the cached, preprocessed recording — confirm how the file was produced).
# NOTE(review): loading a pickle can execute arbitrary code; only load files you trust.
r_cache = se.load_extractor_from_pickle(os.path.join(dir_name, 'cached_data_preproc.pkl'))

Versions are not the same. This might lead to errors. Use  spikeextractors version 0.9.5


### Klusta

In [5]:
# Sort the cached recording with klusta via the notebook's Docker-backed `run_sorter`.
klusta_kwargs = dict(
    adjacency_radius=50,
    threshold_weak_std_factor=3,
    verbose=True,
    use_docker=True,
)
sorting_KL_all = run_sorter('klusta', r_cache, os.path.join(dir_name, 'klusta'), **klusta_kwargs)

# Report how many units the sorter returned.
n_units_klusta = len(sorting_KL_all.get_unit_ids())
print('Found', n_units_klusta, 'units')

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  elif isinstance(v, (np.int, np.int32, np.int64)):
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  elif isinstance(v, (np.float, np.float32, np.float64)):


Running in docker image docker://spikeinterface/klusta:0.1.0
HITHER JOBS: 1 pending; 0 queued; 0 running; 0 finished; 0 errored; 0 cache hits
Elapsed time for manage-pending-jobs: 21.68818497657776 sec
HITHER JOBS: 0 pending; 0 queued; 0 running; 1 finished; 0 errored; 0 cache hits
Found 6 units


### Herdingspikes

In [None]:
# Files exchanged with the hither job: the dumped recording goes in, the sorting comes out.
output_folder = Path(os.path.join(dir_name, 'herdingspikes'))
recording_json = output_folder / "recording_input.json"
sorting_json = output_folder / "sorting_output.json"

# Serialise the cached recording so the job can reload it.
r_cache.dump_to_json(recording_json)


# hither function bound to the herdingspikes image
@hi.function('run_sorter_docker_with_container', '0.1.0',
             container='docker://spikeinterface/herdingspikes:0.1.0')
def run_sorter_docker_with_container(recording_json, sorter_name, **kwargs):
    """Reload the dumped recording, run ``sorter_name`` on it, and dump the sorting as JSON.

    ``kwargs`` are forwarded to ``ss.run_sorter`` and must contain ``output_folder``,
    which is where ``sorting_output.json`` is written.
    """
    rec = se.load_extractor_from_json(recording_json)
    # time the sorter call
    t_start = time.time()
    result = ss.run_sorter(sorter_name, rec, **kwargs)
    t_stop = time.time()
    print(f'{sorter_name} run time {np.round(t_stop - t_start)}s')
    result.dump_to_json(Path(kwargs['output_folder']) / 'sorting_output.json')


# Arguments forwarded to ss.run_sorter inside the job.
herdingspikes_job_kwargs = dict(
    output_folder=output_folder,
    delete_output_folder=False,
    grouping_property='group',
    parallel=True,
    verbose=True,
    raise_error=True,
    n_jobs=-1,
    joblib_backend='loky',
)

with hi.Config(container=True):
    sorting_job = run_sorter_docker_with_container.run(
        recording_json=recording_json,
        sorter_name='herdingspikes',
        **herdingspikes_job_kwargs,
    )
    sorting_job.wait()

# Read back the sorting written by the job.
sorting = se.load_extractor_from_json(sorting_json)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  elif isinstance(v, (np.int, np.int32, np.int64)):
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  elif isinstance(v, (np.float, np.float32, np.float64)):


Pulling docker container: docker://spikeinterface/herdingspikes:0.1.0

Handling job: run_sorter_docker_with_container


In [None]:
# Run herdingspikes by calling `ss.run_sorter` directly rather than the notebook's `run_sorter` helper.
hs_local_kwargs = dict(
    grouping_property='group',
    clustering_bandwidth=20,
    parallel=False,
    verbose=True,
    filter=False,
)
sorting_HS = ss.run_sorter(
    sorter_name_or_class='herdingspikes',
    recording=r_cache,
    output_folder=os.path.join(dir_name, 'herdingspikes'),
    **hs_local_kwargs,
)

n_units_hs = len(sorting_HS.get_unit_ids())
print('Found', n_units_hs, 'units')

In [6]:
# Same herdingspikes sort, this time through the notebook's Docker-backed `run_sorter`.
hs_docker_kwargs = dict(
    grouping_property='group',
    clustering_bandwidth=20,
    verbose=True,
    use_docker=True,
)
sorting_HS = run_sorter(
    'herdingspikes',
    r_cache,
    os.path.join(dir_name, 'herdingspikes'),
    **hs_docker_kwargs,
)

n_units_hs = len(sorting_HS.get_unit_ids())
print('Found', n_units_hs, 'units')

Running in docker image docker://spikeinterface/herdingspikes:0.1.0
Elapsed time for manage-pending-jobs: 2.0342297554016113 sec
HITHER JOBS: 0 pending; 0 queued; 0 running; 1 finished; 1 errored; 0 cache hits


Exception: Error in run_sorter_docker_with_container (0.1.0): Spike sorting failed: 'DataFrame' object has no attribute 'cl'. You can inspect the runtime trace in the herdingspikes.log of the output folder.'

### Mountainsort4

In [16]:
# Display the sorter-name -> default Docker image mapping imported from spikesorters_docker.
default_docker_images

{'klusta': 'docker://spikeinterface/klusta:0.1.0',
 'mountainsort4': 'docker://spikeinterface/mountainsort4:0.1.0',
 'herdingspikes': 'docker://spikeinterface/herdingspikes:0.1.0'}

In [26]:
def run_sorter(sorter_name, recording, output_folder, delete_output_folder=False,
               grouping_property=None, parallel=False, verbose=False, raise_error=True, n_jobs=-1,
               joblib_backend='loky', use_docker=True, container=None,
               **params):
    """Run a spike sorter through a hither function bound to a Docker image.

    This cell redefines (and therefore shadows) the `run_sorter` defined earlier
    in the notebook.

    The recording is first dumped to ``<output_folder>/recording_input.json``.
    When ``use_docker`` is true, a hither function is defined at call time that
    reloads that JSON, calls ``ss.run_sorter`` and dumps the result to
    ``<output_folder>/sorting_output.json``; that file is then loaded and returned.

    Parameters
    ----------
    sorter_name : str
        Name of the sorter; also the key looked up in ``default_docker_images``
        when no ``container`` is given.
    recording : object
        Must provide ``dump_to_json(path)``.
    output_folder : str or Path
        Folder holding the exchanged JSON files; also forwarded to ``ss.run_sorter``.
    delete_output_folder, grouping_property, parallel, verbose, raise_error, n_jobs, joblib_backend
        Forwarded unchanged to ``ss.run_sorter``.
    use_docker : bool
        If false, nothing is sorted: a message is printed and ``None`` is returned
        (the recording has still been dumped at that point).
    container : str or None
        Docker image to run in. ``None`` selects ``default_docker_images[sorter_name]``.
    **params
        Extra sorter parameters, forwarded unchanged to ``ss.run_sorter``.

    Returns
    -------
    The extractor loaded from ``sorting_output.json``, or ``None`` when
    ``use_docker`` is false.

    Raises
    ------
    AssertionError
        If ``use_docker`` is true, ``container`` is ``None`` and ``sorter_name``
        has no entry in ``default_docker_images``.
    """
    output_folder = Path(output_folder)
    recording_json = output_folder / "recording_input.json"
    sorting_json = output_folder / "sorting_output.json"

    # dump recording
    recording.dump_to_json(recording_json)

    if not use_docker:
        print('Standard sorting is turned off, sorry!')
        return None

    # Honour an explicitly requested image; previously `docker_image` was left
    # unbound whenever `container` was passed.
    if container is None:
        assert sorter_name in default_docker_images, f"Default docker image for {sorter_name} not found"
        docker_image = default_docker_images[sorter_name]
    else:
        docker_image = container

    print(f"Running in docker image {docker_image}")

    # define hither function with container at run time
    # NOTE(review): another cell in this notebook passes `container=` to `hi.function`
    # and runs the job inside `hi.Config(container=True)`; confirm which form the
    # installed hither version needs for the job to really execute in Docker.
    @hi.function('run_sorter_docker_with_container', '0.1.0',
                 image=docker_image)
    def run_sorter_docker_with_container(
            recording_json, sorter_name, **kwargs
    ):
        recording = se.load_extractor_from_json(recording_json)
        # run sorter
        t_start = time.time()
        sorting = ss.run_sorter(sorter_name, recording, **kwargs)
        t_stop = time.time()
        print(f'{sorter_name} run time {np.round(t_stop - t_start)}s')
        output_folder = Path(kwargs['output_folder'])
        sorting.dump_to_json(output_folder / 'sorting_output.json')

    sorting_job = run_sorter_docker_with_container.run(recording_json=recording_json, sorter_name=sorter_name,
                                                       output_folder=output_folder,
                                                       delete_output_folder=delete_output_folder,
                                                       grouping_property=grouping_property, parallel=parallel,
                                                       verbose=verbose, raise_error=raise_error, n_jobs=n_jobs,
                                                       joblib_backend=joblib_backend,
                                                       **params)
    # Block until the job is done, then read back what it wrote.
    sorting_job.wait()
    return se.load_extractor_from_json(sorting_json)

In [28]:
# Mountainsort4 through the Docker-backed `run_sorter`, with parallel=False.
ms4_kwargs = dict(
    grouping_property='group',
    parallel=False,
    verbose=True,
    filter=False,
    use_docker=True,
)
ms4_output_dir = os.path.join(dir_name, 'mountainsort4_group')
sorting_MS4 = run_sorter('mountainsort4', r_cache, ms4_output_dir, **ms4_kwargs)

# Report the number of units found.
print(f'Mountainsort4 found {len(sorting_MS4.get_unit_ids())} units')

Running in docker image docker://spikeinterface/mountainsort4:0.1.0
HITHER JOBS: 1 pending; 0 queued; 2 running; 2 finished; 1 errored; 0 cache hits
Elapsed time for manage-pending-jobs: 130.25226593017578 sec


KeyboardInterrupt: 

__Running mountainsort with the following seems to work!!!__

Ah, but this works because I set `parallel=False`, not because it runs in Docker!

In [31]:
# Paths for the manual Mountainsort4 hither run in the next cell. The folder was
# previously the copy-pasted 'herdingspikes' one, so Mountainsort4 would have written
# into the herdingspikes output; it now matches the other Mountainsort4 cells.
output_folder = Path(os.path.join(dir_name, 'mountainsort4_group'))
recording_json = output_folder / "recording_input.json"
sorting_json = output_folder / "sorting_output.json"

# dump recording
r_cache.dump_to_json(recording_json)

In [32]:
# hither function bound to the mountainsort4 image
@hi.function('run_sorter_docker_with_container', '0.1.0',
             image='docker://spikeinterface/mountainsort4:0.1.0')
def run_sorter_docker_with_container(recording_json, sorter_name, **kwargs):
    """Reload the dumped recording, run ``sorter_name`` on it, and dump the sorting as JSON.

    ``kwargs`` are forwarded to ``ss.run_sorter`` and must contain ``output_folder``,
    which is where ``sorting_output.json`` is written.
    """
    rec = se.load_extractor_from_json(recording_json)
    # time the sorter call
    t_start = time.time()
    result = ss.run_sorter(sorter_name, rec, **kwargs)
    t_stop = time.time()
    print(f'{sorter_name} run time {np.round(t_stop - t_start)}s')
    result.dump_to_json(Path(kwargs['output_folder']) / 'sorting_output.json')


# Arguments forwarded to ss.run_sorter inside the job.
mountainsort4_job_kwargs = dict(
    output_folder=output_folder,
    delete_output_folder=False,
    grouping_property='group',
    parallel=True,
    verbose=True,
    raise_error=True,
    n_jobs=-1,
    joblib_backend='loky',
)

sorting_job = run_sorter_docker_with_container.run(
    recording_json=recording_json,
    sorter_name='mountainsort4',
    **mountainsort4_job_kwargs,
)
sorting_job.wait()

# Read back the sorting written by the job.
sorting = se.load_extractor_from_json(sorting_json)

HITHER JOBS: 1 pending; 0 queued; 3 running; 2 finished; 3 errored; 0 cache hits
Elapsed time for manage-pending-jobs: 44.76244854927063 sec


KeyboardInterrupt: 

In [25]:
# Inspect the first element of `sorting.sortings`
# (presumably one sub-sorting per channel group, since grouping_property='group' was used — confirm).
sorting.sortings[0]

<spikeextractors.extractors.mdaextractors.mdaextractors.MdaSortingExtractor at 0x7ff4d71aaf40>

In [11]:
# Mountainsort4 through the Docker-backed `run_sorter`, this time with parallel=True.
ms4_parallel_kwargs = dict(
    grouping_property='group',
    parallel=True,
    verbose=True,
    filter=False,
    use_docker=True,
)
ms4_output_dir = os.path.join(dir_name, 'mountainsort4_group')
sorting_MS4 = run_sorter('mountainsort4', r_cache, ms4_output_dir, **ms4_parallel_kwargs)

# Report the number of units found.
print(f'Mountainsort4 found {len(sorting_MS4.get_unit_ids())} units')

Running in docker image docker://spikeinterface/mountainsort4:0.1.0
HITHER JOBS: 1 pending; 0 queued; 0 running; 1 finished; 1 errored; 0 cache hits
Elapsed time for manage-pending-jobs: 491.8024661540985 sec


KeyboardInterrupt: 

In [17]:
# IPython help lookup for `hi.function`, used to check which keyword arguments it accepts.
# NOTE(review): leftover interactive introspection — consider removing from the final notebook.
hi.function?