<a href="https://colab.research.google.com/github/magland/spikeforest_batch_run/blob/master/notebooks/spikeforest_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## SpikeForest bionet analysis

This notebook walks through a complete SpikeForest analysis of the bionet studies (the magland_synth studies are prepared and processed alongside them). Execute the first few cells, then skip down to the section of interest below.

In [0]:
# Only run this cell if you are running this on a hosted runtime that does not have these packages installed
# %%capture is used to suppress the output... this should take up to a minute to complete
%%capture
!pip install spikeforest
!pip install git+https://github.com/magland/spikeforest_batch_run

In [0]:
# Import the python packages -- autoreload is used for development purposes
%load_ext autoreload
%autoreload 2

import spikeforest as sf
from kbucket import client as kb

In [0]:
## Configure readonly access to kbucket -- use this if you only want to browse the results ---
# Selects the remote configuration named 'spikeforest1-readonly'; ask_password is not passed here.
sf.kbucketConfigRemote(name='spikeforest1-readonly')

In [3]:
## Configure read/write access to kbucket -- use this if you are preparing the studies or the processing batches
# ask_password=True: the recorded output of this cell shows an interactive password prompt.
# NOTE(review): presumably this replaces the readonly configuration if both cells are run -- confirm.
sf.kbucketConfigRemote(name='spikeforest1-readwrite',ask_password=True)

Enter password: ··········
Pairio user set to spikeforest. Test succeeded.


## Prepare recordings

In [0]:
def read_text_file(path):
  """Return the full text content of the file at `path`.

  The path is first handed to kb.realizeFile; the path it returns is then
  opened in text mode and read in one go.

  Raises:
    Exception: if kb.realizeFile returns None for `path`.
  """
  local_path=kb.realizeFile(path)
  if local_path is not None:
    with open(local_path,'r') as fh:
      contents=fh.read()
    return contents
  raise Exception('Unable to realize file: '+path)
  
def prepare_bionet_studies(*,basedir,channels):
  """Build the study and recording descriptors for the 'bionet' study set.

  For each of the three bionet studies, the description is read from the
  study's readme.txt and one recording descriptor is created per
  sub-directory reported by kb.readDir. When `channels` is non-empty, each
  recording also gets a 'units_true' entry obtained from
  sf.sf_batch.select_units_on_channels on its firings_true.mda.

  Args:
    basedir: base location; study directories are basedir+'/bionet/'+<study name>.
    channels: channel list stored on every recording (the same list object
      is referenced by all recording dicts).

  Returns:
    A (studies, recordings) tuple of two lists of dicts.
  """
  study_set_name='bionet'
  study_names=['bionet_drift','bionet_shuffle','bionet_static']
  studies=[]
  recordings=[]
  for study_name in study_names:
    study_dir=basedir+'/bionet/'+study_name
    studies.append(dict(
        name=study_name,
        study_set=study_set_name,
        directory=study_dir,
        description=read_text_file(study_dir+'/readme.txt')
    ))
    listing=kb.readDir(study_dir)
    for dsname in listing['dirs']:
      dsdir='{}/{}'.format(study_dir,dsname)
      recording=dict(
          name=dsname,
          study=study_name,
          description='',
          directory=dsdir,
          channels=channels
      )
      # The true units are only selected when a channel subset was given.
      if len(channels)>0:
        recording['units_true']=sf.sf_batch.select_units_on_channels(
            recording_dir=dsdir,
            firings=dsdir+'/firings_true.mda',
            channels=channels
        )
      recordings.append(recording)
  return studies, recordings

def prepare_magland_synth_studies(*,basedir):
  """Build the study and recording descriptors for the 'magland_synth' study set.

  All eight studies share one description, read once from
  basedir+'/magland_synth/readme.txt'. One recording descriptor is created
  per sub-directory reported by kb.readDir for each study directory.

  Args:
    basedir: base location; study directories are
      basedir+'/magland_synth/'+<dataset directory name>.

  Returns:
    A (studies, recordings) tuple of two lists of dicts.
  """
  study_set_name='magland_synth'
  dataset_dirs=[
      'datasets_noise10_K10_C4','datasets_noise10_K10_C8',
      'datasets_noise10_K20_C4','datasets_noise10_K20_C8',
      'datasets_noise20_K10_C4','datasets_noise20_K10_C8',
      'datasets_noise20_K20_C4','datasets_noise20_K20_C8',
  ]
  shared_description=read_text_file(basedir+'/magland_synth/readme.txt')
  studies=[]
  recordings=[]
  for dataset_dir in dataset_dirs:
    # [9:] drops the leading 'datasets_' (9 characters) from the directory name.
    study_name='magland_synth_'+dataset_dir[9:]
    study_dir=basedir+'/magland_synth/'+dataset_dir
    studies.append(dict(
        name=study_name,
        study_set=study_set_name,
        directory=study_dir,
        description=shared_description
    ))
    listing=kb.readDir(study_dir)
    for dsname in listing['dirs']:
      recording=dict(
          name=dsname,
          study=study_name,
          directory='{}/{}'.format(study_dir,dsname),
          description='One of the recordings in the {} study'.format(study_name)
      )
      recordings.append(recording)
  return studies, recordings

In [0]:
# Base kbucket location passed as `basedir` to the prepare_*_studies calls in the next cells.
basedir='kbucket://15734439d8cf/groundtruth'

In [0]:
# Channel subset stored on every bionet recording and used to select its true units.
channels=[0,1,2,3,4,5,6,7]
studies,recordings=prepare_bionet_studies(basedir=basedir,channels=channels)
# Saved under the key name that later cells pass to SF.loadRecordings.
kb.saveObject(dict(studies=studies,recordings=recordings),key=dict(name='spikeforest_bionet_recordings'))

In [0]:
# Note: this rebinds `studies` and `recordings`, replacing the bionet values from the previous cell.
studies,recordings=prepare_magland_synth_studies(basedir=basedir)
# Saved under the key name that later cells pass to SF.loadRecordings.
kb.saveObject(dict(studies=studies,recordings=recordings),key=dict(name='spikeforest_magland_synth_recordings'))

Already on server.


## Create summarize recordings batches

In [0]:
def create_summarize_recordings_batch(*,recordings_name,batch_name):
  """Create and save a batch with one 'summarize_recording' job per recording.

  The recordings are loaded into a fresh sf.SFData from the key
  dict(name=recordings_name). The resulting dict(jobs=[...]) is saved via
  kb.saveObject under the key dict(batch_name=batch_name).

  Args:
    recordings_name: key name of the previously saved recordings object.
    batch_name: name under which the batch object is saved.
  """
  print('Creating summarize_recordings batch: '+batch_name)
  sfdata=sf.SFData()
  sfdata.loadRecordings(key=dict(name=recordings_name))

  jobs=[]
  for study_name in sfdata.studyNames():
    study=sfdata.study(study_name)
    for recording_name in study.recordingNames():
      rec=study.recording(recording_name)
      jobs.append(dict(
          command='summarize_recording',
          label=rec.name(),
          recording=rec.getObject()
      ))
  print('Number of jobs: {}'.format(len(jobs)))
  kb.saveObject(key=dict(batch_name=batch_name),object=dict(jobs=jobs))

In [0]:
# One summarize_recordings batch per saved recordings object; the batch names are reused
# later when the batch results are loaded with SF.loadProcessingBatch.
create_summarize_recordings_batch(recordings_name='spikeforest_bionet_recordings',batch_name='summarize_recordings_bionet')
create_summarize_recordings_batch(recordings_name='spikeforest_magland_synth_recordings',batch_name='summarize_recordings_magland_synth')

Creating summarize_recordings batch: summarize_recordings_bionet
Number of jobs: 36
Already on server.
Creating summarize_recordings batch: summarize_recordings_magland_synth
Number of jobs: 80


To run these batches, log in to a machine with sufficient compute resources (for example, a cluster) and run something like:

```
bin/sf_run_batch [name_of_batch] --run_prefix "srun -c 2 -n 40"
```

where bin/sf_run_batch is found in the spikeforest_batch_run repository.

Alternatively, you can test run it in this notebook using the following commands:

In [0]:
## Note: usually you would not run this cell -- see the note above.
## It runs the three batch stages for a single batch from inside the notebook.

import spikeforest_batch_run as sbr
# Execute prepareBatch once (serially)
sbr.prepareBatch(batch_name='summarize_recordings_bionet')

# Execute runBatch many times in parallel
# (in this test run it is called just once, from this process)
sbr.runBatch(batch_name='summarize_recordings_bionet')

# Execute assembleBatchResults once (serially)
sbr.assembleBatchResults(batch_name='summarize_recordings_bionet')

Preparing batch summarize_recordings_bionet with 36 jobs
Running batch summarize_recordings_bionet with 36 jobs
Assembling results for batch summarize_recordings_bionet with 36 jobs
ASSEMBLING: drift_8x_A_2A
ASSEMBLING: drift_8x_A_2B
ASSEMBLING: drift_8x_A_4A
ASSEMBLING: drift_8x_A_4B
ASSEMBLING: drift_8x_B_2A
ASSEMBLING: drift_8x_B_2B
ASSEMBLING: drift_8x_B_4A
ASSEMBLING: drift_8x_B_4B
ASSEMBLING: drift_8x_C_2A
ASSEMBLING: drift_8x_C_2B
ASSEMBLING: drift_8x_C_4A
ASSEMBLING: drift_8x_C_4B
ASSEMBLING: shuffle_8x_A_2A
ASSEMBLING: shuffle_8x_A_2B
ASSEMBLING: shuffle_8x_A_4A
ASSEMBLING: shuffle_8x_A_4B
ASSEMBLING: shuffle_8x_B_2A
ASSEMBLING: shuffle_8x_B_2B
ASSEMBLING: shuffle_8x_B_4A
ASSEMBLING: shuffle_8x_B_4B
ASSEMBLING: shuffle_8x_C_2A
ASSEMBLING: shuffle_8x_C_2B
ASSEMBLING: shuffle_8x_C_4A
ASSEMBLING: shuffle_8x_C_4B
ASSEMBLING: static_8x_A_2A
ASSEMBLING: static_8x_A_2B
ASSEMBLING: static_8x_A_4A
ASSEMBLING: static_8x_A_4B
ASSEMBLING: static_8x_B_2A
ASSEMBLING: static_8x_B_2B
ASSEMBLI

## Browse recordings

In [0]:
# Fresh SFData container; load both recording sets by the key names they were saved under.
SF=sf.SFData()
SF.loadRecordings(key=dict(name='spikeforest_bionet_recordings'))
SF.loadRecordings(key=dict(name='spikeforest_magland_synth_recordings'))
# Load the 'job_results' of the two summarize_recordings batches
# (presumably written by the assemble step of the batch run -- confirm).
SF.loadProcessingBatch(key=dict(batch_name='summarize_recordings_bionet',name='job_results'))
SF.loadProcessingBatch(key=dict(batch_name='summarize_recordings_magland_synth',name='job_results'))

Loaded 0 sorting results and 36 recording summary results
Downloading file --- (0.0 MB): http://132.249.245.245:24351/7317cea8265b/download/1/e5/1e55d734f322497acaee334660f373bd270cf2cd -> /home/magland/kbucket_cache/1/e5/1e55d734f322497acaee334660f373bd270cf2cd
Loaded 0 sorting results and 80 recording summary results


In [0]:
# Interactive selector over the loaded data in 'recording' mode;
# the current selection is read back with X.recording() in the next cell.
X=sf.SFSelectWidget(sfdata=SF,mode='recording')
display(X)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

0,1
Study **:,bionet_driftbionet_shufflebionet_staticmagland_synth_noise10_K10_C4magland_synth_noise10_K10_C8magland_synth_noise10_K20_C4magland_synth_noise10_K20_C8magland_synth_noise20_K10_C4magland_synth_noise20_K10_C8magland_synth_noise20_K20_C4magland_synth_noise20_K20_C8
Recording:,drift_8x_A_2Adrift_8x_A_2Bdrift_8x_A_4Adrift_8x_A_4Bdrift_8x_B_2Adrift_8x_B_2Bdrift_8x_B_4Adrift_8x_B_4Bdrift_8x_C_2Adrift_8x_C_2Bdrift_8x_C_4Adrift_8x_C_4B


In [0]:
# R is whatever recording is currently selected in widget X, so the output of this
# cell depends on interactive state rather than on code alone.
R=X.recording()
display(R.plot('timeseries'))
display(R.plot('waveforms_true'))
display(R.trueUnitsInfo())

In [0]:
# Lists the plot names available for R (presumably the names accepted by R.plot -- confirm).
R.plotNames()

['timeseries', 'waveforms_true']

## Create spike sorting batches

In [4]:
# Same loading sequence as in the "Browse recordings" section, repeated so this section
# can be run on its own after the setup cells.
SF=sf.SFData()
SF.loadRecordings(key=dict(name='spikeforest_bionet_recordings'))
SF.loadRecordings(key=dict(name='spikeforest_magland_synth_recordings'))
SF.loadProcessingBatch(key=dict(batch_name='summarize_recordings_bionet',name='job_results'))
SF.loadProcessingBatch(key=dict(batch_name='summarize_recordings_magland_synth',name='job_results'))

Downloading file --- (0.0 MB): http://132.249.245.245:24351/7317cea8265b/download/3/b6/3b673117cec90f6e040220b4da908dd22cc20957 -> /tmp/sha1-cache/3/b6/3b673117cec90f6e040220b4da908dd22cc20957
Downloading file --- (0.0 MB): http://132.249.245.245:24351/7317cea8265b/download/d/41/d41ed6b9b22ba7cf651c230b07aca996320298c6 -> /tmp/sha1-cache/d/41/d41ed6b9b22ba7cf651c230b07aca996320298c6
Downloading file --- (0.0 MB): http://132.249.245.245:24351/7317cea8265b/download/1/ab/1abdcd3ef2ec66da41a759ca3a2a580a114c2321 -> /tmp/sha1-cache/1/ab/1abdcd3ef2ec66da41a759ca3a2a580a114c2321
Loaded 0 sorting results and 36 recording summary results
Downloading file --- (0.0 MB): http://132.249.245.245:24351/7317cea8265b/download/1/e5/1e55d734f322497acaee334660f373bd270cf2cd -> /tmp/sha1-cache/1/e5/1e55d734f322497acaee334660f373bd270cf2cd
Loaded 0 sorting results and 80 recording summary results


In [0]:
# Sorter definitions. Each dict carries a display/lookup `name`, the `processor_name`
# of the sorter to run, and the `params` passed along with it. The dicts are embedded
# verbatim in every job created by create_sorting_batch.

# MountainSort4 with detect_threshold=3.
sorter_ms4_thr3=dict(
    name='MountainSort4-thr3',
    processor_name='MountainSort4',
    params=dict(
        detect_sign=-1,
        adjacency_radius=100,
        detect_threshold=3
    )
)

# IronClust using the tetrode parameter template (used for the magland_synth batch).
sorter_irc_tetrode=dict(
    name='IronClust-tetrode',
    processor_name='IronClust',
    params=dict(
        detect_sign=-1,
        adjacency_radius=100,
        detect_threshold=5,
        prm_template_name="tetrode_template.prm"
    )
)

# IronClust using the drift parameter template (used for the bionet batch);
# no detect_threshold is set here, unlike the tetrode variant.
sorter_irc_drift=dict(
    name='IronClust-drift',
    processor_name='IronClust',
    params=dict(
        detect_sign=-1,
        adjacency_radius=100,
        prm_template_name="drift_template.prm"
    )
)

# SpykingCircus, used for both study sets.
sorter_sc=dict(
    name='SpykingCircus',
    processor_name='SpykingCircus',
    params=dict(
        detect_sign=-1,
        adjacency_radius=100
    )
)

# KiloSort variant used for the magland_synth batch (adjacency_radius=-1).
# NOTE(review): this and sorter_ks_drift share name='KiloSort', unlike the IronClust
# variants, which have distinct names. If both were ever applied to the same recordings
# their results could not be told apart by name -- confirm whether distinct names are wanted.
sorter_ks_tetrode=dict(
    name='KiloSort',
    processor_name='KiloSort',
    params=dict(
        detect_sign=-1,
        adjacency_radius=-1
    )
)

# KiloSort variant used for the bionet batch (adjacency_radius=100).
sorter_ks_drift=dict(
    name='KiloSort',
    processor_name='KiloSort',
    params=dict(
        detect_sign=-1,
        adjacency_radius=100
    )
)

In [0]:
def create_sorting_batch(*,recordings_name,batch_name,sorters):
  """Create and save a batch with one 'sort_recording' job per (recording, sorter) pair.

  The recordings are loaded into a fresh sf.SFData from the key
  dict(name=recordings_name). The resulting dict(jobs=[...]) is saved via
  kb.saveObject under the key dict(batch_name=batch_name).

  Args:
    recordings_name: key name of the previously saved recordings object.
    batch_name: name under which the batch object is saved.
    sorters: list of sorter dicts; each needs a 'name' entry (used in the job
      label) and is stored whole in the job.
  """
  print('Creating sorting batch: '+batch_name)
  sfdata=sf.SFData()
  sfdata.loadRecordings(key=dict(name=recordings_name))

  jobs=[]
  for study_name in sfdata.studyNames():
    study=sfdata.study(study_name)
    for recording_name in study.recordingNames():
      rec=study.recording(recording_name)
      # One job per sorter for this recording, in the order the sorters were given.
      jobs.extend(
          dict(
              command='sort_recording',
              label=sorter['name']+': '+rec.name(),
              recording=rec.getObject(),
              sorter=sorter
          )
          for sorter in sorters
      )

  print('Number of jobs: {}'.format(len(jobs)))
  kb.saveObject(key=dict(batch_name=batch_name),object=dict(jobs=jobs))

In [10]:
# One batch per (study set, sorter). The magland_synth batches use the tetrode
# variants of IronClust and KiloSort.
create_sorting_batch(recordings_name='spikeforest_magland_synth_recordings',batch_name='ms4_magland_synth',sorters=[sorter_ms4_thr3])
create_sorting_batch(recordings_name='spikeforest_magland_synth_recordings',batch_name='irc_magland_synth',sorters=[sorter_irc_tetrode])
create_sorting_batch(recordings_name='spikeforest_magland_synth_recordings',batch_name='sc_magland_synth',sorters=[sorter_sc])
create_sorting_batch(recordings_name='spikeforest_magland_synth_recordings',batch_name='ks_magland_synth',sorters=[sorter_ks_tetrode])

# The bionet batches use the drift variants of IronClust and KiloSort.
create_sorting_batch(recordings_name='spikeforest_bionet_recordings',batch_name='ms4_bionet',sorters=[sorter_ms4_thr3])
create_sorting_batch(recordings_name='spikeforest_bionet_recordings',batch_name='irc_bionet',sorters=[sorter_irc_drift])
create_sorting_batch(recordings_name='spikeforest_bionet_recordings',batch_name='sc_bionet',sorters=[sorter_sc])
create_sorting_batch(recordings_name='spikeforest_bionet_recordings',batch_name='ks_bionet',sorters=[sorter_ks_drift])

Creating sorting batch: ms4_magland_synth
Number of jobs: 80
Already on server.
Creating sorting batch: irc_magland_synth
Number of jobs: 80
Already on server.
Creating sorting batch: sc_magland_synth
Number of jobs: 80
Already on server.
Creating sorting batch: ks_magland_synth
Number of jobs: 80
Creating sorting batch: ms4_bionet
Number of jobs: 36
Already on server.
Creating sorting batch: irc_bionet
Number of jobs: 36
Already on server.
Creating sorting batch: sc_bionet
Number of jobs: 36
Already on server.
Creating sorting batch: ks_bionet
Number of jobs: 36


To run these sorting batches, follow the instructions above.

## Browse sorting results

In [0]:
# Fresh SFData with both recording sets and their recording-summary results.
SF=sf.SFData()
SF.loadRecordings(key=dict(name='spikeforest_bionet_recordings'))
SF.loadRecordings(key=dict(name='spikeforest_magland_synth_recordings'))
SF.loadProcessingBatch(key=dict(batch_name='summarize_recordings_bionet',name='job_results'))
SF.loadProcessingBatch(key=dict(batch_name='summarize_recordings_magland_synth',name='job_results'))

# Sorting results for magland_synth.
# NOTE(review): the ks_magland_synth and ks_bionet batches created earlier are not loaded in this cell -- confirm that is intentional.
SF.loadProcessingBatch(key=dict(batch_name='ms4_magland_synth',name='job_results'))
SF.loadProcessingBatch(key=dict(batch_name='sc_magland_synth',name='job_results'))
SF.loadProcessingBatch(key=dict(batch_name='irc_magland_synth',name='job_results'))

# Sorting results for bionet.
SF.loadProcessingBatch(key=dict(batch_name='ms4_bionet',name='job_results'))
#SF.loadProcessingBatch(key=dict(batch_name='sc_bionet',name='job_results')) ## Spyking circus not working yet -- need to put into singularity container
SF.loadProcessingBatch(key=dict(batch_name='irc_bionet',name='job_results'))

Loaded 0 sorting results and 36 recording summary results
Loaded 0 sorting results and 80 recording summary results
Loaded 80 sorting results and 0 recording summary results
Downloading file --- (0.1 MB): http://132.249.245.245:24351/7317cea8265b/download/f/f8/ff85038b9726cf723cddfe72c607cc34f13fe811 -> /home/magland/kbucket_cache/f/f8/ff85038b9726cf723cddfe72c607cc34f13fe811
Loaded 80 sorting results and 0 recording summary results
Downloading file --- (0.1 MB): http://132.249.245.245:24351/7317cea8265b/download/6/af/6af92f0f8e99625c1bacaee907d45998a6767e27 -> /home/magland/kbucket_cache/6/af/6af92f0f8e99625c1bacaee907d45998a6767e27
Loaded 80 sorting results and 0 recording summary results
Loaded 36 sorting results and 0 recording summary results
Loaded 36 sorting results and 0 recording summary results


In [0]:
# Interactive selector in 'sorting_result' mode; rebinds X, replacing the recording
# selector from the "Browse recordings" section. Read back via X.sortingResult() below.
X=sf.SFSelectWidget(sfdata=SF,mode='sorting_result')
display(X)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

0,1
Study **:,bionet_driftbionet_shufflebionet_staticmagland_synth_noise10_K10_C4magland_synth_noise10_K10_C8magland_synth_noise10_K20_C4magland_synth_noise10_K20_C8magland_synth_noise20_K10_C4magland_synth_noise20_K10_C8magland_synth_noise20_K20_C4magland_synth_noise20_K20_C8
Recording:,drift_8x_A_2Adrift_8x_A_2Bdrift_8x_A_4Adrift_8x_A_4Bdrift_8x_B_2Adrift_8x_B_2Bdrift_8x_B_4Adrift_8x_B_4Bdrift_8x_C_2Adrift_8x_C_2Bdrift_8x_C_4Adrift_8x_C_4B
Sorting result:,MountainSort4-thr3IronClust-drift


In [0]:
# R is now the sorting result currently selected in widget X (earlier cells used the
# name R for a recording); the output depends on the interactive selection.
R=X.sortingResult()
display(R.plot('unit_waveforms'))
display(R.plot('autocorrelograms'))
display(R.comparisonWithTruth())

## Aggregate sorting results

In [0]:
# Fresh SFData restricted to the bionet recordings, their summaries, and the
# MountainSort4 and IronClust sorting results.
SF=sf.SFData()
SF.loadRecordings(key=dict(name='spikeforest_bionet_recordings'))
SF.loadProcessingBatch(key=dict(batch_name='summarize_recordings_bionet',name='job_results'))
SF.loadProcessingBatch(key=dict(batch_name='ms4_bionet',name='job_results'))
#SF.loadProcessingBatch(key=dict(batch_name='sc_bionet',name='job_results')) ## Spyking circus not working yet -- need to put into singularity container
SF.loadProcessingBatch(key=dict(batch_name='irc_bionet',name='job_results'))

Downloading file --- (0.0 MB): http://132.249.245.245:24351/7317cea8265b/download/3/b6/3b673117cec90f6e040220b4da908dd22cc20957 -> /tmp/sha1-cache/3/b6/3b673117cec90f6e040220b4da908dd22cc20957
Downloading file --- (0.0 MB): http://132.249.245.245:24351/7317cea8265b/download/1/ab/1abdcd3ef2ec66da41a759ca3a2a580a114c2321 -> /tmp/sha1-cache/1/ab/1abdcd3ef2ec66da41a759ca3a2a580a114c2321
Loaded 0 sorting results and 36 recording summary results
Downloading file --- (0.0 MB): http://132.249.245.245:24351/7317cea8265b/download/c/0f/c0f25b500d9f436bed530f52214d6ea6498bc4f9 -> /tmp/sha1-cache/c/0f/c0f25b500d9f436bed530f52214d6ea6498bc4f9
Loaded 36 sorting results and 0 recording summary results
Downloading file --- (0.0 MB): http://132.249.245.245:24351/7317cea8265b/download/c/b6/cb67707c116546cf6aff907c7bd73df00dc9e8ff -> /tmp/sha1-cache/c/b6/cb67707c116546cf6aff907c7bd73df00dc9e8ff
Loaded 36 sorting results and 0 recording summary results


In [0]:
import pandas as pd
import random
import altair as alt
alt.renderers.enable('colab')

# Accumulate the sorting results
def accumulate_comparison_with_ground_truth(*,SF,studies,sorter_name,fieldnames):
  """Flatten per-unit comparison-with-truth results into a list of record dicts.

  For every recording of every study, the sorting result named `sorter_name`
  is compared with ground truth, and one record is produced per entry of that
  comparison. Each record holds 'recording_name', 'unit_id', the 'snr' of the
  matching true unit, and float(...) of each requested field.

  Args:
    SF: not used by this function; kept as part of the call signature.
    studies: iterable of study objects providing recordingNames() and recording(name).
    sorter_name: name passed to recording.sortingResult(...).
    fieldnames: names of the comparison fields to copy (converted to float).

  Returns:
    A list of dicts, one per compared unit.

  Raises:
    KeyError: if a compared unit id has no entry in the true-units info, or a
      requested field is missing from a comparison entry.
  """
  records=[]
  for study in studies:
    study_recordings=[study.recording(rname) for rname in study.recordingNames()]
    for recording in study_recordings:
      sorting_result=recording.sortingResult(sorter_name)
      comparison=sorting_result.comparisonWithTruth(format='json')
      true_units=recording.trueUnitsInfo(format='json')
      # Lookup table: true unit id -> snr
      snr_by_true_unit={unit['unit_id']:unit['snr'] for unit in true_units}
      for key in comparison:
        entry=comparison[key]
        recording_name=recording.name()
        unit_id=entry['Unit ID']
        record=dict(
            recording_name=recording_name,
            unit_id=unit_id,
            snr=snr_by_true_unit[unit_id]
        )
        record.update((fieldname,float(entry[fieldname])) for fieldname in fieldnames)
        records.append(record)
  return records

def show_accuracy_plot(*,SF,study_name,sorter_name,title):
  """Display an interactive scatter plot of Accuracy against snr for one study and sorter.

  The per-unit records come from accumulate_comparison_with_ground_truth for
  the single study SF.study(study_name); points are coloured by recording
  name, which is also shown as the tooltip.

  Args:
    SF: data object providing study(name).
    study_name: name of the study to plot.
    sorter_name: name of the sorter whose results are plotted.
    title: chart title.
  """
  accuracy_records=accumulate_comparison_with_ground_truth(
      SF=SF,
      studies=[SF.study(study_name)],
      sorter_name=sorter_name,
      fieldnames=['Accuracy']
  )

  # Display the accumulated sorting results
  frame=pd.DataFrame(accuracy_records)
  points=alt.Chart(frame,title=title).mark_point()
  chart=points.encode(
      x='snr',
      y='Accuracy',
      color='recording_name',
      tooltip='recording_name'
  ).interactive()
  display(chart)

In [0]:
import vdomr as vd

class SelectBox(vd.Component):
    """Drop-down select component built on vd.Component.

    Holds a list of options and the currently selected value, renders them as
    a vd.select element, and calls the registered handlers when the selection
    changes.
    """

    def __init__(self,options=None):
        """Create the select box.

        Args:
            options: iterable of option values. None (the default) means no
                options; a None default is used instead of a shared mutable
                list default.
        """
        vd.Component.__init__(self)
        self._on_change_handlers=[]
        self._value=None
        self._options=[]
        self.setOptions(options)

    def setOptions(self,options):
        """Replace the options; keep the current value if it is still among them.

        If the current value is not among the new options, the first option
        becomes the value (or None when there are no options).
        """
        # Copy into a private list so later changes to the caller's list
        # cannot silently alter what this component renders.
        self._options=list(options) if options is not None else []
        if self._value not in self._options:
            self._value=self._options[0] if self._options else None
        self.refresh()

    def value(self):
        """Return the currently selected value (None when there are no options)."""
        return self._value

    def setValue(self,value):
        """Set the selected value and re-render. The value is not checked against the options."""
        self._value=value
        self.refresh()

    def onChange(self,handler):
        """Register a handler; it is called as handler(value=<new value>) on each change."""
        self._on_change_handlers.append(handler)

    def _on_change(self,value):
        # Callback wired to the select element's onchange in render().
        self._value=value
        for handler in self._on_change_handlers:
            handler(value=value)

    def render(self):
        """Build the select element, marking the current value as selected."""
        opts=[]
        for option in self._options:
            if option==self._value:
                opts.append(vd.option(option,selected='selected'))
            else:
                opts.append(vd.option(option))
        return vd.select(opts,onchange=self._on_change)

In [0]:
# Drop-downs for choosing the study and the sorter to plot; their values are read in the next cell.
STUDY=SelectBox(options=SF.studyNames())
# These match the `name` fields of sorter_ms4_thr3 and sorter_irc_drift, the sorters of the
# two bionet sorting batches loaded above.
SORTER=SelectBox(options=['MountainSort4-thr3','IronClust-drift'])
display(STUDY)
display(SORTER)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [0]:
# Plot accuracy vs. snr for the study/sorter currently selected in the drop-downs above;
# re-run this cell after changing a selection.
show_accuracy_plot(
    SF=SF,
    study_name=STUDY.value(),
    sorter_name=SORTER.value(),
    title=SORTER.value()+' '+STUDY.value()
)

Downloading file --- (0.0 MB): http://132.249.245.245:24351/7317cea8265b/download/f/e4/fe4ee41e93b3602f9141059642859016e39892b0 -> /tmp/sha1-cache/f/e4/fe4ee41e93b3602f9141059642859016e39892b0
Downloading file --- (0.0 MB): http://132.249.245.245:24351/7317cea8265b/download/1/e1/1e1e2d84977661c448c4af999275a2e70f912fb3 -> /tmp/sha1-cache/1/e1/1e1e2d84977661c448c4af999275a2e70f912fb3
Downloading file --- (0.0 MB): http://132.249.245.245:24351/7317cea8265b/download/f/a1/fa1d7128c11479ed8c674f93bc27833f2cda745c -> /tmp/sha1-cache/f/a1/fa1d7128c11479ed8c674f93bc27833f2cda745c
Downloading file --- (0.0 MB): http://132.249.245.245:24351/7317cea8265b/download/0/96/09672a44f05bac622db112aaece46554db6fa83c -> /tmp/sha1-cache/0/96/09672a44f05bac622db112aaece46554db6fa83c
Downloading file --- (0.0 MB): http://132.249.245.245:24351/7317cea8265b/download/8/d1/8d1b0f8b9a0f0fc656df5659221108cde9cf74e5 -> /tmp/sha1-cache/8/d1/8d1b0f8b9a0f0fc656df5659221108cde9cf74e5
Downloading file --- (0.0 MB): http