<a href="https://colab.research.google.com/github/magland/spikeforest_batch_run/blob/master/notebooks/spikeforest_analysis2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# SpikeForest analysis

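This notebook sets up a complete SpikeForest analysis of the bionet, magland_synth, and mearec studies: it prepares the recordings, then creates the recording-summary and spike-sorting batches. You should execute the first few cells and then skip down to the section of interest below.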

In [0]:
# Only run this cell if you are running this on a hosted runtime that does not have these packages installed
# %%capture is used to suppress the output... this should take up to a minute to complete
%%capture
!pip install spikeforest
!pip install git+https://github.com/magland/spikeforest_batch_run

In [1]:
# Import the python packages -- autoreload is used for development purposes
%load_ext autoreload
%autoreload 2

import os
os.environ['VDOMR_MODE']='COLAB'

import spikeforest as sf
from kbucket import client as kb
import vdomr as vd
import batcho

vdomr: using colab because of VDOMR_MODE environment variable


In [0]:
## Configure read-only access to kbucket -- use this if you only want to browse the results.
## The read/write configuration in the next cell is only needed when preparing studies or batches.
sf.kbucketConfigRemote(name='spikeforest1-readonly')

In [3]:
## Configure read/write access to kbucket -- use this if you are preparing the studies or the processing batches.
## With ask_password=True the password is requested interactively when the cell runs.
sf.kbucketConfigRemote(name='spikeforest1-readwrite',ask_password=True)

Enter password: ··········
Pairio user set to spikeforest. Test succeeded.


## Prepare recordings

In [0]:
def read_text_file(path):
  """Return the full text content of the file at `path`.

  The path is first resolved through kb.realizeFile; an Exception is raised
  when that call returns None.
  """
  local_path=kb.realizeFile(path)
  if local_path is not None:
    with open(local_path,'r') as handle:
      contents=handle.read()
    return contents
  raise Exception('Unable to realize file: '+path)
  
def prepare_bionet_studies(*,basedir, channels, study_set_name='bionet', suffix=''):
  """Build the study and recording descriptors for the three bionet studies.

  Args:
    basedir: root location; the studies are looked up under basedir+'/bionet/'.
    channels: channel list stored on every recording. When it is non-empty,
      the true units are selected on those channels and stored as 'units_true'.
    study_set_name: value stored in each study's 'study_set' field.
    suffix: text appended to each base study name.

  Returns:
    A (studies, recordings) pair of lists of dicts.
  """
  bionet_root=basedir+'/bionet/'
  studies=[]
  recordings=[]
  for base_name in ('bionet_drift','bionet_shuffle','bionet_static'):
    study_name=base_name+suffix
    study_dir=bionet_root+base_name
    readme=read_text_file(study_dir+'/readme.txt')
    studies.append({
        'name': study_name,
        'study_set': study_set_name,
        'directory': study_dir,
        'description': readme
    })
    # Every sub-directory of the study directory is treated as one recording
    for dsname in kb.readDir(study_dir)['dirs']:
      dsdir='{}/{}'.format(study_dir,dsname)
      record={
          'name': dsname,
          'study': study_name,
          'description': '',
          'directory': dsdir,
          'channels': channels
      }
      if len(channels)>0:
        record['units_true']=sf.sf_batch.select_units_on_channels(
            recording_dir=dsdir,
            firings=dsdir+'/firings_true.mda',
            channels=channels
        )
      recordings.append(record)
  return studies, recordings

def prepare_bionet8c_studies(*,basedir):
  """Prepare the bionet studies restricted to channels 0..7 (study names get the '_8c' suffix)."""
  options=dict(
      basedir=basedir,
      channels=[ch for ch in range(8)],
      study_set_name='bionet',
      suffix='_8c'
  )
  bionet_studies, bionet_recordings = prepare_bionet_studies(**options)
  return bionet_studies, bionet_recordings


def prepare_bionet32c_studies(*,basedir):
  """Prepare the bionet studies restricted to channels 0..31 (study names get the '_32c' suffix)."""
  options=dict(
      basedir=basedir,
      channels=[ch for ch in range(32)],
      study_set_name='bionet',
      suffix='_32c'
  )
  bionet_studies, bionet_recordings = prepare_bionet_studies(**options)
  return bionet_studies, bionet_recordings


def _prepare_dataset_studies(*,basedir,subdir,study_set_name,names):
  """Shared implementation behind the magland_synth and mearec prepare functions.

  Args:
    basedir: root location of the ground-truth data.
    subdir: path of the study set below basedir (no leading or trailing slash).
    study_set_name: stored as each study's 'study_set' and used as the prefix
      of the study names.
    names: study directory names, each starting with 'datasets_'.

  Returns:
    A (studies, recordings) pair of lists of dicts. All studies share the
    description read from the readme.txt found directly under the study set
    directory.
  """
  root=basedir+'/'+subdir
  description=read_text_file(root+'/readme.txt')
  # Study names drop the leading 'datasets_' from the directory name
  prefix_len=len('datasets_')
  studies=[]
  recordings=[]
  for name in names:
    study_name=study_set_name+'_'+name[prefix_len:]
    study_dir=root+'/'+name
    studies.append(dict(
        name=study_name,
        study_set=study_set_name,
        directory=study_dir,
        description=description
    ))
    # Every sub-directory of the study directory is treated as one recording
    for dsname in kb.readDir(study_dir)['dirs']:
      recordings.append(dict(
          name=dsname,
          study=study_name,
          directory='{}/{}'.format(study_dir,dsname),
          description='One of the recordings in the {} study'.format(study_name)
      ))
  return studies, recordings

def prepare_magland_synth_studies(*,basedir):
  """Prepare the (studies, recordings) descriptors of the magland_synth study set."""
  return _prepare_dataset_studies(
      basedir=basedir,
      subdir='magland_synth',
      study_set_name='magland_synth',
      names=[
          'datasets_noise10_K10_C4','datasets_noise10_K10_C8',
          'datasets_noise10_K20_C4','datasets_noise10_K20_C8',
          'datasets_noise20_K10_C4','datasets_noise20_K10_C8',
          'datasets_noise20_K20_C4','datasets_noise20_K20_C8'
      ]
  )

def prepare_mearec_tetrode_studies(*,basedir):
  """Prepare the (studies, recordings) descriptors of the mearec_tetrode study set."""
  return _prepare_dataset_studies(
      basedir=basedir,
      subdir='mearec_synth/tetrode',
      study_set_name='mearec_tetrode',
      names=[
          'datasets_noise10_K10_C4','datasets_noise10_K20_C4',
          'datasets_noise20_K10_C4','datasets_noise20_K20_C4'
      ]
  )

def prepare_mearec_neuronexus_studies(*,basedir):
  """Prepare the (studies, recordings) descriptors of the mearec_neuronexus study set."""
  return _prepare_dataset_studies(
      basedir=basedir,
      subdir='mearec_synth/neuronexus',
      study_set_name='mearec_neuronexus',
      names=[
          'datasets_noise10_K10_C32','datasets_noise10_K20_C32','datasets_noise10_K40_C32',
          'datasets_noise20_K10_C32','datasets_noise20_K20_C32','datasets_noise20_K40_C32'
      ]
  )

In [0]:
# Root of the ground-truth recordings on kbucket; passed as basedir to the prepare_* functions
basedir='kbucket://15734439d8cf/groundtruth'
# Alternative root on a local filesystem path -- uncomment to use it instead
#basedir='/mnt/ceph/users/jjun/groundtruth'

In [0]:
# Prepare the 8-channel bionet studies and save them under the key name 'spikeforest_bionet8c_recordings'
studies,recordings=prepare_bionet8c_studies(basedir=basedir)
kb.saveObject(dict(studies=studies,recordings=recordings),key=dict(name='spikeforest_bionet8c_recordings'))

In [0]:
# Prepare the 32-channel bionet studies and save them under the key name 'spikeforest_bionet32c_recordings'
studies,recordings=prepare_bionet32c_studies(basedir=basedir)
kb.saveObject(dict(studies=studies,recordings=recordings),key=dict(name='spikeforest_bionet32c_recordings'))

In [0]:
# Prepare and save both mearec study sets. The tetrode set is required as well: the batch-creation
# cells further down load the key name 'spikeforest_mearec_tetrode_recordings'.
studies,recordings=prepare_mearec_tetrode_studies(basedir=basedir)
kb.saveObject(dict(studies=studies,recordings=recordings),key=dict(name='spikeforest_mearec_tetrode_recordings'))

studies,recordings=prepare_mearec_neuronexus_studies(basedir=basedir)
kb.saveObject(dict(studies=studies,recordings=recordings),key=dict(name='spikeforest_mearec_neuronexus_recordings'))

Downloading file --- (0.0 MB): https://kbucket.flatironinstitute.org/15734439d8cf/download/groundtruth/mearec_synth/neuronexus/readme.txt -> /home/magland/kbucket_cache/4/ea/4ea6e0b1cb2e41b9d83d73856fd3c93b9620cd18
Already on server.


In [0]:
# Prepare the magland_synth studies and save them under the key name 'spikeforest_magland_synth_recordings'
studies,recordings=prepare_magland_synth_studies(basedir=basedir)
kb.saveObject(dict(studies=studies,recordings=recordings),key=dict(name='spikeforest_magland_synth_recordings'))

Already on server.


## Create summarize recordings batches

In [0]:
def create_summarize_recordings_batch(*,recordings_name,batch_name):
  """Create a batch holding one 'summarize_recording' job per recording.

  Args:
    recordings_name: key name under which the recordings were saved; loaded
      through SFData.loadRecordings.
    batch_name: name of the batch handed to batcho.set_batch.

  Prints the batch name and the number of jobs created. Returns None.
  """
  print('Creating summarize_recordings batch: '+batch_name)
  SF=sf.SFData()
  SF.loadRecordings(key=dict(name=recordings_name))

  jobs=[]
  for name in SF.studyNames():
    study=SF.study(name)
    for recname in study.recordingNames():
      R=study.recording(recname)
      job=dict(
          command='summarize_recording',
          # The label carries the study name because recording names alone need not be unique
          label='summarize '+study.name()+'/'+R.name(),
          recording=R.getObject()
      )
      jobs.append(job)
  print('Number of jobs: {}'.format(len(jobs)))
  batcho.set_batch(batch_name=batch_name,jobs=jobs)

In [9]:
# One summarize batch per saved recordings object; both names are derived from the dataset name
for _dataset in ['bionet8c','bionet32c','magland_synth','mearec_tetrode','mearec_neuronexus']:
  create_summarize_recordings_batch(
      recordings_name='spikeforest_{}_recordings'.format(_dataset),
      batch_name='summarize_recordings_{}'.format(_dataset)
  )

Creating summarize_recordings batch: summarize_recordings_bionet8c
Loading recordings: {"name": "spikeforest_bionet8c_recordings"}
Number of jobs: 36
Creating summarize_recordings batch: summarize_recordings_bionet32c
Loading recordings: {"name": "spikeforest_bionet32c_recordings"}
Number of jobs: 36
Creating summarize_recordings batch: summarize_recordings_magland_synth
Loading recordings: {"name": "spikeforest_magland_synth_recordings"}
Number of jobs: 80
Creating summarize_recordings batch: summarize_recordings_mearec_tetrode
Loading recordings: {"name": "spikeforest_mearec_tetrode_recordings"}
Number of jobs: 40
Creating summarize_recordings batch: summarize_recordings_mearec_neuronexus
Loading recordings: {"name": "spikeforest_mearec_neuronexus_recordings"}
Number of jobs: 60


In [0]:
# Check the status of a batch
statuses=batcho.get_batch_job_statuses(batch_name='summarize_recordings_bionet8c')
for status in statuses:
  print(status['job']['label'],status['status'])

To run these batches, go to a computer with resources somewhere and run something like:

```
bin/sf_run_batch2 [name_of_batch] --run_prefix "srun -c 2 -n 40"
```

Using "srun" on the Flatiron cluster requires you to run the following first:
```
module load srun
module load matlab
```
To use GPU, run
```
bin/sf_run_batch2 [name_of_batch] --run_prefix "srun -c 2 -n 40 --gres=gpu:2 -p gpu"
```
  
where bin/sf_run_batch2 is found in the spikeforest2 repository.



## Create spike sorting batches

In [23]:
# Load every saved recordings object first, then the matching summarize_recordings batch results
_datasets=['bionet8c','bionet32c','magland_synth','mearec_tetrode','mearec_neuronexus']
SF=sf.SFData()
for _dataset in _datasets:
  SF.loadRecordings(key=dict(name='spikeforest_{}_recordings'.format(_dataset)))
for _dataset in _datasets:
  SF.loadProcessingBatch(batch_name='summarize_recordings_{}'.format(_dataset))

Loading recordings: {"name": "spikeforest_bionet8c_recordings"}
Loading recordings: {"name": "spikeforest_bionet32c_recordings"}
Loading recordings: {"name": "spikeforest_magland_synth_recordings"}
Loading recordings: {"name": "spikeforest_mearec_tetrode_recordings"}
Loading recordings: {"name": "spikeforest_mearec_neuronexus_recordings"}
Loading processing batch: {"name": "batcho_batch_results", "batch_name": "summarize_recordings_bionet8c"}
Loaded 0 sorting results and 36 recording summary results
Loading processing batch: {"name": "batcho_batch_results", "batch_name": "summarize_recordings_bionet32c"}
Loaded 0 sorting results and 36 recording summary results
Loading processing batch: {"name": "batcho_batch_results", "batch_name": "summarize_recordings_magland_synth"}
Downloading file --- (0.0 MB): http://132.249.245.245:24351/7317cea8265b/download/9/41/94130ea9ec2ef54550fee6ee3c8c73baeaddaeea -> /home/magland/kbucket_cache/9/41/94130ea9ec2ef54550fee6ee3c8c73baeaddaeea
Loaded 0 sorti

In [0]:
# Sorter definitions: each has a display name, the processor to run, and that processor's parameters.

sorter_ms4_thr3={
    'name': 'MountainSort4-thr3',
    'processor_name': 'MountainSort4',
    'params': {
        'detect_sign': -1,
        'adjacency_radius': 50,
        'detect_threshold': 3
    }
}

sorter_irc_tetrode={
    'name': 'IronClust-tetrode',
    'processor_name': 'IronClust',
    'params': {
        'detect_sign': -1,
        'adjacency_radius': 50,
        'detect_threshold': 5,
        'prm_template_name': "tetrode_template.prm"
    }
}

sorter_irc_drift={
    'name': 'IronClust-drift',
    'processor_name': 'IronClust',
    'params': {
        'detect_sign': -1,
        'adjacency_radius': 50,
        'prm_template_name': "template_drift.prm"
    }
}

sorter_sc={
    'name': 'SpykingCircus',
    'processor_name': 'SpykingCircus',
    'params': {
        'detect_sign': -1,
        'adjacency_radius': 50
    }
}

sorter_ks={
    'name': 'KiloSort',
    'processor_name': 'KiloSort',
    'params': {
        'detect_sign': -1,
        'adjacency_radius': 50
    }
}

In [0]:
def create_sorting_batch(*,recordings_name,batch_name,sorters):
  """Create a batch holding one 'sort_recording' job per (recording, sorter) pair.

  Args:
    recordings_name: key name under which the recordings were saved; loaded
      through SFData.loadRecordings.
    batch_name: name of the batch handed to batcho.set_batch.
    sorters: sorter definition dicts; each must provide a 'name' entry.

  Prints the batch name and the number of jobs created. Returns None.
  """
  print('Creating sorting batch: '+batch_name)
  data=sf.SFData()
  data.loadRecordings(key=dict(name=recordings_name))

  jobs=[]
  for study_name in data.studyNames():
    study=data.study(study_name)
    for recording_name in study.recordingNames():
      rec=study.recording(recording_name)
      # NOTE(review): unlike the summarize jobs, this label omits the study name, so
      # recordings that share a name across studies get identical labels -- confirm this is intended
      jobs.extend([
          {
              'command': 'sort_recording',
              'label': sorter['name']+': '+rec.name(),
              'recording': rec.getObject(),
              'sorter': sorter
          }
          for sorter in sorters
      ])

  print('Number of jobs: {}'.format(len(jobs)))
  batcho.set_batch(batch_name=batch_name,jobs=jobs)

In [0]:
# create batches

# Short sorter tags used in the batch names; positions line up with the sorter lists below
vs_sorters = ['ms4', 'irc', 'sc', 'ks']
v_sorters_tetrode = [sorter_ms4_thr3, sorter_irc_tetrode, sorter_sc, sorter_ks]
v_sorters_siprobe = [sorter_ms4_thr3, sorter_irc_drift, sorter_sc, sorter_ks]
vs_recordings_tetrode = ['magland_synth', 'mearec_tetrode']
vs_recordings_siprobe = ['bionet8c', 'bionet32c', 'mearec_neuronexus']
vs_recordings = vs_recordings_tetrode + vs_recordings_siprobe

# Tetrode recording sets use the tetrode sorter list, the remaining sets use the siprobe list.
# One batch named '<sorter tag>_<recording set>' is created per (recording set, sorter) pair.
_probe_groups = (
    (vs_recordings_tetrode, v_sorters_tetrode),
    (vs_recordings_siprobe, v_sorters_siprobe),
)
for _group_recordings, _group_sorters in _probe_groups:
  for recording in _group_recordings:
    for s_sorter, sorter in zip(vs_sorters, _group_sorters):
      recordings_name = 'spikeforest_{}_recordings'.format(recording)
      batch_name = '{}_{}'.format(s_sorter, recording)
      create_sorting_batch(recordings_name=recordings_name, batch_name=batch_name, sorters=[sorter])
    print()


To run these sorting batches, follow the instructions above.