<a href="https://colab.research.google.com/github/laurenneal/capstone-visual-neuroscience/blob/Dylan/Seeded_CNMF_Pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Run these cells once to set up caiman and define the functions to be used below

### Setup

In [1]:
# Mount Google Drive at /content/drive; the movie, mask, and result paths used
# later in this notebook all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Install CaImAn - takes around 2 minutes
# NOTE(review): the clone is not pinned to a tag or commit, so it follows the
# repository's default branch; the recorded run of this cell installed caiman-1.9.7.

!git clone https://github.com/flatironinstitute/CaImAn.git
%cd '/content/CaImAn/'
!pip install -e .

# Install caiman dependencies (&> /dev/null will suppress the hundreds of printed lines in the output)
!pip install -r requirements.txt &> /dev/null

# Import other dependencies
import cv2
import glob
import numpy as np
import os
import matplotlib.pyplot as plt
import imageio

#IMPORTANT! Newer versions of h5py will cause errors when saving results
# The pin has to be installed before `import h5py` runs; if h5py was already
# imported in this kernel, restart the runtime so the pinned version is loaded.
!pip install h5py==2.10.0
import h5py

# Set up caiman (build the extension modules in place)
!python setup.py build_ext -i

# Other file setup (the recorded run reports "Installed /root/caiman_data")
!python caimanmanager.py install --inplace

# Caiman imports
import caiman as cm
from caiman.motion_correction import MotionCorrect
from caiman.source_extraction.cnmf import cnmf as cnmf
from caiman.source_extraction.cnmf import params as params
from caiman.utils.utils import download_demo
from caiman.utils.visualization import plot_contours, nb_view_patches, nb_plot_contour
from caiman.summary_images import local_correlations_movie_offline
from scipy.ndimage import center_of_mass
from IPython.display import display, clear_output

Cloning into 'CaImAn'...
remote: Enumerating objects: 24960, done.[K
remote: Counting objects: 100% (6/6), done.[K
remote: Compressing objects: 100% (6/6), done.[K
remote: Total 24960 (delta 0), reused 2 (delta 0), pack-reused 24954[K
Receiving objects: 100% (24960/24960), 518.36 MiB | 29.75 MiB/s, done.
Resolving deltas: 100% (16746/16746), done.
Checking out files: 100% (317/317), done.
/content/CaImAn
Obtaining file:///content/CaImAn
Installing collected packages: caiman
  Running setup.py develop for caiman
Successfully installed caiman-1.9.7
Collecting h5py==2.10.0
  Downloading h5py-2.10.0-cp37-cp37m-manylinux1_x86_64.whl (2.9 MB)
[K     |████████████████████████████████| 2.9 MB 7.7 MB/s 
Installing collected packages: h5py
  Attempting uninstall: h5py
    Found existing installation: h5py 3.1.0
    Uninstalling h5py-3.1.0:
      Successfully uninstalled h5py-3.1.0
Successfully installed h5py-2.10.0
running build_ext
Installed /root/caiman_data


In [3]:
# Logging and environment configuration for interactive visualizations.
# Some of the imports in this cell repeat ones from the setup cell.

# Turn on the autoreload extension in mode 2. run_line_magic is used instead of
# the deprecated InteractiveShell.magic(); the leftover debug print was removed.
try:
    _ipython_shell = get_ipython()
    _ipython_shell.run_line_magic('load_ext', 'autoreload')
    _ipython_shell.run_line_magic('autoreload', '2')
except (NameError, AttributeError):
    # NameError: get_ipython is not defined outside IPython/Jupyter.
    # AttributeError: get_ipython() returned None (no active shell).
    print('NOT IPYTHON')

from ipyparallel import Client
import logging
import matplotlib.pyplot as plt
import numpy as np
import os
import psutil
from scipy.ndimage.filters import gaussian_filter
import sys

import caiman as cm
from caiman.utils.visualization import nb_view_patches3d
import caiman.source_extraction.cnmf as cnmf
from caiman.components_evaluation import evaluate_components, estimate_components_quality_auto
from caiman.cluster import setup_cluster
from caiman.paths import caiman_datadir

import bokeh.plotting as bpl
bpl.output_notebook()

#uncomment this to enable detailed logging for debugging

# logging.basicConfig(format=
#                           "%(relativeCreated)12d [%(filename)s:%(funcName)20s():%(lineno)s] [%(process)d] %(message)s",
#                     # filename="/tmp/caiman.log",
#                     level=logging.DEBUG)

1


### Initialize parameters object with starter values

In [4]:
# Build the parameters object; the individual parameter groups are filled in below.
subfolder = 'stackRaw_mc'   # value used for 'var_name_hdf5' in both the data and motion groups
fnames = []                 # placeholder - the movie path is assigned when a movie is processed
opts = params.CNMFParams()
opts.data['var_name_hdf5'] = subfolder
opts.motion['var_name_hdf5'] = subfolder

In [5]:
# Starter values for extraction and evaluation.
# Most of these are specific to our data and will not need to be changed during optimization.


# ---- overall params about our data ----

fr = 20                 # approximate frame rate of data - CONFIRMED FPS
decay_time = .4         # length of transient - CONFIRMED APPROPRIATE FOR OUR INDICATOR GCaMP6f
dims = [128, 256]       # dimensions of the FOV in pixels - CONFIRMED
dxy = [.29, .29]        # resolution of 1 pixel in um - CONFIRMED BY CARL

opts.set('data', dict(fnames=fnames, fr=fr, decay_time=decay_time, dims=dims, dxy=dxy))


# ---- params related to the temporal traces ----

p = 0                   # order of the autoregressive system - 0 from Carl's code

# Not overridden here (kept for reference):
#   fudge_factor  default 0.96, Carl's value 1 -- bias correction factor for discrete time constants
#   ITER          default 2, Carl's value 5    -- block coordinate descent iterations
#   nb (tnb = 1)  temporal global background components - TUNE

# p is set for the temporal step and for the preprocessing step
opts.set('temporal', dict(p=p))
opts.set('preprocess', dict(p=p))


# ---- params related to the FOV and patches for parallel processing ----
# Currently disabled: nothing is written to the 'patch' group in this cell.
# Candidate values kept for reference:
#   is_patches = False    flag for processing in patches or not - not used in Matlab
#   patches:    rf = 25 (half size of each patch), stride = 5 (overlap between patches),
#               K = 3 (number of components in each patch), p_patch = p
#   whole FOV:  rf = None, stride = None (None runs CNMF on the whole FOV),
#               K = 12 (number of neurons expected in the whole FOV;
#               40 from Carl's code, seems to be too many)
#   n_processes = 2       number of processes to run in parallel, 2 cores available in Colab
#   applied with: opts.set('patch', {'rf': rf, 'stride': stride, 'n_processes': n_processes, 'K': K})


# ---- initialization params ----

ssub = 2                # spatial downsampling
tsub = 2                # temporal downsampling
# python Caiman defaults to greedy_roi; Carl's code uses sparse_nmf, but sparse_nmf runs MUCH slower
method_init = 'sparse_nmf'

# Not overridden here (kept for reference):
#   K               would come from the patch params above
#   ssub_B = 1      background spatial downsampling
#   gSig = [5,5]    radius (half-size) of average neurons (in pixels)
#   tau = 0         standard deviation of neuron size along x and y - from Carl's code
#   nb = 1          number of background components

opts.set('init', dict(tsub=tsub, ssub=ssub, method_init=method_init))


# ---- merging of correlated ROIs ----
# Currently disabled. Reference value:
#   merge_thr = 0.95   merging threshold, max correlation allowed - from Carl's code
#   applied with: opts.set('merging', {'merge_thr': merge_thr})


# ---- spatial params ----

snb = 2                 # spatial global background components

opts.set('spatial', dict(nb=snb))


# ---- component evaluation ----
# The components are evaluated in three ways:
#   a) the shape of each component must be correlated with the data
#   b) a minimum peak SNR is required over the length of a transient
#   c) each shape passes a CNN based classifier (this will pick up only neurons
#      and filter out active processes)
# Not sure if these should be tuned or not.

min_SNR = 2.5           # peak SNR for accepted components (if above this, accept)
SNR_lowest = 1          # minimum SNR for accepted components (if below this, reject)
rval_thr = 0.9          # space correlation threshold (if above this, accept)

use_cnn = True          # use the CNN classifier; controls whether the 2 params below are used
min_cnn_thr = 0.9       # if cnn classifier predicts below this value, reject
cnn_lowest = 0.1        # neurons with cnn probability lower than this value are rejected

opts.set('quality', dict(min_SNR=min_SNR,
                         SNR_lowest=SNR_lowest,
                         rval_thr=rval_thr,
                         use_cnn=use_cnn,
                         min_cnn_thr=min_cnn_thr,
                         cnn_lowest=cnn_lowest))

In [13]:
#Function to run seeded cnmf using masks, then return results

def seeded_cnmf(path_to_stack, path_to_masks, opts):
  """Run CNMF on one movie with the spatial footprints seeded from binary ROI masks.

  Args:
    path_to_stack: path to the movie file. It becomes the only entry of the
      'fnames' value in the data group of `opts`.
    path_to_masks: path to a file readable by h5py that holds a 'bwMaskStack'
      dataset of 0/1 masks.
    opts: the CNMF parameters object. It is modified in place: 'fnames' is set,
      patch['only_init'] is set to False, and 'use_cnn' in the quality group is
      set to False. These changes persist for later calls that reuse `opts`.

  Returns:
    On success a tuple (cnm_seeded, results), where results is a dict with
      'stack_name': file name of path_to_stack without its extension,
      'spatial':    cnm_seeded.estimates.A made dense and transposed to roi x pixel,
      'temporal':   cnm_seeded.estimates.C.
    On failure 'failed' and the traceback are printed and None is returned.
  """
  import traceback
  import warnings
  warnings.simplefilter(action='ignore', category=FutureWarning)

  opts.set('data', {'fnames': [path_to_stack]})

  # Pre-initialised so the cleanup in `finally` never touches an unbound name,
  # even when start_server itself raises.
  dview = None
  try:
    #Start a cluster with 2 CPU's (available in colab)
    dview = cm.cluster.start_server(ncpus=2)

    #Read in masks and transpose; the context manager closes the file even if the read fails
    with h5py.File(path_to_masks, 'r') as g:
      mask_A = g['bwMaskStack'][:].T

    #rearrange the dimensions of the masks to match the reformatted .h5 movie that has been flipped
    #(not necessary if the input movie has not been flipped, but it will affect the following lines)
    mask_A = mask_A.transpose(1,0,2)

    #reshape to 2D: first dimension is pixels (128*256 = 32768 for our FOV), 2nd dimension is the # of ROI's
    mask_A = mask_A.reshape(mask_A.shape[0]*mask_A.shape[1], mask_A.shape[2])

    #convert the values from 0/1 to boolean False/True
    mask_A = np.array(mask_A, dtype=bool)
    print('mask read in and reformatted')

    #For seeded CNMF, need to verify certain params
    opts.patch['only_init'] = False
    #'use_cnn' is a quality-group parameter (it is set there by the starter values);
    #writing it into the data group, as before, left the CNN classifier enabled
    opts.set('quality', {'use_cnn': False})
    print('params adjusted for seeded cnmf')

    #Initialize a new cnmf object and pass in our masks as the "Ain" param
    #"Ain" is A-in, meaning the A matrix holding the spatial footprints of the roi's
    cnm_seeded = cnmf.CNMF(n_processes = 2, params=opts, dview=dview, Ain=mask_A)
    print('seeded cnmf object initialized')
    cnm_seeded.fit_file(motion_correct = False, include_eval=True)
    print('seeded cnmf completed')

    #movie identifier = file name without extension, independent of folder depth
    stack_name = os.path.splitext(os.path.basename(path_to_stack))[0]

    #return the cnm object and a dictionary of results
    return cnm_seeded, {'stack_name': stack_name,
                         'spatial': np.transpose(cnm_seeded.estimates.A.A, axes = (1,0)), #de-sparsified (.A) and transposed to put the axes in roi x pixel order
                         'temporal': cnm_seeded.estimates.C}

  except Exception:
    #keep the "print and return None" contract, but show what went wrong;
    #KeyboardInterrupt is no longer swallowed
    print('failed')
    traceback.print_exc()
    return None

  finally:
    #always shut the cluster down, on success, failure, or interrupt
    cm.stop_server(dview=dview)

In [18]:
#Function to run non-seeded cnmf, but allow for slicing the FOV, then return results
def sliced_cnmf(path_to_stack, path_to_masks, opts,
                indices=(slice(0, 128, 1), slice(70, 140, 1))):
  """Run non-seeded CNMF on a rectangular slice of the field of view.

  Args:
    path_to_stack: path to the movie file. It becomes the only entry of the
      'fnames' value in the data group of `opts`.
    path_to_masks: not used by this function; kept so the signature matches
      seeded_cnmf.
    opts: the CNMF parameters object. It is modified in place ('fnames' and
      'dims' in the data group).
    indices: (y_slice, x_slice) handed to fit_file as its `indices` argument.
      The default is the region that used to be hard-coded here.

  Returns:
    On success a tuple (cnm_sliced, results), where results is a dict with
      'stack_name': file name of path_to_stack without its extension,
      'spatial':    cnm_sliced.estimates.A made dense and transposed to roi x pixel,
      'temporal':   cnm_sliced.estimates.C.
    On failure 'failed' and the traceback are printed and None is returned.
  """
  import traceback
  import warnings
  warnings.simplefilter(action='ignore', category=FutureWarning)

  opts.set('data', {'fnames': [path_to_stack]})

  # Pre-initialised so the cleanup in `finally` never touches an unbound name,
  # even when start_server itself raises.
  dview = None
  try:
    #Start a cluster with 2 CPU's (available in colab)
    dview = cm.cluster.start_server(ncpus=2)

    opts.set('data', {'dims': [128, 256]})

    cnm_sliced = cnmf.CNMF(n_processes = 2, params=opts, dview=dview)
    print('cnmf object initialized')
    #indices[0] selects y, indices[1] selects x
    cnm_sliced.fit_file(motion_correct = False, include_eval=True, indices=indices)
    print('cnmf completed')

    #movie identifier = file name without extension, independent of folder depth
    stack_name = os.path.splitext(os.path.basename(path_to_stack))[0]

    #return the cnm object and a dictionary of results
    return cnm_sliced, {'stack_name': stack_name,
                          'spatial': np.transpose(cnm_sliced.estimates.A.A, axes = (1,0)), #de-sparsified (.A) and transposed to put the axes in roi x pixel order
                          'temporal': cnm_sliced.estimates.C}

  except Exception:
    #keep the "print and return None" contract, but show what went wrong;
    #KeyboardInterrupt is no longer swallowed
    print('failed')
    traceback.print_exc()
    return None

  finally:
    #always shut the cluster down, on success, failure, or interrupt
    cm.stop_server(dview=dview)

# Run everything below this cell to extract, validate, and save outputs. Change the `path_to_stack` and `path_to_masks` variables to point to the files you want to extract.

In [15]:
#manually setting the paths to the example files in the new folder
#The folder part is shared, so only the two file names below need to change

seeded_cnmf_folder = '/content/drive/MyDrive/DS6011_Capstone_VisualNeuroscience/Seeded_CNMF'
stack_filename = '210802_0_1_stackRaw_mc_mix1_syt_.mat'
# NOTE(review): the masks file name starts with 2110802 while the movie starts with
# 210802; it is kept exactly as it was - confirm it matches the file on Drive.
masks_filename = '2110802_0_1_manualROIs_mix1_syt.mat'

path_to_stack = f'{seeded_cnmf_folder}/Preformatted_Movies/{stack_filename}'
path_to_masks = f'{seeded_cnmf_folder}/Seed_Masks/{masks_filename}'

In [16]:
!ls ../drive/MyDrive/DS6011_Capstone_VisualNeuroscience/Seeded_CNMF/Seed_Masks/2110802_0_1_manualROIs_mix1_syt.mat

../drive/MyDrive/DS6011_Capstone_VisualNeuroscience/Seeded_CNMF/Seed_Masks/2110802_0_1_manualROIs_mix1_syt.mat


In [17]:
#Call the function, passing in a string path to the movie, path to masks, and the params object('opts')
# NOTE: sliced_cnmf accepts path_to_masks but does not read it; call seeded_cnmf with the
# same arguments to seed the extraction with the manual ROI masks instead.

cnmf_output = sliced_cnmf(path_to_stack = path_to_stack, #from the pairs of files above, could be swapped out for string directly
                          path_to_masks = path_to_masks, #same as above
                          opts = opts) #params object globally initialized earlier

# The pipeline functions print 'failed' and return None when the run raises; stop here
# with a clear message instead of an unpacking TypeError.
if cnmf_output is None:
  raise RuntimeError('cnmf extraction failed - see the messages printed above')
cnm_object, results = cnmf_output

#visualize results
cnm_object.estimates.nb_view_components(denoised_color = 'red')

Waiting for connection file: ~/.ipython/profile_default/security/ipcontroller-client.json
..............



seeded cnmf object initialized
USING MODEL:/root/caiman_data/model/cnn_model.json
....seeded cnmf completed


<caiman.source_extraction.cnmf.estimates.Estimates at 0x7f39120adfd0>

In [12]:
#ONLY RUN THIS CELL IF THERE ARE ROIS TO REMOVE
# NOTE: this cell edits `results` in place, so running it a second time drops rois
# from the already-reduced arrays.

def _parse_roi_numbers(text, n_rois):
  """Parse a comma-separated string such as '1,3,4' into a list of roi numbers.

  Blank entries are skipped, so empty input gives an empty list. Raises ValueError
  if an entry is not an integer or is outside 1..n_rois. Without the range check,
  roi number 0 would become index -1 and silently remove the last roi.
  """
  roi_numbers = [int(token) for token in text.split(',') if token.strip()]
  out_of_range = [number for number in roi_numbers if not 1 <= number <= n_rois]
  if out_of_range:
    raise ValueError(f'roi numbers must be between 1 and {n_rois}, got {out_of_range}')
  return roi_numbers

print(f'{len(results["temporal"])} rois extracted')
#collect a list of rois to drop
rois_to_drop = _parse_roi_numbers(
    input('enter the roi numbers to drop in this format: "1,3,4" with no spaces or quotes then hit enter'),
    len(results['temporal']))

#convert from roi number to index
roi_indices_to_drop = [x-1 for x in rois_to_drop]

#remove rois from spatial and temporal matrix (nothing to do when the list is empty)
if roi_indices_to_drop:
  results['temporal'] = np.delete(results['temporal'], roi_indices_to_drop, axis = 0)
  results['spatial'] = np.delete(results['spatial'], roi_indices_to_drop, axis = 0)

print(f'{len(results["temporal"])} rois kept')

12 rois extracted
enter the roi numbers to drop in this format: "1,3,4" with no spaces or quotes then hit enter5,9
10 rois kept


## save results

In [13]:
# Build the path of the result file from the results folder and the movie identifier.
# NOTE(review): this folder is spelled 'Seeded CNMF' (with a space) while the input
# paths in this notebook use 'Seeded_CNMF' - confirm the folder exists on Drive.
results_folder_path ='/content/drive/MyDrive/DS6011_Capstone_VisualNeuroscience/Seeded CNMF/Results/'
result_h5_path = f"{results_folder_path}{results['stack_name']}_result.h5"
result_h5_path

'/content/drive/MyDrive/DS6011_Capstone_VisualNeuroscience/Seeded CNMF/Results/EXAMPLE_210728_0_1_stackRaw_mc_tm2_tm9_syt_result.h5'

In [14]:
# Save each entry of the results dict as a dataset of the same name in a new hdf5 file
# (mode "w" replaces any existing file at result_h5_path).
with h5py.File(result_h5_path, "w") as result_file:
  for dataset_name in ('stack_name', 'temporal', 'spatial'):
    result_file.create_dataset(dataset_name, data=results[dataset_name])


## Uncomment and run these cells if you want to verify that the results were saved correctly

In [39]:
# #Read those results back to test
# results_read = h5py.File(result_h5_path,'r')
# results_read.keys()

<KeysViewHDF5 ['spatial', 'stack_name', 'temporal']>

In [42]:
# #verify that everything written to the file is identical to the original results
# #(np.array_equal compares element by element; comparing the two .all() values only compares two booleans)
# print(np.array_equal(results_read['spatial'][:], results['spatial']))
# print(np.array_equal(results_read['temporal'][:], results['temporal']))
# print(results_read['stack_name'][()] == results['stack_name'])

True
True
True


In [None]:
# #close the results file
# results_read.close()