<a href="https://colab.research.google.com/github/laurenneal/capstone-visual-neuroscience/blob/Dylan/run_validation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebook is to set up a process for running cnmf and validation in a single call from score_params

## Setup

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Install CaImAn - takes around 2 minutes

!git clone https://github.com/flatironinstitute/CaImAn.git
%cd '/content/CaImAn/'
!pip install -e .

# Install caiman dependencies (&> /dev/null will suppress the hundreds of printed lines in the output)
!pip install -r requirements.txt &> /dev/null

#import other dependencies
import cv2
import glob
import os
import numpy as np
import matplotlib.pyplot as plt
import imageio

#IMPORTANT! Newer versions of h5py will cause errors when saving results
!pip install h5py==2.10.0
import h5py

#Set up caiman
!python setup.py build_ext -i

#Other file setup
!python caimanmanager.py install --inplace

#Caiman imports
import caiman as cm
from caiman.motion_correction import MotionCorrect
from caiman.source_extraction.cnmf import cnmf as cnmf
from caiman.source_extraction.cnmf import params as params
from caiman.utils.utils import download_demo
from caiman.utils.visualization import plot_contours, nb_view_patches, nb_plot_contour
from caiman.summary_images import local_correlations_movie_offline
from scipy.ndimage import center_of_mass
from IPython.display import display, clear_output

Cloning into 'CaImAn'...
remote: Enumerating objects: 24916, done.[K
remote: Counting objects: 100% (851/851), done.[K
remote: Compressing objects: 100% (421/421), done.[K
remote: Total 24916 (delta 475), reused 739 (delta 414), pack-reused 24065[K
Receiving objects: 100% (24916/24916), 518.52 MiB | 27.67 MiB/s, done.
Resolving deltas: 100% (16688/16688), done.
Checking out files: 100% (317/317), done.
/content/CaImAn
Obtaining file:///content/CaImAn
Installing collected packages: caiman
  Running setup.py develop for caiman
Successfully installed caiman-1.9.6
Collecting h5py==2.10.0
  Downloading h5py-2.10.0-cp37-cp37m-manylinux1_x86_64.whl (2.9 MB)
[K     |████████████████████████████████| 2.9 MB 28.0 MB/s 
Installing collected packages: h5py
  Attempting uninstall: h5py
    Found existing installation: h5py 3.1.0
    Uninstalling h5py-3.1.0:
      Successfully uninstalled h5py-3.1.0
Successfully installed h5py-2.10.0
running build_ext
Installed /root/caiman_data


## Define score_params function

In [3]:
def score_params(path_to_stack, path_to_masks, opts):
  fnames = [path_to_stack]

  #start a cluster for parallel processing (if a cluster already exists it will be closed and a new session will be opened)
  if 'dview' in locals():
      cm.stop_server(dview=dview)
  c, dview, n_processes = cm.cluster.setup_cluster(
      backend='local', n_processes=None, single_thread=False)
  
  print('cluster started')

  #Run CNMF on raw stack using passed params
  cnm = cnmf.CNMF(n_processes, params=opts, dview=dview)
  print('cnm object initialized')
  cnm = cnm.fit_file(motion_correct = False, include_eval=True)
  print('cnmf and component evaluation completed')


  #Read in masks and reformat
  g = h5py.File(path_to_masks, 'r')

  #transpose the matrix and save to an array A
  mask_A = g['bwMaskStack'][:].T

  g.close()

  #rearrange the dimensions and show the new shape
  mask_A = mask_A.transpose(1,0,2)

  #reshape to 2D, first dimension is 128*256 (32768), 2nd dimension is the # of ROI's
  mask_A = mask_A.reshape((mask_A.shape[1]*mask_A.shape[0]), mask_A.shape[2])

  #convert the values from 0/1 to boolean False/True
  mask_A = np.array(mask_A, dtype=bool)
  print('mask read in and reformatted')



  #Seeded CNMF only works when seeded using mmap
  import pathlib

  #create memmory map location for the original movie
  fname_new = cm.save_memmap(fnames, base_name=pathlib.Path(fnames[0]).stem + "_memmap_", order='C')

  #read data from mmap location
  Yr, dims, T = cm.load_memmap(fname_new)

  # load frames in python format (T x X x Y)
  mov = np.reshape(Yr.T, [T] + list(dims), order='F')
  print('movie matrix loaded for seeded cnmf')


  #For seeded CNMF, need to adjust some params
  rf = None
  only_init = False

  opts.patch['only_init'] = only_init
  opts.patch['rf'] = rf

  print('params adjusted for seeded cnmf')

  #restart cluster to clean up memory
  cm.stop_server(dview=dview)
  c, dview, n_processes = cm.cluster.setup_cluster(
      backend='local', n_processes=None, single_thread=False)
  
  print('cluster restarted to clean up memory')
  
  #Initialize a new cnmf object and pass in our masks as the "Ain" param
  #"Ain" is A-in, meaning the A matrix holding the spatial footprints of the roi's
  cnm_seeded = cnmf.CNMF(n_processes, params=opts, dview=dview, Ain=mask_A)
  print('seeded cnmf object initialized')
  cnm_seeded.fit(mov)
  print('seeded cnmf completed')

  #Try running register_ROIs and see what happens
  matched_ROIs1, matched_ROIs2, non_matched1, non_matched2, performance, A2 = cm.base.rois.register_ROIs(cnm_seeded.estimates.A, cnm.estimates.A, dims=dims)
  print('validation and scoring completed')

  #terminate cluster
  cm.stop_server(dview=dview)

  #return performance metrics
  return matched_ROIs1, matched_ROIs2, non_matched1, non_matched2, performance, A2

## Set up parameters object

### this object will be different each time we pass it through. This is the object we tweak for our gridsearch
### most of this can be removed eventually

In [4]:
#temporary, declare fname, subfolder, and mask path ahead of time.
#will need to implement this in a loop for gridsearch

#temporarily running off of subset of movie, just 500 frames
fnames = ['../drive/MyDrive/DS6011_Capstone_VisualNeuroscience/DATA/subset_stackRaw_mc.h5'] #needs to be a list
subfolder = 'stackRaw_mc' #name of folder within mat file
path_to_masks = '../drive/MyDrive/DS6011_Capstone_VisualNeuroscience/DATA/manualROIs/manualROIs_fixed/210728_0_1_manualROIs_tm2_tm9_syt.mat' #not a list


In [5]:
#create parameters object
opts = params.CNMFParams()

In [6]:
# set up some parameters for extraction
#fnames = path to video file, set above
                        # file(s) to be analyzed
is_patches = False       # flag for processing in patches or not - CONFIRMED FROM CARL
fr = 20                 # approximate frame rate of data - CONFIRMED FPS
decay_time = .4        # length of transient - CONFIRMED APPROPRIATE FOR OUR INDICATOR GCaMP6f
dims = [128, 256]

if is_patches:          # PROCESS IN PATCHES AND THEN COMBINE - not used
    rf = 25             # half size of each patch - not used
    stride = 5          # overlap between patches - not used
    K = 3               # number of components in each patch - not used
else:                   # PROCESS THE WHOLE FOV AT ONCE
    rf = None           # setting these parameters to None - CONFIRMED
    stride = None       # will run CNMF on the whole FOV - CONFIRMED
    K = 40              # number of neurons expected (in the whole FOV) - TUNE

gSig = [4, 4]           # expected half size of neurons - TUNE
tau = 0                 ## Lauren adding std of gaussian kernel (size of neuron)
                        ## gSig = size of kernel (default 2*tau + 1).
merge_thresh = 0.95     # merging threshold, max correlation allowed - TUNE
p = 0                   # order of the autoregressive system - From carl's code, probably should be 1
gnb = 1                 # global background order - TUNE

fudge = 1               ## Lauren adding temporal param fudge_factor (default is 0.96; Carl's value = 1) -- bias correction factor for discrete time constants
temp_iter = 5           ## Lauren adding temporal param ITER (default is 2; Carl's value=5) -- block coordinate descent iterations
srch_meth = 'dilate' ## Lauren adding spatial param search_method (Carl's set to 'dilate')
thr_meth = 'nrg'        ## Lauren adding spatial param thr_method

var_name_hdf5 = subfolder #variable to path caiman into the subfolder within our mat files

opts.set('data', {'fnames': fnames,
                   'fr': fr,
                   'decay_time': decay_time,
                   'dims': dims,
                   'rf': rf,
                   'stride': stride,
                   'K': K,
                   ## 'gSig': gSig,
                   'merge_thr': merge_thresh,
                   'p': p,
                   'nb': gnb,
                   'fudge_factor': fudge,
                   'ITER': temp_iter,
                   'thr_method': thr_meth,
                   'method': srch_meth
                  
                  })




In [7]:
# %% COMPONENT EVALUATION
# the components are evaluated in three ways:
#   a) the shape of each component must be correlated with the data
#   b) a minimum peak SNR is required over the length of a transient
#   c) each shape passes a CNN based classifier (this will pick up only neurons
#           and filter out active processes)


#Not sure if these should be tuned or not

min_SNR = 2.5      # peak SNR for accepted components (if above this, acept)
rval_thr = 0.9     # space correlation threshold (if above this, accept)
use_cnn = True      # use the CNN classifier
min_cnn_thr = 0.9  # if cnn classifier predicts below this value, reject
cnn_lowest = 0.1 # neurons with cnn probability lower than this value are rejected

opts.set('quality', {'min_SNR': min_SNR,
                                'rval_thr': rval_thr,
                                'use_cnn': use_cnn,
                                'min_cnn_thr': min_cnn_thr,
                                'cnn_lowest': cnn_lowest})

In [8]:
#THE MOTION PARAMS DOES NOT PULL IN THE VAR NAME BY DEFAULT
#NEEDS TO BE MANUALLY INJECTED SO IT GETS PULLED INTO MOTION CORRECT WITH THE REST

opts.motion['var_name_hdf5'] = subfolder
opts.data['var_name_hdf5'] = subfolder

## Run score_params and return accuracy

In [9]:
%%capture
matched_ROIs1, matched_ROIs2, non_matched1, non_matched2, performance, A2 = score_params(path_to_stack=fnames[0], \
                                                                                         path_to_masks=path_to_masks, \
                                                                                         opts=opts)

In [None]:
performance

{'accuracy': 0.08333333333333333,
 'f1_score': 0.15384615384615385,
 'precision': 0.1111111111111111,
 'recall': 0.25}