<a href="https://colab.research.google.com/github/laurenneal/capstone-visual-neuroscience/blob/Dylan/Validation_Wrapper.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Setup

In [42]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [1]:
# Install CaImAn - takes around 2 minutes

!git clone https://github.com/flatironinstitute/CaImAn.git
%cd '/content/CaImAn/'
!pip install -e .

# Install caiman dependencies (&> /dev/null will suppress the hundreds of printed lines in the output)
!pip install -r requirements.txt &> /dev/null

#import other dependencies
import cv2
import glob
import numpy as np
import os
import matplotlib.pyplot as plt
import imageio

#IMPORTANT! Newer versions of h5py will cause errors when saving results
!pip install h5py==2.10.0
import h5py

#Set up caiman
!python setup.py build_ext -i

#Other file setup
!python caimanmanager.py install --inplace

#Caiman imports
import caiman as cm
from caiman.motion_correction import MotionCorrect
from caiman.source_extraction.cnmf import cnmf as cnmf
from caiman.source_extraction.cnmf import params as params
from caiman.utils.utils import download_demo
from caiman.utils.visualization import plot_contours, nb_view_patches, nb_plot_contour
from caiman.summary_images import local_correlations_movie_offline
from scipy.ndimage import center_of_mass
from IPython.display import display, clear_output

Cloning into 'CaImAn'...
remote: Enumerating objects: 24933, done.[K
remote: Counting objects: 100% (868/868), done.[K
remote: Compressing objects: 100% (432/432), done.[K
remote: Total 24933 (delta 481), reused 753 (delta 419), pack-reused 24065[K
Receiving objects: 100% (24933/24933), 518.57 MiB | 14.23 MiB/s, done.
Resolving deltas: 100% (16694/16694), done.
Checking out files: 100% (317/317), done.
/content/CaImAn
Obtaining file:///content/CaImAn
Installing collected packages: caiman
  Running setup.py develop for caiman
Successfully installed caiman-1.9.7
Collecting h5py==2.10.0
  Downloading h5py-2.10.0-cp37-cp37m-manylinux1_x86_64.whl (2.9 MB)
[K     |████████████████████████████████| 2.9 MB 4.2 MB/s 
Installing collected packages: h5py
  Attempting uninstall: h5py
    Found existing installation: h5py 3.1.0
    Uninstalling h5py-3.1.0:
      Successfully uninstalled h5py-3.1.0
Successfully installed h5py-2.10.0
running build_ext
Installed /root/caiman_data


## Get paths to movie files and labelled ROI masks

In [2]:
#get a list of our masks and a list of our stacks in the same order
from os import listdir
maskpath = '../drive/MyDrive/DS6011_Capstone_VisualNeuroscience/DATA/manualROIs'
mask_filenames = [f for f in listdir(maskpath) if 'CLEAN' not in f and '.ipynb' not in f]
mask_filenames

['210815_0_1_manualROIs_mix1_syt.mat',
 '211106_1_1_manualROIs_tm4_syt.mat',
 '211023_0_1_manualROIs_tm2_tm4_syt.mat',
 '210731_0_1_manualROIs_t5_syt.mat',
 '210728_0_1_manualROIs_tm2_tm9_syt.mat',
 '211106_0_1_manualROIs_tm1_syt.mat']

In [3]:
stack_indices = [x[:10] for x in mask_filenames] #get the index portion of the masks
stack_celltypes = [x[22:(len(x)-4)] for x in mask_filenames] #get the cell type descriptions
stack_filenames = ['CLEAN_' + stack_indices[x] + '_stackRaw_mc_' + stack_celltypes[x] \
                   + '_.h5' for x in range(len(mask_filenames))] #reconstruct filenames for the movies we have masks for
stack_filenames

['CLEAN_210815_0_1_stackRaw_mc_mix1_syt_.h5',
 'CLEAN_211106_1_1_stackRaw_mc_tm4_syt_.h5',
 'CLEAN_211023_0_1_stackRaw_mc_tm2_tm4_syt_.h5',
 'CLEAN_210731_0_1_stackRaw_mc_t5_syt_.h5',
 'CLEAN_210728_0_1_stackRaw_mc_tm2_tm9_syt_.h5',
 'CLEAN_211106_0_1_stackRaw_mc_tm1_syt_.h5']

In [4]:
#checking that files exist in our cleaned file - only 2 do
stackpath = '../drive/MyDrive/DS6011_Capstone_VisualNeuroscience/DATA/stackRaw/CLEANED'
stackfiles = [f for f in listdir(stackpath) if f in stack_filenames]
stackfiles

['CLEAN_211106_1_1_stackRaw_mc_tm4_syt_.h5',
 'CLEAN_210728_0_1_stackRaw_mc_tm2_tm9_syt_.h5',
 'CLEAN_210815_0_1_stackRaw_mc_mix1_syt_.h5']

In [5]:
#convert filenames back into paths
maskpaths = [maskpath+'/'+f for f in mask_filenames]
stackpaths = [stackpath+'/'+f for f in stackfiles]

In [6]:
#for now only keep the masks we cleaned have movies for - also in the correct order to match up
maskpaths = [maskpaths[x] for x in [1, 0, 4]]
maskpaths

['../drive/MyDrive/DS6011_Capstone_VisualNeuroscience/DATA/manualROIs/211106_1_1_manualROIs_tm4_syt.mat',
 '../drive/MyDrive/DS6011_Capstone_VisualNeuroscience/DATA/manualROIs/210815_0_1_manualROIs_mix1_syt.mat',
 '../drive/MyDrive/DS6011_Capstone_VisualNeuroscience/DATA/manualROIs/210728_0_1_manualROIs_tm2_tm9_syt.mat']

In [7]:
#join the lists into pairs of tuples
mask_stack_pairs = list(map(lambda x, y:[x,y], maskpaths, stackpaths))
mask_stack_pairs

[['../drive/MyDrive/DS6011_Capstone_VisualNeuroscience/DATA/manualROIs/211106_1_1_manualROIs_tm4_syt.mat',
  '../drive/MyDrive/DS6011_Capstone_VisualNeuroscience/DATA/stackRaw/CLEANED/CLEAN_211106_1_1_stackRaw_mc_tm4_syt_.h5'],
 ['../drive/MyDrive/DS6011_Capstone_VisualNeuroscience/DATA/manualROIs/210815_0_1_manualROIs_mix1_syt.mat',
  '../drive/MyDrive/DS6011_Capstone_VisualNeuroscience/DATA/stackRaw/CLEANED/CLEAN_210728_0_1_stackRaw_mc_tm2_tm9_syt_.h5'],
 ['../drive/MyDrive/DS6011_Capstone_VisualNeuroscience/DATA/manualROIs/210728_0_1_manualROIs_tm2_tm9_syt.mat',
  '../drive/MyDrive/DS6011_Capstone_VisualNeuroscience/DATA/stackRaw/CLEANED/CLEAN_210815_0_1_stackRaw_mc_mix1_syt_.h5']]

## Initialize parameters object with starter values

In [18]:
#create parameters object
opts = params.CNMFParams()
#fname will be assigned in the loop
fnames = []
subfolder = 'stackRaw_mc'

In [19]:
# set up some parameters for extraction
#fnames = path to video file, set above
                        # file(s) to be analyzed
is_patches = False       # flag for processing in patches or not - CONFIRMED FROM CARL
fr = 20                 # approximate frame rate of data - CONFIRMED FPS
decay_time = .4        # length of transient - CONFIRMED APPROPRIATE FOR OUR INDICATOR GCaMP6f
dims = [128, 256]

if is_patches:          # PROCESS IN PATCHES AND THEN COMBINE 
    rf = 50             # half size of each patch -not tuned
    stride = 10          # overlap between patches -not tuned
    K = 3               # number of components in each patch - TUNE
else:                   # PROCESS THE WHOLE FOV AT ONCE
    rf = None           # setting these parameters to None Not used
    stride = None       # will run CNMF on the whole FOV not used
    K = 20              # number of neurons expected (in the whole FOV) - not used

gSig = [4, 4]           # expected half size of neurons - TUNE
merge_thresh = 0.9     # merging threshold, max correlation allowed - TUNE
p = 1                   # order of the autoregressive system - 0 from carl's code, probably should be 1
gnb = 2                 # global background order - TUNE

opts.set('data', {'fnames': fnames,
                   'fr': fr,
                   'decay_time': decay_time,
                   'dims': dims,
                   'rf': rf,
                   'stride': stride,
                   'K': K,
                   'gSig': gSig,
                   'merge_thr': merge_thresh,
                   'p': p,
                   'nb': gnb
                  })


# %% COMPONENT EVALUATION
# the components are evaluated in three ways:
#   a) the shape of each component must be correlated with the data
#   b) a minimum peak SNR is required over the length of a transient
#   c) each shape passes a CNN based classifier (this will pick up only neurons
#           and filter out active processes)


#Not sure if these should be tuned or not

min_SNR = 2.5      # peak SNR for accepted components (if above this, acept)
rval_thr = 0.9     # space correlation threshold (if above this, accept)
use_cnn = True      # use the CNN classifier
min_cnn_thr = 0.9  # if cnn classifier predicts below this value, reject
cnn_lowest = 0.1 # neurons with cnn probability lower than this value are rejected

opts.set('quality', {'min_SNR': min_SNR,
                                'rval_thr': rval_thr,
                                'use_cnn': use_cnn,
                                'min_cnn_thr': min_cnn_thr,
                                'cnn_lowest': cnn_lowest})

#set some temporal params
#fudge = 1               ## Lauren adding temporal param fudge_factor (default is 0.96; Carl's value = 1) -- bias correction factor for discrete time constants
#temp_iter = 5           ## Lauren adding temporal param ITER (default is 2; Carl's value=5) -- block coordinate descent iterations

#opts.set('temporal', {'fudge_factor': fudge,
#                   'ITER': temp_iter
#                  })

#set some spatial params
#srch_meth = 'dilate' ## Lauren adding spatial param search_method (Carl's set to 'dilate')
#thr_meth = 'nrg'        ## Lauren adding spatial param thr_method

#opts.set('spatial', {'thr_method': thr_meth,
#                   'method': srch_meth
#                             })

#Manually assign subfolder variable
opts.motion['var_name_hdf5'] = subfolder
opts.data['var_name_hdf5'] = subfolder

#for development purposes heavily downsample to save time
opts.init['ssub'] = 2
opts.init['tsub'] = 2

## Define pipeline function to run Caiman

In [20]:
#Function to run cnmf, run seeded cnmf using masks, then return validation

def score_params(path_to_stack, path_to_masks, opts):
  import warnings
  warnings.simplefilter(action='ignore', category=FutureWarning)

  fnames = [path_to_stack]

  try:
    
    c, dview, n_processes = cm.cluster.setup_cluster(
        backend='local', n_processes=None, single_thread=False)
    
    #Run CNMF on raw stack using passed params
    cnm = cnmf.CNMF(n_processes, params=opts, dview=dview)
    #print('cnm object initialized')
    cnm = cnm.fit_file(motion_correct = False, include_eval=True)
    #print('cnmf and component evaluation completed')


    #Read in masks and reformat
    g = h5py.File(path_to_masks, 'r')

    #transpose the matrix and save to an array A
    mask_A = g['bwMaskStack'][:].T

    g.close()

    #rearrange the dimensions and show the new shape
    mask_A = mask_A.transpose(1,0,2)

    #reshape to 2D, first dimension is 128*256 (32768), 2nd dimension is the # of ROI's
    mask_A = mask_A.reshape((mask_A.shape[1]*mask_A.shape[0]), mask_A.shape[2])

    #convert the values from 0/1 to boolean False/True
    mask_A = np.array(mask_A, dtype=bool)
    #print('mask read in and reformatted')

    dview.terminate()

    c, dview, n_processes = cm.cluster.setup_cluster(
        backend='local', n_processes=None, single_thread=False)

    #Seeded CNMF only works when seeded using mmap
    import pathlib

    #create memmory map location for the original movie
    fname_new = cm.save_memmap(fnames, base_name=pathlib.Path(fnames[0]).stem + "_memmap_", order='C')

    #read data from mmap location
    Yr, dims, T = cm.load_memmap(fname_new)

    # load frames in python format (T x X x Y)
    mov = np.reshape(Yr.T, [T] + list(dims), order='F')
    #print('movie matrix loaded for seeded cnmf')


    #For seeded CNMF, need to adjust some params
    opts.patch['only_init'] = False
    #opts.patch['rf'] = None
    opts.data['use_cnn'] = False

    #print('params adjusted for seeded cnmf')
    
    #Initialize a new cnmf object and pass in our masks as the "Ain" param
    #"Ain" is A-in, meaning the A matrix holding the spatial footprints of the roi's
    cnm_seeded = cnmf.CNMF(n_processes, params=opts, dview=dview, Ain=mask_A)
    #print('seeded cnmf object initialized')
    cnm_seeded.fit(mov)
    #print('seeded cnmf completed')

    #Try running register_ROIs and see what happens
    matched_ROIs1, matched_ROIs2, non_matched1, non_matched2, performance, A2 = cm.base.rois.register_ROIs(cnm_seeded.estimates.A, cnm.estimates.A, dims=dims)
    #print('validation and scoring completed')

    #terminate cluster
    dview.terminate()

    #restore params for the next regular cnmf iteration
    opts.patch['only_init'] = True
    opts.patch['rf'] = 50
    opts.data['use_cnn'] = False

    #return performance metrics
    return {'matched_ROIs1': matched_ROIs1,
            'matched_ROIs2': matched_ROIs2,
            'non_matched1': non_matched1,
            'non_matched2': non_matched2, 
            'performance': performance,
            'A2': A2,
            'cnm_estimates': cnm.estimates.A,
            'seeded_cnm_estimates': cnm_seeded.estimates.A}

  except:
    print('failed')
    dview.terminate()

## Function to score all our stacks and return the avg f1

In [21]:
#loop through the stack-mask pairs and print the accuracy results

def score_all_stacks(all_params):

  print(all_params)

  K = all_params[0]
  gnb = all_params[1]
  merge_thresh = all_params[2]
  gSig = [all_params[3], all_params[4]] #param passes one value, recreate [x,y] structure


  from statistics import mean

  #hard coded paths since these won't change for now, proceduralize later - 1 just to test
  mask_stack_pairs = [['../drive/MyDrive/DS6011_Capstone_VisualNeuroscience/DATA/manualROIs/211106_1_1_manualROIs_tm4_syt.mat',
  '../drive/MyDrive/DS6011_Capstone_VisualNeuroscience/DATA/stackRaw/CLEANED/CLEAN_211106_1_1_stackRaw_mc_tm4_syt_.h5'],
 ['../drive/MyDrive/DS6011_Capstone_VisualNeuroscience/DATA/manualROIs/210815_0_1_manualROIs_mix1_syt.mat',
  '../drive/MyDrive/DS6011_Capstone_VisualNeuroscience/DATA/stackRaw/CLEANED/CLEAN_210815_0_1_stackRaw_mc_mix1_syt_.h5'],
 ['../drive/MyDrive/DS6011_Capstone_VisualNeuroscience/DATA/manualROIs/210728_0_1_manualROIs_tm2_tm9_syt.mat',
  '../drive/MyDrive/DS6011_Capstone_VisualNeuroscience/DATA/stackRaw/CLEANED/CLEAN_210728_0_1_stackRaw_mc_tm2_tm9_syt_.h5']]


  # k, merge_thresh, p, gnb to tune

  #Assign params passed in by the wrapper function
  opts.data['K'] = K
  opts.data['nb'] = gnb
  opts.data['merge_thresh'] = merge_thresh
  opts.data['gSig'] = gSig

  #store precisions from each stack-mask pair for this iteration in a list, we'll return the average
  f1s = []

  #this loop will run all stacks through caiman using a given set of parameters to get the accuracy
  for pair in mask_stack_pairs:
    path_to_stack = pair[1]
    path_to_masks = pair[0]

    #set fnames before calling caiman function
    opts.data['fnames'] = [path_to_stack]
    print(opts.data['fnames'])

    results = score_params(path_to_stack=path_to_stack, \
              path_to_masks=path_to_masks, \
              opts=opts)
    
    #store this stack-mask pair's precision
    #later handle some kind of logging to keep a dict of info returned
    f1s.append(results['performance']['f1_score'])

  #return the mean precision for all mask-stack pairs
  #negative of the precision, so that the optimizer can minimize
  print(f1s)
  return -mean(f1s)



## Bayesian Model Optimization

In [22]:
!pip install scikit-optimize
import skopt
from skopt.callbacks import CheckpointSaver
from skopt.callbacks import DeltaYStopper
from skopt import plots



In [23]:
#define the parameter space - this is the entirety of the space I want to explore and will take a long time to run
SPACE = [
        skopt.space.Integer(5, 25, name='K'), #number of ROIs to expect in the FOV
        skopt.space.Integer(1,3, name='gnb'), #measure of how much background noise to remove, minimum 2 but higher could remove ROIs
        skopt.space.Real(.85, .99, name='merge_thresh'), #how correlated ROIs need to be before they get merged
        skopt.space.Integer(5, 15, name = 'gSig_wid'),#half-width of neurons - Supposedly the most important
        skopt.space.Integer(2, 10, name = 'gSig_hei')] #half-height of neurons - Supposedly the most important

In [24]:
#saves a checkpoint every iteration to this filepath

#CHANGE THIS PATH OR FILENAME BEFORE YOU RUN THIS - LEAVE THE EXTENSION AS .PKL
checkpoint_saver = CheckpointSaver('../drive/MyDrive/DS6011_Capstone_VisualNeuroscience/DATA/search_checkpoints2.pkl')

#stopping criteria - stop if the 5 best iterations are within 3% of each other- might want to make more strict
stopping_criteria = skopt.callbacks.DeltaYStopper(.03, n_best=5)


# surrogate function, directly pass in parameter list, returns precision result directly
#this is the function that the optimizer minimizes
#score_all_stacks returns the precision * -1 so that the optimizer can minimize it
def objective(parameters):
    all_params = parameters
    return score_all_stacks(all_params)

In [None]:
#optimizer using objective function, parameter space, checkpointer, stopping criteria
results = skopt.forest_minimize(objective, SPACE, callback=[checkpoint_saver, stopping_criteria], verbose=True, )

Iteration No: 1 started. Evaluating function at random point.
[11, 2, 0.8940542086579915, 11, 3]
['../drive/MyDrive/DS6011_Capstone_VisualNeuroscience/DATA/stackRaw/CLEANED/CLEAN_211106_1_1_stackRaw_mc_tm4_syt_.h5']
USING MODEL:/root/caiman_data/model/cnn_model.json
spatial support for each components given by the user
['../drive/MyDrive/DS6011_Capstone_VisualNeuroscience/DATA/stackRaw/CLEANED/CLEAN_210815_0_1_stackRaw_mc_mix1_syt_.h5']
USING MODEL:/root/caiman_data/model/cnn_model.json
spatial support for each components given by the user


In [None]:
results

In [None]:
skopt.plots.plot_convergence(results)

In [None]:
skopt.plots.plot_objective(results)

## Old working code

In [57]:
# # cnm = cnmf.CNMF(n_processes, params=opts, dview=dview)
# cnm = cnm.fit_file(motion_correct = False, include_eval=True)

In [58]:
# #Read in masks and reformat
# path_to_masks = '../drive/MyDrive/DS6011_Capstone_VisualNeuroscience/DATA/manualROIs/210728_0_1_manualROIs_tm2_tm9_syt.mat'
# g = h5py.File(path_to_masks, 'r')

# #transpose the matrix and save to an array A
# mask_A = g['bwMaskStack'][:].T

# g.close()

# #rearrange the dimensions and show the new shape
# mask_A = mask_A.transpose(1,0,2)

# #reshape to 2D, first dimension is 128*256 (32768), 2nd dimension is the # of ROI's
# mask_A = mask_A.reshape((mask_A.shape[1]*mask_A.shape[0]), mask_A.shape[2])

# #convert the values from 0/1 to boolean False/True
# mask_A = np.array(mask_A, dtype=bool)
# print('mask read in and reformatted')

In [59]:
# import warnings
# warnings.simplefilter(action='ignore', category=FutureWarning)

# #Seeded CNMF only works when seeded using mmap
# import pathlib

# #create memmory map location for the original movie
# fname_new = cm.save_memmap(fnames, base_name=pathlib.Path(fnames[0]).stem + "_memmap_", order='C')
#  #read data from mmap location
# Yr, dims, T = cm.load_memmap(fname_new)

# # load frames in python format (T x X x Y)
# mov = np.reshape(Yr.T, [T] + list(dims), order='F')
# print('movie matrix loaded for seeded cnmf')

# dview.terminate()

# c, dview, n_processes = cm.cluster.setup_cluster(
#     backend='local', n_processes=None, single_thread=False)

# #For seeded CNMF, need to adjust some params
# rf = None
# only_init = False

# opts.patch['only_init'] = only_init
# opts.patch['rf'] = rf

# print('params adjusted for seeded cnmf')
  
# #Initialize a new cnmf object and pass in our masks as the "Ain" param
# #"Ain" is A-in, meaning the A matrix holding the spatial footprints of the roi's
# cnm_seeded = cnmf.CNMF(n_processes, params=opts, dview=dview, Ain=mask_A)
# print('seeded cnmf object initialized')
# cnm_seeded.fit(mov)
# print('seeded cnmf completed')

In [60]:
# matched_ROIs1, matched_ROIs2, non_matched1, non_matched2, performance, A2 = cm.base.rois.register_ROIs(cnm_seeded.estimates.A, cnm.estimates.A, dims=dims)
# performance

In [61]:
# dview.terminate()

In [62]:
# results = score_params(path_to_stack=fnames[0], \
#              path_to_masks='../drive/MyDrive/DS6011_Capstone_VisualNeuroscience/DATA/manualROIs/210728_0_1_manualROIs_tm2_tm9_syt.mat', \
#              opts=opts)
# results['performance']

In [63]:
# dview.terminate()