Code for extracting segment-based ROIs from the Keller/Zlatic data

In [1]:
# Load IPython's autoreload extension and select mode 2, so imported modules are
# reloaded before each cell executes and edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import copy
from pathlib import Path

import numpy as np
import pandas as pd
import pyspark
import tifffile

from janelia_core.fileio.exp_reader import read_img_file
from keller_zlatic_vnc.raw_data_processing import generate_rois_from_segments
from keller_zlatic_vnc.raw_data_processing import video_to_roi_baselines



## Parameters go here

In [3]:
# All tunable settings live in this one dictionary. Besides being read by the
# cells below, the whole dictionary is passed to video_to_roi_baselines as
# `extract_params`.
ps = dict()

# Location of file defining segments
ps['segmentation_file'] = r'A:\projects\keller_vnc\data\segmentations\1099-20210627-nuclei_label_filtered_expanded_masked_warp.tif'

# Location of Excel file specifying where the data for each experiment is saved relative to the base folder
ps['data_loc_file'] = r'A:\projects\keller_vnc\data\EM_volume_experiment_data_locations.xlsx'

# Specify size of images
# NOTE(review): axis order is not established anywhere in this notebook - confirm before relying on it.
ps['image_size'] = (86, 851, 509)

# Name of the subfolder we create under the [Main Folder]\[Subfolder]\ directory to save extracted results
ps['extracted_folder'] = 'extracted\\rois_segments'

# Additional parameters (constant for all datasets) specifying where the data is stored.
# A raw string needs only a single backslash; the previous r'K:\\SV4' stored two
# literal backslashes and only worked because pathlib collapses repeated separators.
ps['image_base_folder'] = r'K:\SV4'
ps['image_processed_folder'] = 'Results\\WeightFused'
ps['image_ext'] = r'weightFused.TimeRegistration.klb'

# Parameters for how we preprocess each image: the value preprocess_f writes
# over every entry that is exactly zero
ps['0_sub_value'] = 100.0

# Parameters for calculating baseline (forwarded unchanged as `baseline_calc_opts`
# to video_to_roi_baselines)
ps['baseline_calc_opts'] = {'window_length': 41, 'filter_start': -20, 'write_offset': 20, 'p': .1, 'n_processes': 80}

## Read in the Excel file specifying the location of each dataset

In [4]:
def c_fcn(str):
    """Return the given text with every single-quote character (') removed.

    Used below as the per-cell converter when the data-location spreadsheet is read.
    The parameter name shadows the builtin ``str``; it is kept so the function's
    signature stays the same.
    """
    # Splitting on the quote and re-joining the pieces drops every quote character.
    pieces = str.split("'")
    return "".join(pieces)
# Register the quote-stripping converter under converter keys 0 and 1.
# NOTE(review): confirm in the pandas docs whether integer converter keys count
# columns of the sheet or columns of the usecols selection.
converters = dict.fromkeys((0, 1), c_fcn)

# header=1 takes the column names from the second row of the sheet;
# usecols=[1, 2] keeps only the sheet columns at zero-based positions 1 and 2.
data_locs = pd.read_excel(
    ps['data_loc_file'],
    header=1,
    usecols=[1, 2],
    converters=converters,
)

## Load the segmentation file

In [5]:
# Read the segmentation TIFF named in the parameters.
seg_image = tifffile.imread(ps['segmentation_file'])
# Build the ROIs from the segmentation; `rois` is later handed to video_to_roi_baselines.
# NOTE(review): presumably one ROI per labelled segment - confirm in generate_rois_from_segments.
rois = generate_rois_from_segments(seg_image)

## Create a Spark context

In [6]:
# Configure Spark to run locally with 20 worker threads and set the memory limits,
# one property per call so each setting is easy to spot and edit.
conf = (
    pyspark.SparkConf()
    .setMaster('local[20]')
    .set('spark.executor.memory', '10g')
    .set('spark.driver.memory', '400g')
    .set('spark.driver.maxResultSize', '300g')
)

# The context created here is passed to video_to_roi_baselines as `sc`.
sc = pyspark.SparkContext(conf=conf)

## Process each dataset

In [7]:
def preprocess_f(x):
    """Overwrite every entry of ``x`` that equals zero with ``ps['0_sub_value']``.

    The replacement is done in place: the array passed in is modified and that
    same object is returned.
    """
    fill_value = ps['0_sub_value']
    zero_mask = x == 0
    x[zero_mask] = fill_value
    return x

In [8]:
# Options forwarded to video_to_roi_baselines as `roi_extract_opts`; the only entry
# is the preprocessing function defined above.
roi_extract_opts = dict(preprocess_f=preprocess_f)

In [11]:
# Preview the image directory of the first dataset. The path is built directly from
# the parameters and the location table so this cell also works on a fresh kernel;
# the bare name `base_data_dir` is only assigned inside the processing loop further down.
Path(ps['image_base_folder']) / data_locs['Main folder'].iloc[0] / data_locs['Subfolder'].iloc[0] / ps['image_processed_folder']

WindowsPath('K:/SV4/CW_18-02-15/L1-561nm-openLoop_20180215_163233.corrected/Results/WeightFused')

In [10]:
# Run extraction and baseline calculation for every dataset listed in the location table.
n_subjects = len(data_locs)
for d_i, (main_folder, subfolder) in enumerate(zip(data_locs['Main folder'], data_locs['Subfolder'])):
    print('**********************************************************************************************')
    print(f'Processing subject {d_i + 1} of {n_subjects}')

    # Both the image folder and the results folder live under the same per-subject directory.
    subject_dir = Path(ps['image_base_folder']) / main_folder / subfolder
    base_data_dir = subject_dir / ps['image_processed_folder']
    save_dir = subject_dir / Path(ps['extracted_folder'])

    roi_info = video_to_roi_baselines(
        base_data_dir=base_data_dir,
        save_dir=save_dir,
        roi_extract_opts=roi_extract_opts,
        extract_params=ps,
        img_file_ext=ps['image_ext'],
        baseline_calc_opts=ps['baseline_calc_opts'],
        new_comp=False,
        sc=sc,
        rois=rois,
    )

**********************************************************************************************
Processing subject 1 of 1
Save directory does not already exist.  Creating: K:\SV4\CW_18-02-15\L1-561nm-openLoop_20180215_163233.corrected\extracted\rois_segments
Beginning supervoxel extraction.
Searching for image files...
Found 10367 images.
Extracting: 11958 ROIs from 10367 images.
Extracted 11958 ROIS in 630.0572121143341 seconds.
Beginning baseline calculation.
Baselines calculated in 99.68912553787231 seconds.
