This notebook recalculates baselines after fluorescence has been extracted for the specified super voxels.

In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules before
# each cell executes, so edits to imported library code take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os.path
from pathlib import Path
import pickle

import h5py
import pandas as pd
import pyspark

from janelia_core.dataprocessing.baseline import percentile_filter_multi_d
from janelia_core.dataprocessing.dataset import ROIDataset
from janelia_core.fileio.data_handlers import NDArrayHandler


## Parameters go here

In [4]:
# All user-tunable settings live in this one dictionary. The baseline calculation loop
# pickles the whole dictionary next to every baseline file it writes, so it also serves
# as a record of how each baseline was produced.
ps = dict()

# Location of Excel file specifying where the data for each experiment is saved relative to the base folder
ps['data_loc_file'] = r'A:\projects\keller_vnc\data\EM_volume_experiment_data_locations.xlsx'

# Additional parameters (constant for all datasets) specifying where the data is stored.
# This is a raw string, so the path separator is written as a single backslash.
ps['base_folder'] = r'W:\SV4'
ps['dataset_folder'] = 'extracted'

# Options for calculating new baselines (passed as keyword arguments to percentile_filter_multi_d)
ps['baseline_calc_opts'] = {'window_length': 3001, 'filter_start': -1500, 'write_offset': 1500, 'p': .1, 'n_processes': 80}

# Name of the pickle file the contents of ps are saved to alongside each new baseline file
ps['extract_params_file_name'] = 'long_baseline_extract_params.pkl'

# Specify the roi groups we compute baselines for.  Each entry is the name of the folder holding one
# group's extracted fluorescence; the new baseline file is saved into that same folder.
#
# Other roi group folders that can be listed here:
#     'brain_rois_1_5_5', 'brain_rois_2_10_10', 'brain_rois_4_20_20',
#     'roi_segments_3_13_13', 'roi_segments_4_17_17',
#     'rois_1_5_5', 'rois_2_10_10', 'rois_4_20_20'
roi_group_folders = ['roi_segments_cell_bodies_20200610']

# Every group uses the same input and output file names; only the folder differs
ps['new_comps'] = [{'f_filename': 'extracted_f.h5',
                    'new_bl_filename': 'baseline_f_long.h5',
                    'baseline_save_folder': folder}
                   for folder in roi_group_folders]

## Read in the Excel file specifying the location of each dataset

In [5]:
def c_fcn(str):
    """Strip every single-quote character out of a piece of text.

    Used as a converter when the data-location spreadsheet is read.

    Args:
        str: The text to clean.  Note that inside this function the name
            shadows the builtin ``str`` type.

    Returns:
        The text with all ``'`` characters removed.
    """
    # Splitting on the quote and gluing the pieces back together drops the quotes
    pieces = str.split("'")
    return "".join(pieces)
# Run the quote-stripping converter on columns 0 and 1.
# NOTE(review): the keys are integer column positions - confirm whether pandas resolves them
# against the columns selected by usecols or against the full sheet.
converters = dict.fromkeys((0, 1), c_fcn)

# Column labels are taken from the second row of the sheet (header=1) and only
# sheet columns 1 and 2 are loaded.
data_locs = pd.read_excel(
    ps['data_loc_file'],
    header=1,
    usecols=[1, 2],
    converters=converters,
)

## Recalculate baselines for each dataset

In [6]:
# Recalculate the baseline for every roi group of every dataset listed in the spreadsheet.
#
# A baseline file that already exists is taken to mean that the work for that roi group is done.
# To make that safe, the baseline is first written under a temporary name and is only renamed to
# its final name as the very last step, after the parameters have been saved.  An interrupted run
# therefore never leaves behind a partial baseline file that later runs would silently skip.
n_datasets = len(data_locs)
for d_i in range(n_datasets):

    # Get relevant paths (rows are looked up by position, so this does not depend on index labels)
    data_main_folder = data_locs['Main folder'].iloc[d_i]
    data_sub_folder = data_locs['Subfolder'].iloc[d_i]

    # Base save path - this is the one which holds the subfolders holding the fluorescence and baseline data
    base_save_dir = Path(ps['base_folder']) / data_main_folder / data_sub_folder / Path(ps['dataset_folder'])

    for grp_specs in ps['new_comps']:

        # Form paths to the extracted fluorescence and to where we will save the new baseline data
        grp_dir = base_save_dir / grp_specs['baseline_save_folder']
        fluoresence_file = grp_dir / grp_specs['f_filename']
        baseline_file = grp_dir / grp_specs['new_bl_filename']

        # Nothing to do if a finished baseline file is already in place
        if baseline_file.exists():
            print('Baselines already calculated for ' + str(fluoresence_file))
            continue

        # Get the fluorescence data (the whole 'data' dataset is read into memory)
        with h5py.File(fluoresence_file, 'r') as fluorescence_h:
            fluorescence_vls = fluorescence_h['data'][:]

        # Calculate baselines
        baseline_vls = percentile_filter_multi_d(fluorescence_vls, **ps['baseline_calc_opts'])
        baseline_vls = baseline_vls.astype('float32')

        # The raw fluorescence is no longer needed; release it before writing and before the
        # next group's data is loaded
        del fluorescence_vls

        # Save extracted baseline information under a temporary name for now
        tmp_baseline_file = baseline_file.with_name(baseline_file.name + '.tmp')
        with h5py.File(tmp_baseline_file, 'w') as baseline_h:
            baseline_h.create_dataset('data', data=baseline_vls)

        # Now we save extraction parameters
        param_save_file = grp_dir / ps['extract_params_file_name']
        with open(param_save_file, 'wb') as param_h:
            pickle.dump(ps, param_h)

        # Everything has been written - move the baseline file to its final name, which marks
        # this roi group as done
        tmp_baseline_file.replace(baseline_file)

        print('Done calculating new baselines for ' + str(fluoresence_file))

    print('Done with dataset ' + str(d_i+1) + ' of ' + str(n_datasets) + '.')

Done calculating new baselines for W:\SV4\CW_18-02-15\L1-561nm-openLoop_20180215_163233.corrected\extracted\roi_segments_cell_bodies_20200610\extracted_f.h5
Done with dataset 1 of 1.
