Script to generate datasets from extracted ROI data

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell is executed
%load_ext autoreload
%autoreload 2

In [2]:
import copy
import re
from pathlib import Path
import pickle
import warnings

import pandas as pd

from keller_zlatic_vnc.data_processing import generate_roi_dataset

## Parameters go here

In [3]:
# All tunable settings for this notebook live in `ps`; the keys are read by the
# cells further down.
ps = {
    # Excel file specifying where the data for each experiment is saved
    # relative to the base folder
    'data_loc_file': r'A:\projects\keller_vnc\data\experiment_data_locations.xlsx',

    # Excel file holding the experimental annotations
    'annotation_file': r'A:\projects\keller_vnc\data\manip_event_annotations.xlsx',

    # Settings that are constant for all datasets and describe where the data is stored
    'image_base_folder': r'K:\\SV4',
    'image_processed_folder': 'Results\\WeightFused',
    'img_ext': r'weightFused.TimeRegistration.templateSpace.klb',
    'extracted_folder': 'extracted\\rois_5_25_25',

    # Folder, relative to the subfolder of each dataset, where the generated
    # dataset is saved
    'save_folder': 'extracted',
}

# One entry per group of extracted ROI information; each entry names the folder
# (relative to the dataset subfolder) and the files found inside it.
roi_group_info = [
    {
        'group_name': 'rois_5_25_25',
        'folder': 'extracted\\rois_5_25_25',
        'param_filename': 'extraction_params.pkl',
        'f_filename': 'extracted_f.h5',
        'f_ts_data_str': 'f_5_25_25',
        'baseline_filename': 'baseline_f.h5',
        'baseline_ts_data_str': 'bl_5_25_25',
        'locs_filename': 'roi_locs.pkl',
    },
]

## Read in excel file specifying location of each dataset

In [4]:
def c_fcn(str):
    """Return the given text with every single-quote character removed.

    Args:
        str: The text to clean.  (The name shadows the builtin ``str``; it is
            kept so the function signature stays unchanged.)

    Returns:
        A copy of the text without any ``'`` characters.
    """
    # Splitting on the quote and re-joining the pieces drops every quote
    pieces = str.split("'")
    return "".join(pieces)
# Register the quote-stripping function as the converter for column keys 0 and 1
converters = {col: c_fcn for col in (0, 1)}

# The header is taken from sheet row 1 (0-based) and only sheet columns 1 and 2 are read
data_locs = pd.read_excel(
    ps['data_loc_file'],
    header=1,
    usecols=[1, 2],
    converters=converters,
)

## Read in excel file with annotations

In [5]:
# Load the annotation table; the first row of the sheet supplies the column names
annotations = pd.read_excel(
    ps['annotation_file'],
    header=0,
)

## Process each dataset

In [None]:
# Number of rows in the data-location table, i.e. the number of datasets to process
n_datasets = data_locs.shape[0]

In [None]:
# Build and save one ROI dataset per row of the data-location table.  Rows
# without any matching annotations are skipped with a warning; a sub-folder name
# that cannot be parsed aborts the run with a RuntimeError.
for d_i in range(n_datasets):

    # Folder names identifying this experiment
    data_main_folder = data_locs['Main folder'][d_i]
    data_sub_folder = data_locs['Subfolder'][d_i]

    # Parse the subject from the subfolder string (the text preceding '-561nm')
    match = re.search('(?P<subject>.+)-561nm.+', data_sub_folder)
    if not match:
        raise RuntimeError('Unable to parse experiment sub-folder name.')
    subject = match['subject']
    annot_match_str = data_main_folder + '-' + subject

    # Find the annotations for this experiment.  A deep copy is taken so the
    # edits below never touch the master annotation table.
    # NOTE(review): the original chained a bare `.reindex()` here, which does not
    # change the index, so the rows keep their labels from the full table.  If a
    # fresh 0..n-1 index was intended, `reset_index(drop=True)` is needed -
    # confirm against the code that consumes the saved annotations.
    annot_rows = annotations['Date and sample'] == annot_match_str
    exp_annotations = copy.deepcopy(annotations[annot_rows])

    # Nothing to do for subjects without annotations
    if len(exp_annotations) == 0:
        warnings.warn('Unable to find any annotations for subject ' + annot_match_str)
        continue

    # Frame rate is the reciprocal of the interval time of the first annotation row
    frame_rate = 1/exp_annotations['Interval Time'].to_numpy()[0]

    # Adjust annotations of manipulation start and stop frames for 0 indexing
    exp_annotations['Manipulation Start'] = exp_annotations['Manipulation Start'] - 1
    exp_annotations['Manipulation End'] = exp_annotations['Manipulation End'] - 1

    metadata = {'manip_event_annotations': exp_annotations}

    # Everything belonging to this experiment lives under one directory
    dataset_dir = Path(ps['image_base_folder']) / data_main_folder / data_sub_folder

    # Determine locations of images
    img_folder = dataset_dir / ps['image_processed_folder']

    # Prepare information for each group of rois
    roi_dicts = []
    for g_info in roi_group_info:
        extracted_dir = dataset_dir / Path(g_info['folder'])

        # Read in the file with parameter settings for roi extraction and baseline calculations.
        # NOTE: pickle.load can execute arbitrary code; only point this at
        # files produced by a trusted extraction run.
        with open(extracted_dir / g_info['param_filename'], 'rb') as f:
            extract_params = pickle.load(f)

        # Describe the fluorescence and baseline time series of this group
        roi_values = [{'file': extracted_dir / g_info['f_filename'],
                       'name': g_info['f_ts_data_str']},
                      {'file': extracted_dir / g_info['baseline_filename'],
                       'name': g_info['baseline_ts_data_str']}]
        roi_dicts.append({'group_name': g_info['group_name'],
                          'roi_locs_file': extracted_dir / g_info['locs_filename'],
                          'roi_values': roi_values,
                          'extra_attributes': {'extract_params': extract_params}})

    # Create the dataset
    dataset = generate_roi_dataset(img_folder=img_folder, img_ext=ps['img_ext'],
                                   frame_rate=frame_rate, roi_dicts=roi_dicts,
                                   metadata=metadata)

    # Save the dataset, creating the save folder first if it does not exist yet
    save_dir = dataset_dir / Path(ps['save_folder'])
    save_dir.mkdir(parents=True, exist_ok=True)
    dataset_file_name = annot_match_str + '_dataset.pkl'
    save_file = save_dir / dataset_file_name
    with open(save_file, 'wb') as f:
        pickle.dump(dataset.to_dict(), f)

    print('Done processing subject ' + annot_match_str + '.')
    print('Dataset saved to: ' + str(save_file))
        
        
    