Script to generate datasets from extracted ROI data

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import copy
import re
from pathlib import Path
import pickle
import warnings

import numpy as np
import pandas as pd

from keller_zlatic_vnc.data_processing import generate_roi_dataset

## Parameters go here

In [3]:
# All run parameters live in one dictionary.
ps = {
    # Excel file specifying where the data for each experiment is saved relative to the base folder.
    # Alternative: r'A:\projects\keller_vnc\data\experiment_data_locations.xlsx'
    'data_loc_file': r'A:\projects\keller_vnc\data\EM_Volume_experiment_data_locations.xlsx',

    # Additional parameters (constant for all datasets) specifying where the data is stored.
    # Raw string: the value literally contains a doubled backslash.
    'image_base_folder': r'K:\\SV4',
    'image_processed_folder': 'Results\\WeightFused',
    # Alternative: r'weightFused.TimeRegistration.templateSpace.klb'
    'img_ext': r'weightFused.TimeRegistration.klb',
    # Previously also set: ps['extracted_folder'] = 'extracted\\rois_5_25_25'

    # Where the dataset is saved, relative to the subfolder for each dataset.
    'save_folder': 'extracted',
}

# Groups of extracted roi information.  Every group uses the same file names; only the
# size tag embedded in the group name, folder and time-series labels differs.
roi_group_info = [
    {'group_name': 'rois_' + size_tag,
     'folder': 'extracted\\rois_' + size_tag,
     'param_filenames': ['extraction_params.pkl', 'long_baseline_extract_params.pkl'],
     'f_filename': 'extracted_f.h5',
     'f_ts_data_str': 'f_' + size_tag,
     'baseline_filenames': ['baseline_f.h5', 'baseline_f_long.h5'],
     'baseline_ts_data_strs': ['bl_' + size_tag, 'bl_' + size_tag + '_long'],
     'locs_filename': 'roi_locs.pkl'}
    for size_tag in ('1_5_5', '2_10_10', '4_20_20')
]

## Read in excel file specifying location of each dataset

In [4]:
def c_fcn(str):
    """Remove every single-quote character from a spreadsheet cell value.

    Args:
        str: Cell text as read from the Excel file.  The parameter keeps its original
            name (which shadows the builtin inside this function only) so that the
            function's signature is unchanged.

    Returns:
        The text with all "'" characters removed.
    """
    return str.replace("'", "")


# Key the converters by column label instead of by position.  Integer keys are easy to
# misalign with `usecols` (NOTE(review): integer converter keys may be counted against the
# sheet's original columns rather than the usecols selection - confirm for the pandas
# version in use), whereas labels always hit the two columns this notebook reads later
# as data_locs['Main folder'] and data_locs['Subfolder'].
converters = {'Main folder': c_fcn, 'Subfolder': c_fcn}

# header=1: the column labels are taken from the second row of the sheet
data_locs = pd.read_excel(ps['data_loc_file'], header=1, usecols=[1, 2], converters=converters)

## Define this function, which must exist in the namespace for unpickling of results

We remove this function from the dataset, so datasets saved from here on no longer have this unpickling issue.

In [5]:
#def preprocess_f(x):
#    x[x==0] = ps['0_sub_value'] 
#    return x

## Process each dataset

In [6]:
# One dataset is generated per row of the data-locations table
n_datasets = data_locs.shape[0]

In [7]:
# Build and save one dataset per row of data_locs.
for d_i in range(n_datasets):

    # No frame rate is supplied by this notebook; NaN is passed on to generate_roi_dataset
    frame_rate = np.nan

    # Determine locations of images (positional row access, independent of index labels)
    data_main_folder = data_locs['Main folder'].iloc[d_i]
    data_sub_folder = data_locs['Subfolder'].iloc[d_i]
    subject_dir = Path(ps['image_base_folder']) / data_main_folder / data_sub_folder
    img_folder = subject_dir / ps['image_processed_folder']

    # Prepare information for each group of rois
    roi_dicts = []
    for g_info in roi_group_info:
        extracted_dir = subject_dir / g_info['folder']

        # Read in the files with parameter settings for roi extraction and baseline calculations.
        # NOTE: pickle.load can execute arbitrary code - only read trusted extraction output.
        extract_params = []
        for p_file in g_info['param_filenames']:
            with open(extracted_dir / p_file, 'rb') as f:
                e_params = pickle.load(f)

            # If a preprocessing function was stored with the extraction options, replace it
            # with a placeholder so the saved dataset does not carry a function reference.
            # Guarded because not every parameter file need contain 'roi_extract_opts'.
            roi_opts = e_params.get('roi_extract_opts') if isinstance(e_params, dict) else None
            if isinstance(roi_opts, dict) and callable(roi_opts.get('preprocess_f')):
                roi_opts['preprocess_f'] = 'not_recorded'

            # Keep the loaded parameters; without this the dataset's 'extract_params'
            # attribute is always an empty list.
            extract_params.append(e_params)

        # Create the roi dict: the extracted fluorescence first, then each baseline
        roi_values = [{'file': extracted_dir / g_info['f_filename'], 'name': g_info['f_ts_data_str']}]
        for bl_file, bl_label in zip(g_info['baseline_filenames'], g_info['baseline_ts_data_strs']):
            roi_values.append({'file': extracted_dir / bl_file, 'name': bl_label})

        roi_dict = {'group_name': g_info['group_name'],
                    'roi_locs_file': extracted_dir / g_info['locs_filename'],
                    'roi_values': roi_values,
                    'extra_attributes': {'extract_params': extract_params}}
        roi_dicts.append(roi_dict)

    dataset = generate_roi_dataset(img_folder=img_folder, img_ext=ps['img_ext'], frame_rate=frame_rate,
                                   roi_dicts=roi_dicts, metadata=dict(), run_checks=False, add_images=True)

    # Save the dataset
    dataset_file_name = 'dataset.pkl'
    save_file = subject_dir / ps['save_folder'] / dataset_file_name
    with open(save_file, 'wb') as f:
        pickle.dump(dataset.to_dict(), f)

    print('Done processing subject ' + data_main_folder + '/' + data_sub_folder)
    print('Dataset saved to: ' + str(save_file))

Searching for image files...
Found 10367 images.
Done processing subject CW_18-02-15/L1-561nm-openLoop_20180215_163233.corrected
Dataset saved to: K:\SV4\CW_18-02-15\L1-561nm-openLoop_20180215_163233.corrected\extracted\dataset.pkl


In [8]:
# Scratch check: load a parameter file again for inspection.
# Relies on `extracted_dir` and `p_file` still holding the values left behind by the
# processing loop, so this only works after that loop has run.
params_path = extracted_dir / p_file
with open(params_path, 'rb') as f:
    e_params = pickle.load(f)