Script to generate datasets from extracted ROI data

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import copy
import re
from pathlib import Path
import pickle
import warnings

import numpy as np
import pandas as pd

from keller_zlatic_vnc.data_processing import generate_roi_dataset

## Parameters go here

In [3]:
# All parameters for this notebook are collected in the ps dictionary
ps = dict()

# Location of excel file specifying where the data for each experiment is saved relative to the base folder
ps['data_loc_file'] = r'A:\projects\keller_vnc\data\experiment_data_locations.xlsx'

# Additional parameters (constant for all datasets) specifying where the data is stored.
# The base folder is written as a raw string, so its backslash must NOT be doubled: r'K:\\SV4' would hold
# two literal backslashes and only work because pathlib collapses repeated separators on Windows.
ps['image_base_folder'] = r'K:\SV4'
ps['image_processed_folder'] = 'Results\\WeightFused'
ps['img_ext'] = r'weightFused.TimeRegistration.templateSpace.klb'
# NOTE(review): 'extracted_folder' is not read by any of the cells visible in this notebook (the folder of each
# roi group comes from roi_group_info instead) - confirm whether it is still needed
ps['extracted_folder'] = 'extracted\\rois_5_25_25'

# Specify groups of extracted roi information.  Each entry describes one group: the folder (relative to the
# folder of a dataset) holding its files, the names of those files, and the names given to its roi values.
roi_group_info = []

# Group stored under extracted\rois_5_25_25
roi_group_info.append({
    'group_name': 'rois_5_25_25',
    'folder': 'extracted\\rois_5_25_25',
    'param_filename': 'extraction_params.pkl',
    'f_filename': 'extracted_f.h5',
    'f_ts_data_str': 'f_5_25_25',
    'baseline_filename': 'baseline_f.h5',
    'baseline_ts_data_str': 'bl_5_25_25',
    'locs_filename': 'roi_locs.pkl',
})

# Group stored under extracted\rois_1_5_5
roi_group_info.append({
    'group_name': 'rois_1_5_5',
    'folder': 'extracted\\rois_1_5_5',
    'param_filename': 'extraction_params.pkl',
    'f_filename': 'extracted_f.h5',
    'f_ts_data_str': 'f_1_5_5',
    'baseline_filename': 'baseline_f.h5',
    'baseline_ts_data_str': 'bl_1_5_5',
    'locs_filename': 'roi_locs.pkl',
})

# Specify where we will save the dataset, relative to the subfolder for each dataset
ps['save_folder'] = 'extracted'

## Read in excel file specifying location of each dataset

In [4]:
def c_fcn(str):
    """Return the given string with every single-quote character removed.

    NOTE(review): the parameter name shadows the builtin ``str``; it is kept so that the
    signature of this function is unchanged.
    """
    # Splitting on the quote character and re-joining the pieces drops every occurrence of it
    pieces = str.split("'")
    return "".join(pieces)
# Apply c_fcn (which strips single-quote characters) as the converter for integer keys 0 and 1.
# NOTE(review): confirm that converter keys 0 and 1 address the two columns selected by usecols=[1, 2]
converters = dict.fromkeys((0, 1), c_fcn)

# Read the table of dataset locations; the column names are taken from the row given by header=1
data_locs = pd.read_excel(
    ps['data_loc_file'],
    header=1,
    usecols=[1, 2],
    converters=converters,
)

## Process each dataset

In [14]:
# Each row of data_locs gives the location of one dataset
n_datasets = data_locs.shape[0]

In [15]:
# Values that are the same for every dataset
frame_rate = np.nan  # No frame rate is specified here, so NaN is passed on to generate_roi_dataset
dataset_file_name = 'dataset.pkl'

for d_i in range(n_datasets):

    # Look up the folders of this dataset by position, so the lookup does not depend on how the rows of
    # data_locs happen to be labeled
    data_main_folder = data_locs['Main folder'].iloc[d_i]
    data_sub_folder = data_locs['Subfolder'].iloc[d_i]

    # All files of this dataset (images, extracted rois and the saved dataset) are under this folder
    subject_dir = Path(ps['image_base_folder']) / data_main_folder / data_sub_folder

    # Determine locations of images
    img_folder = subject_dir / ps['image_processed_folder']

    # Prepare information for each group of rois
    roi_dicts = []
    for g_info in roi_group_info:
        extracted_dir = subject_dir / g_info['folder']

        # Read in the file with parameter settings for roi extraction and baseline calculations.
        # NOTE(review): pickle.load can execute arbitrary code - only point this at trusted files
        with open(extracted_dir / g_info['param_filename'], 'rb') as f:
            extract_params = pickle.load(f)

        # Create the roi dict: one entry of roi_values for the extracted values and one for the baseline
        roi_values = [{'file': extracted_dir / g_info['f_filename'], 'name': g_info['f_ts_data_str']},
                      {'file': extracted_dir / g_info['baseline_filename'], 'name': g_info['baseline_ts_data_str']}]
        roi_dict = {'group_name': g_info['group_name'],
                    'roi_locs_file': extracted_dir / g_info['locs_filename'],
                    'roi_values': roi_values,
                    'extra_attributes': {'extract_params': extract_params}}
        roi_dicts.append(roi_dict)

    dataset = generate_roi_dataset(img_folder=img_folder, img_ext=ps['img_ext'], frame_rate=frame_rate,
                                   roi_dicts=roi_dicts, metadata=dict(), run_checks=False)

    # Save the dataset, making sure the folder we save into exists so that the work done above is not lost
    # to a missing directory
    save_dir = subject_dir / ps['save_folder']
    save_dir.mkdir(parents=True, exist_ok=True)
    save_file = save_dir / dataset_file_name
    with open(save_file, 'wb') as f:
        pickle.dump(dataset.to_dict(), f)

    print('Done processing subject ' + data_main_folder + '/' + data_sub_folder)
    print('Dataset saved to: ' + str(save_file))

Searching for image files...
Found 5416 images.
Done processing subject CW_17-08-23/L1-561nm-ROIMonitoring_20170823_145226.corrected
Dataset saved to: K:\SV4\CW_17-08-23\L1-561nm-ROIMonitoring_20170823_145226.corrected\extracted\dataset.pkl
Searching for image files...
Found 6611 images.
Done processing subject CW_17-08-23/L2-561nm-ROIMonitoring_20170823_161308.corrected
Dataset saved to: K:\SV4\CW_17-08-23\L2-561nm-ROIMonitoring_20170823_161308.corrected\extracted\dataset.pkl
Searching for image files...
Found 7208 images.
Done processing subject CW_17-08-23/L4-561nm-ROIMonitoring_20170823_193101.corrected
Dataset saved to: K:\SV4\CW_17-08-23\L4-561nm-ROIMonitoring_20170823_193101.corrected\extracted\dataset.pkl
Searching for image files...
Found 6413 images.
Done processing subject CW_17-08-24/L4-561nm-ROImonitoring_20170824_184800.corrected
Dataset saved to: K:\SV4\CW_17-08-24\L4-561nm-ROImonitoring_20170824_184800.corrected\extracted\dataset.pkl
Searching for image files...
Found 6

Done processing subject CW_17-11-05/L7-561nm-ROImonitoring_20171105_180850.corrected
Dataset saved to: K:\SV4\CW_17-11-05\L7-561nm-ROImonitoring_20171105_180850.corrected\extracted\dataset.pkl
Searching for image files...
Found 6493 images.
Done processing subject CW_17-11-06/L1-561nm-ROImonitoring_20171106_141140.corrected
Dataset saved to: K:\SV4\CW_17-11-06\L1-561nm-ROImonitoring_20171106_141140.corrected\extracted\dataset.pkl
Searching for image files...
Found 6493 images.
Done processing subject CW_17-11-06/L2-561nm-ROImonitoring_20171106_154314.corrected
Dataset saved to: K:\SV4\CW_17-11-06\L2-561nm-ROImonitoring_20171106_154314.corrected\extracted\dataset.pkl
Searching for image files...
Found 6493 images.
Done processing subject CW_17-11-06/L3-561nm-ROImonitoring_20171106_174549.corrected
Dataset saved to: K:\SV4\CW_17-11-06\L3-561nm-ROImonitoring_20171106_174549.corrected\extracted\dataset.pkl
Searching for image files...
Found 3865 images.
Done processing subject CW_17-11-07/