This is a script meant only for developing the code that generates datasets.

In [1]:
%load_ext autoreload
%autoreload 2

In [12]:
import copy
from pathlib import Path
import pickle
import re 

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyqtgraph as pg
import pyspark

from janelia_core.dataprocessing.utils import get_processed_image_data
from janelia_core.fileio.exp_reader import read_img_file
from janelia_core.visualization.roi_exploration import StaticROIViewer

from keller_zlatic_vnc.data_processing import generate_roi_dataset

In [3]:
%matplotlib qt

## Parameters go here

In [4]:
# Location of the Excel file specifying where the data for each experiment is saved relative to the base folder
data_loc_file = r'A:\projects\keller_vnc\data\experiment_data_locations.xlsx'
exp_index = 0  # Row of the experiment in the table of data locations to create a dataset from

# Location of the Excel file holding experimental annotations
ann_file = r'A:\projects\keller_vnc\data\manip_event_annotations.xlsx'


# Additional parameters (constant for all datasets) specifying where the data is stored.
# NOTE: inside a raw string a backslash is already literal, so each separator is written
# once; doubling it would embed two backslashes in the path.
image_base_folder = r'K:\SV4'
image_processed_folder = r'Results\WeightFused'
img_ext = r'weightFused.TimeRegistration.templateSpace.klb'
extracted_folder = r'extracted\rois_5_25_25'

## Read in the Excel file specifying the location of each dataset

In [5]:
def c_fcn(str):
    """Return the given text with every single-quote character removed."""
    # NOTE: the parameter name shadows the builtin ``str``; it is kept so the
    # signature stays unchanged.
    pieces = str.split("'")
    return "".join(pieces)
# The same quote-stripping converter is registered under the keys 0 and 1
converters = dict.fromkeys((0, 1), c_fcn)

# Column names come from the sheet's second row (header=1); only columns 1 and 2 are read
data_locs = pd.read_excel(
    data_loc_file,
    header=1,
    usecols=[1, 2],
    converters=converters,
)

## Read in the Excel file with annotations

In [6]:
# Load the full annotation table; the first row of the sheet supplies the column names
annotations = pd.read_excel(ann_file, header=0)

## Find the appropriate annotations for this experiment

In [7]:
# Folder names for the selected experiment, taken from the data-location table
data_main_folder = data_locs['Main folder'][exp_index]
data_sub_folder = data_locs['Subfolder'][exp_index]

# The subject is the part of the sub-folder name that precedes the "-561nm" tag
match = re.search('(?P<subject>.+)-561nm.+', data_sub_folder)
if not match:
    raise RuntimeError('Unable to parse experiment sub-folder name.')
subject = match['subject']
annot_match_str = data_main_folder + '-' + subject

# Keep only the annotation rows whose 'Date and sample' entry names this experiment
annot_rows = annotations['Date and sample'] == annot_match_str
exp_annotations = copy.deepcopy(annotations[annot_rows])
if exp_annotations.empty:
    raise RuntimeError('No annotations found for ' + annot_match_str + '.')

metadata = {'manip_event_annotations': exp_annotations}
# Positional access is required here: the filtered rows keep their labels from the
# full annotation table, so the label 0 is present only when the experiment's first
# row is also the first row of the table.
frame_rate = 1/exp_annotations['Interval Time'].iloc[0]

In [8]:
# Adjust annotations of manipulation start and stop frames for 0 indexing
# Adjust annotations of manipulation start and stop frames for 0 indexing.
# The shifted values are computed from the untouched `annotations` table rather than
# from `exp_annotations` itself, so re-running this cell does not subtract 1 again.
exp_annotations['Manipulation Start'] = annotations.loc[annot_rows, 'Manipulation Start'] - 1
exp_annotations['Manipulation End'] = annotations.loc[annot_rows, 'Manipulation End'] - 1

## Determine where the images and extracted data are

In [10]:
# Both locations sit under <base folder>/<main folder>/<sub folder> for this experiment
exp_dir = Path(image_base_folder) / data_main_folder / data_sub_folder
img_folder = exp_dir / image_processed_folder
extracted_dir = exp_dir / Path(extracted_folder)

## Read in the file with parameter settings for roi extraction and baseline calculations

In [13]:
# NOTE: unpickling executes code embedded in the file, so only load files from a trusted source
params_path = extracted_dir / 'extraction_params.pkl'
with params_path.open('rb') as params_file:
    extract_params = pickle.load(params_file)

## Create the dataset

In [29]:
extracted_path = Path(extracted_dir)

# Value files registered for the ROI group, each with the name it is stored under
roi_values = [
    {'file': extracted_path / 'extracted_f.h5', 'name': 'f_1_10_10'},
    {'file': extracted_path / 'baseline_f.h5', 'name': 'baseline_1_10_10'},
]

# A single ROI group: its location file, its value files and the extraction parameters
roi_group = {
    'group_name': '5_25_25',
    'roi_locs_file': extracted_path / 'roi_locs.pkl',
    'roi_values': roi_values,
    'extra_attributes': {'extract_params': extract_params},
}
roi_dicts = [roi_group]

In [30]:
# Build the dataset from the image folder, the ROI group description and the metadata
dataset_kwargs = dict(
    img_folder=img_folder,
    img_ext=img_ext,
    frame_rate=frame_rate,
    roi_dicts=roi_dicts,
    metadata=metadata,
)
dataset = generate_roi_dataset(**dataset_kwargs)

Searching for image files...
Found 5416 images.


In [33]:
# Extract ROIs 0-999 of the group, passing along the group's 'ts_labels'
group_name = '5_25_25'
ts_labels = dataset.roi_groups[group_name]['ts_labels']
e_rois = dataset.extract_rois(roi_group=group_name, roi_inds=range(1000), labels=ts_labels)

In [35]:
plot_i = 900
plot_roi = e_rois[plot_i]

# Draw on a dedicated figure (instead of whichever figure happens to be current) and
# label both traces so they can be told apart.
fig, ax = plt.subplots()
ax.plot(plot_roi.f_1_10_10, label='f_1_10_10')
ax.plot(plot_roi.baseline_1_10_10, label='baseline_1_10_10')
ax.set_title('ROI ' + str(plot_i))
ax.set_xlabel('Sample index')
ax.legend();  # trailing semicolon suppresses the object repr in the cell output

[<matplotlib.lines.Line2D at 0x139691ee160>]

## Look at videos

In [36]:
# Spark configuration for the context handed to the image-processing helper.
# NOTE(review): the worker count and memory limits are hard-coded and look sized for a
# specific workstation - adjust before running elsewhere.
conf = pyspark.SparkConf().setMaster('local[20]').setAll([
    ('spark.executor.memory', '10g'), ('spark.driver.memory','400g'), ('spark.driver.maxResultSize', '300g')])
# getOrCreate reuses a context that is already running, so re-executing this cell does
# not fail; constructing a second SparkContext directly raises an error.
sc = pyspark.SparkContext.getOrCreate(conf=conf)

In [39]:
# Collect the 'file' entry of image records 200-299
image_inds = slice(200, 300)
selected_records = dataset.ts_data['imgs']['vls'][image_inds]
video_images = [record['file'] for record in selected_records]

In [40]:
def max_proj(img):
    """Collapse the first axis of ``img`` by taking the maximum along it."""
    projection = np.max(img, axis=0)
    return projection

# Run max_proj over the selected image files (passing the Spark context), then convert to an array
projected_images = get_processed_image_data(video_images, max_proj, sc=sc)
max_projs = np.asarray(projected_images)

In [41]:
# Display the max projections in a pyqtgraph image window
pg.image(max_projs)

  data = data[sl]


<pyqtgraph.graphicsWindows.ImageWindow at 0x13959f351f8>

## Look at rois - make sure they are in the right places

In [59]:
# Background image for the ROI viewer: the file of the first image record
first_img_file = Path(dataset.ts_data['imgs']['vls'][0]['file'])
annot_img = read_img_file(first_img_file)

# Only the first 500 ROIs of the group are passed to the viewer
group_rois = dataset.roi_groups['5_25_25']['rois']
rois = group_rois[0:500]

In [60]:
# Create a viewer with the loaded image as background and the selected ROIs
static_viewer = StaticROIViewer(bg_image=annot_img, rois=rois)

In [61]:
# NOTE(review): presumably builds and shows the viewer window - confirm against StaticROIViewer
static_viewer.init_ui()