Notebook for looking at raw images of datasets around events

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import glob
from pathlib import Path
import pickle

import numpy as np
import pandas as pd
import pyqtgraph as pg
import pyspark

from janelia_core.dataprocessing.dataset import ROIDataset
from janelia_core.dataprocessing.utils import get_processed_image_data

from keller_zlatic_vnc.data_processing import match_annotation_subject_to_volume_subject
from keller_zlatic_vnc.data_processing import read_raw_transitions_from_excel
from keller_zlatic_vnc.data_processing import recode_beh

In [3]:
# NOTE(review): matplotlib is never imported in this notebook; presumably this magic is
# here to turn on Qt event-loop integration for the pyqtgraph window opened at the end
# of the notebook - confirm before removing.
%matplotlib qt

## Create a PySpark context

In [4]:
# Configure a local Spark master ('local[20]') together with the executor/driver
# memory limits used when collecting processed images back to the driver.
conf = pyspark.SparkConf().setMaster('local[20]').setAll([
    ('spark.executor.memory', '10g'),
    ('spark.driver.memory', '400g'),
    ('spark.driver.maxResultSize', '300g'),
])

# getOrCreate() returns the already-running context if there is one, so this cell
# can be re-run without raising "Cannot run multiple SparkContexts at once".
# Note that `conf` is only applied when a new context is actually created.
sc = pyspark.SparkContext.getOrCreate(conf=conf)

## Parameters go here

In [39]:
# All user-tunable settings for this notebook live in this one dictionary.
ps = {
    # Excel file listing where the data for each experiment is saved, relative to the base folder
    'data_loc_file': r'A:\projects\keller_vnc\data\experiment_data_locations.xlsx',

    # Excel file holding the transition information
    'trans_file': r'A:\projects\keller_vnc\data\extracted_dff_v2\transition_list.xlsx',

    # Name of the subfolder that contains the dataset for each subject
    'dataset_folder': 'extracted',

    # Base folder under which all datasets are stored.
    # NOTE(review): this is a raw string, so it contains two literal backslashes after
    # the drive letter - presumably a single one was intended; confirm.
    'dataset_base_folder': r'K:\\SV4',

    # Index (into the data locations table) of the dataset to look at
    'dataset_index': 10,

    # Index (among the selected subject's events) of the event to look at
    'event_index': 0,

    # Number of extra frames to show before the start of the event ...
    'n_frames_before': 1,
    # ... and after the end of the event
    'n_frames_after': 2,
}


## Read in Excel file specifying location of datasets

In [40]:
def c_fcn(str):
    """Return the given string with every single-quote character removed.

    This function is registered as a column converter for ``pd.read_excel`` when
    the data locations file is read.

    Args:
        str: The text to clean. (The parameter name shadows the builtin ``str``;
            it is kept as is so the function's signature is unchanged.)

    Returns:
        A copy of the input with all ``'`` characters stripped out.
    """
    quote_char, replacement = "'", ""
    return str.replace(quote_char, replacement)
# Run the quote-stripping converter on both columns that are read in
converters = dict.fromkeys((0, 1), c_fcn)

# Column names are taken from the second row of the sheet (header=1) and only
# the columns at positions 1 and 2 are kept
data_locs = pd.read_excel(
    ps['data_loc_file'],
    header=1,
    usecols=[1, 2],
    converters=converters,
)

## Read in transition information

In [41]:
# Load the raw transition annotations, then recode the behavior labels in both
# the "before" and "after" columns
trans = read_raw_transitions_from_excel(ps['trans_file'], adjust_frame_index=True)
for beh_col in ('Beh Before', 'Beh After'):
    trans = recode_beh(trans, beh_col)

# Every sample id that has at least one annotated transition
unique_trans_subjs = trans['Smp ID'].unique()

## Load the dataset if there are annotations for it

In [42]:
# Look up the folders (relative to the base folder) holding the requested dataset
data_main_folder = data_locs['Main folder'][ps['dataset_index']]
data_sub_folder = data_locs['Subfolder'][ps['dataset_index']]

# Find which annotated subject, if any, corresponds to this dataset
match_ind = match_annotation_subject_to_volume_subject(data_main_folder, data_sub_folder, unique_trans_subjs)

if match_ind is None:
    # Fail loudly instead of just printing: the cells below need `dataset` and
    # `sample_events`, and if we carried on they would either be undefined or,
    # worse, silently left over from a previously loaded dataset.
    raise ValueError('No annotations for dataset.')

dataset_path = (Path(ps['dataset_base_folder']) / data_main_folder / data_sub_folder /
                Path(ps['dataset_folder']) / '*.pkl')

# Sort so that the choice of file is deterministic if more than one pickle is present
dataset_files = sorted(glob.glob(str(dataset_path)))
if not dataset_files:
    raise FileNotFoundError('No dataset file found matching {}'.format(dataset_path))
dataset_file = dataset_files[0]

# NOTE: unpickling can execute arbitrary code - only load dataset files from a trusted source
with open(dataset_file, 'rb') as f:
    dataset = ROIDataset.from_dict(pickle.load(f))

# Keep only the annotated events that belong to the matched subject
sample_id = unique_trans_subjs[match_ind]
event_rows = trans['Smp ID'] == sample_id
sample_events = trans[event_rows]


## Get images around the event 

In [43]:
# The annotated event we want to look at
event = sample_events.iloc[ps['event_index']]

# Window of frames to show: the event itself plus the requested padding on each side.
# The start is clamped at 0 because a negative slice start would be interpreted as an
# offset from the END of the image list and return the wrong frames (or none at all).
# Frame indices are cast to int since slicing requires integers.
# NOTE(review): assumes the annotated frame indices are whole numbers - confirm.
start_frame = max(0, int(event['Manipulation Start']) - ps['n_frames_before'])
stop_frame = int(event['Manipulation End']) + ps['n_frames_after']
req_frames = slice(start_frame, stop_frame + 1)  # +1 so that stop_frame itself is included

# Each entry of the image time series is a dict; 'file' holds what is passed on for loading
imgs = [d['file'] for d in dataset.ts_data['imgs']['vls'][req_frames]]

In [44]:
def max_proj(img):
    """Compute the maximum projection of an image along its first axis.

    Args:
        img: Array-like image data.
            NOTE(review): presumably a volume whose first axis is the one to
            collapse (e.g. depth) - confirm against the loader.

    Returns:
        An array with the first axis of ``img`` reduced by taking the maximum.
    """
    projection = np.max(img, axis=0)
    return projection

# Apply max_proj to every requested image (using the Spark context `sc`) and
# gather the results into a single array
max_projs = np.asarray(
    get_processed_image_data(imgs, max_proj, sc=sc)
)

In [45]:
# Show the stack of maximum projections in a pyqtgraph image window
pg.image(max_projs)

<pyqtgraph.graphicsWindows.ImageWindow at 0x1de9c65ddc8>