This notebook tests different ways of calculating baselines.

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import glob
from pathlib import Path
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from janelia_core.dataprocessing.baseline import percentile_filter_multi_d
from janelia_core.dataprocessing.dataset import ROIDataset
from janelia_core.visualization.volume_visualization import visualize_projs

In [3]:
%matplotlib qt

## Parameters go here

In [5]:
# All user-tunable settings for this notebook are collected in this one dictionary
ps = {
    # Location of the Excel file specifying where the data for each experiment is saved
    # relative to the base folder
    'data_loc_file': r'A:\projects\keller_vnc\data\experiment_data_locations.xlsx',

    # Number of datasets to load
    'n_datasets': 5,

    # Base folder where datasets are stored.  This is a raw string, so the path separator is
    # written as a single backslash (doubling it inside a raw string puts two separators in the path).
    'dataset_base_folder': r'K:\SV4',

    # Subfolder containing the dataset for each subject
    'dataset_folder': 'extracted',

    # Fields holding the fluorescence and the original baselines we work with
    'f_ts_str': 'f_4_20_20',
    'bl_ts_strs': ['bl_4_20_20', 'bl_4_20_20_long'],

    # Set to True to recalculate baselines in addition to loading the original ones
    'recalc_baselines': False,
}

## Read in excel file specifying location of datasets

In [6]:
def c_fcn(str):
    """Return the given text with every apostrophe (') removed.

    This function is used as a column converter when the data-location spreadsheet is read.

    Note: the parameter name shadows the built-in ``str``; it is left unchanged so the
    signature of the function stays the same.
    """
    apostrophe = "'"
    nothing = ""
    return str.replace(apostrophe, nothing)
# Strip apostrophes from the values of columns 0 and 1 (the converter keys are column indices)
converters = {col_i: c_fcn for col_i in (0, 1)}

# Read the spreadsheet columns selected by usecols, taking the column names from row index 1
data_locs = pd.read_excel(
    ps['data_loc_file'],
    header=1,
    usecols=[1, 2],
    converters=converters,
)

In [7]:
# Per-dataset results.  Entry d_i of every list belongs to the dataset described by row d_i of data_locs.
datasets = [None]*ps['n_datasets']
f_means = [None]*ps['n_datasets']
old_baselines = [None]*ps['n_datasets']
old_baseline_means = [None]*ps['n_datasets']
new_baslines = [None]*ps['n_datasets']  # Only filled in when ps['recalc_baselines'] is True
new_bl_means = [None]*ps['n_datasets']  # Only filled in when ps['recalc_baselines'] is True

for d_i in range(ps['n_datasets']):

    # Find the pickled dataset for this subject
    data_main_folder = data_locs['Main folder'][d_i]
    data_sub_folder = data_locs['Subfolder'][d_i]

    dataset_path = (Path(ps['dataset_base_folder']) / data_main_folder / data_sub_folder /
                    Path(ps['dataset_folder']) / '*.pkl')

    # glob returns matches in arbitrary order and may return none at all, so sort to make the choice
    # deterministic and fail with a message that names the folder that was searched
    dataset_files = sorted(glob.glob(str(dataset_path)))
    if not dataset_files:
        raise FileNotFoundError('No dataset file matching ' + str(dataset_path) + ' was found.')
    dataset_file = dataset_files[0]

    # Load the dataset.  NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
    with open(dataset_file, 'rb') as f:
        datasets[d_i] = ROIDataset.from_dict(pickle.load(f))

    # Calculate mean fluorescence, averaging over axis 1 (the axis later cells index by ROI)
    f_vls = datasets[d_i].ts_data[ps['f_ts_str']]['vls'][:]
    f_means[d_i] = np.mean(f_vls, axis=1)

    # Load old baselines and calculate their means
    subj_old_baselines = [None]*len(ps['bl_ts_strs'])
    subj_old_baseline_means = [None]*len(ps['bl_ts_strs'])
    for b_i, old_bl_label in enumerate(ps['bl_ts_strs']):
        subj_old_baselines[b_i] = datasets[d_i].ts_data[old_bl_label]['vls'][:]
        subj_old_baseline_means[b_i] = np.mean(subj_old_baselines[b_i], axis=1)

    old_baselines[d_i] = subj_old_baselines
    old_baseline_means[d_i] = subj_old_baseline_means

    # Recalculate baselines from the fluorescence values that were read above
    if ps['recalc_baselines']:
        new_baslines[d_i] = percentile_filter_multi_d(f_vls,
                                                      window_length=3001,
                                                      filter_start=-1500,
                                                      write_offset=1500,
                                                      p=.1, n_processes=40)

        new_bl_means[d_i] = np.mean(new_baslines[d_i], axis=1)

    print('Done loading dataset ' + str(d_i+1) + ' of ' + str(ps['n_datasets']) + '.')
    

Done loading dataset 1 of 5.
Done loading dataset 2 of 5.
Done loading dataset 3 of 5.
Done loading dataset 4 of 5.
Done loading dataset 5 of 5.


In [8]:
# One figure with one row of axes per dataset, overlaying the mean fluorescence and the mean baselines
plt.figure()
n_rows = ps['n_datasets']
for row_i in range(n_rows):
    plt.subplot(n_rows, 1, row_i + 1)

    # Mean fluorescence first, then the mean of each original baseline on the same axes
    plt.plot(f_means[row_i])
    for old_mean in old_baseline_means[row_i]:
        plt.plot(old_mean)

    # Means of recalculated baselines only exist when recalculation was requested
    if ps['recalc_baselines']:
        plt.plot(new_bl_means[row_i])

## Look at some example rois

In [43]:
# Index of the example ROI to look at.  Exactly one choice is active; the alternatives are kept
# below as comments so they can be swapped in.
#   roi_i = 5100  # Brain
#   roi_i = 0     # VNC 2
roi_i = 1007  # VNC 1

In [44]:
# One figure with one row of axes per dataset, showing the traces of the example ROI selected by roi_i
plt.figure()
n_rows = ps['n_datasets']
for row_i in range(n_rows):
    plt.subplot(n_rows, 1, row_i + 1)

    # Fluorescence of the example ROI
    roi_f = datasets[row_i].ts_data[ps['f_ts_str']]['vls'][:, roi_i]
    plt.plot(roi_f)

    # Each original baseline of the example ROI
    for subj_bl in old_baselines[row_i]:
        plt.plot(subj_bl[:, roi_i])

    # The recalculated baseline only exists when recalculation was requested
    if ps['recalc_baselines']:
        plt.plot(new_baslines[row_i][:, roi_i])

## Visualize the location of the example ROI whose activity is plotted above

In [45]:
# Collect the voxel indices of every ROI in the 'rois_4_20_20' group of the first dataset
roi_locs = []
for roi in datasets[0].roi_groups['rois_4_20_20']['rois']:
    roi_locs.append(roi.voxel_inds)

# Voxel indices of the example ROI selected by roi_i
roi_loc = roi_locs[roi_i]

In [46]:
# The 'mean' entry of the first dataset's stats; the ROI mask built below takes its shape from this array
mn_image = datasets[0].stats['mean']

In [47]:
# Start from an array shaped like mn_image that is NaN everywhere ...
roi_image = np.zeros_like(mn_image)*np.nan

# ... then set the voxels of the example ROI to 1, so they are the only non-NaN entries
roi_voxels = (roi_loc[0], roi_loc[1], roi_loc[2])
roi_image[roi_voxels] = 1

In [48]:
# Project the mean array and the ROI mask along each of the three axes.  The mask is averaged with
# nanmean so that the NaN entries outside the ROI are ignored.
mean_vol = datasets[0].stats['mean']

# Projection along axis 0
axial_mn_im = np.mean(mean_vol, axis=0)
axial_roi_im = np.nanmean(roi_image, axis=0)

# Projection along axis 1, transposed and then flipped left-to-right
coronal_mn_im = np.fliplr(np.mean(mean_vol, axis=1).T)
coronal_roi_im = np.fliplr(np.nanmean(roi_image, axis=1).T)

# Projection along axis 2, transposed
sag_mn_im = np.mean(mean_vol, axis=2).T
sag_roi_im = np.nanmean(roi_image, axis=2).T

In [51]:
# Show the three pairs of projections together.  Each list holds the projection of the mean array
# followed by the projection of the ROI mask; cmaps and clims also have one entry per list position.
visualize_projs(
    horz_projs=[axial_mn_im, axial_roi_im],
    sag_projs=[sag_mn_im, sag_roi_im],
    cor_projs=[coronal_mn_im, coronal_roi_im],
    cmaps=['gray', 'Blues'],
    clims=[[0, 130], [0, .5]],
    dim_m=[1, 1, 3],
    buffer=0.0,
    tgt_h=5,
)

In [42]:
# Display the shape of the mean array as a quick sanity check
mn_image.shape

(86, 851, 509)