A notebook to make p-value images from the results of whole_brain_linear_mdl_fit.

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import glob
import os
from pathlib import Path
import pickle

import imageio
import numpy as np
import pandas as pd
import pyqtgraph as pg

from janelia_core.stats.regression import grouped_linear_regression_boot_strap_stats
from janelia_core.dataprocessing.dataset import ROIDataset
from janelia_core.utils.data_saving import append_ts

In [3]:
%matplotlib qt

## Parameters go here

In [62]:
# Every user-tunable setting for this notebook is collected in this one dictionary
ps = {
    # Location of results of whole_brain_linear_mdl_fit
    # (an earlier results file was 'whole_brain_boot_strap_2019_10_29_10_19_57_489328.pkl')
    'results_folder': r'\\dm11\bishoplab\projects\keller_vnc\results\whole_brain_stats',
    'results_file': 'whole_brain_boot_strap_2020_01_25_15_30_35_692930.pkl',

    # Alpha level for confidence intervals (they are computed but not inspected here)
    'alpha': .01,

    # Kind of image to generate: 'threshold' or 'p-value'
    'type': 'threshold',

    # p-value cutoff, used only when making threshold images
    'threshold': .05,

    # Where the original datasets live - they are used to find the position of the rois
    'data_loc_file': r'A:\projects\keller_vnc\data\experiment_data_locations.xlsx',
    'dataset_folder': 'extracted',
    'dataset_base_folder': r'K:\\SV4',
    'roi_group': 'rois_1_5_5',

    # Where the generated images are saved
    'save_folder': r'\\dm11\bishoplab\projects\keller_vnc\results\whole_brain_stats',
    'save_str': 'whole_brain_stats_images',
}


## Load the results

In [5]:
# Unpickle the results file named in the parameters.
# NOTE: pickle.load can execute arbitrary code - only point this at trusted files.
rs_file = Path(ps['results_folder']).joinpath(ps['results_file'])
with rs_file.open('rb') as f:
    rs = pickle.load(f)

# Pull out the pieces the rest of the notebook works with
one_hot_vars = rs['one_hot_vars']
before_bs_rs, after_bs_rs = rs['before_bs_rs'], rs['after_bs_rs']

## Load a dataset

Because the ROIs are in the same location for each dataset, we can just look at the first dataset to find the positions of the ROIs.

In [6]:
# Read in dataset locations
def c_fcn(str):
    """Return the given text with every single-quote character removed.

    Used as a pandas converter when reading the dataset location spreadsheet.
    The parameter name shadows the built-in ``str`` only inside this function.
    """
    return "".join(str.split("'"))
# Converters that strip single quotes from spreadsheet cells.
# NOTE(review): keys 0 and 1 select columns by position; how these positions interact
# with usecols=[1, 2] may depend on the pandas version - confirm before upgrading.
converters = {0:c_fcn, 1:c_fcn}

data_locs = pd.read_excel(ps['data_loc_file'], header=1, usecols=[1, 2], converters=converters)

# Build a glob pattern matching the pickled dataset of the first experiment in the table
dataset_path = (Path(ps['dataset_base_folder']) / data_locs['Main folder'][0] / data_locs['Subfolder'][0] /
                    Path(ps['dataset_folder']) / '*.pkl')

# glob returns matches in arbitrary order and an empty list when nothing matches, so sort
# for a deterministic choice and fail with a clear message instead of a bare IndexError
dataset_files = sorted(glob.glob(str(dataset_path)))
if len(dataset_files) == 0:
    raise FileNotFoundError('No dataset file found matching: ' + str(dataset_path))
dataset_file = dataset_files[0]

# NOTE: pickle.load can execute arbitrary code - only load dataset files from trusted locations
with open(dataset_file, 'rb') as f:
    dataset = ROIDataset.from_dict(pickle.load(f))

In [7]:
dataset_file

'K:\\SV4\\CW_17-08-23\\L1-561nm-ROIMonitoring_20170823_145226.corrected\\extracted\\CW_17-08-23-L1_dataset.pkl'

## Get ROI locations for first dataset

In [8]:
# ROIs of the configured group, taken from the loaded dataset
rois = dataset.roi_groups[ps['roi_group']]['rois']

# There must be exactly one entry of "before" results per ROI
if len(before_bs_rs) != len(rois):
    raise RuntimeError('Number of rois in dataset does not match number of rois statistics are calculated for.')

n_rois = len(rois)

## Calculate statistics

In [14]:
# Pre-size the per-ROI statistics lists; entries are filled in by ROI index below
before_stats = [None for _ in range(n_rois)]
after_stats = [None for _ in range(n_rois)]

for rs_i, (before_rs, after_rs) in enumerate(zip(before_bs_rs, after_bs_rs)):
    # Element 0 of each per-ROI result is what the bootstrap statistics function consumes
    before_stats[rs_i] = grouped_linear_regression_boot_strap_stats(before_rs[0], ps['alpha'])
    after_stats[rs_i] = grouped_linear_regression_boot_strap_stats(after_rs[0], ps['alpha'])

    # Report progress every 10000 ROIs
    if not rs_i % 10000:
        print('Done with ' + str(rs_i) + ' rois.')

Done with 0 rois.
Done with 10000 rois.
Done with 20000 rois.
Done with 30000 rois.
Done with 40000 rois.
Done with 50000 rois.
Done with 60000 rois.
Done with 70000 rois.
Done with 80000 rois.
Done with 90000 rois.
Done with 100000 rois.
Done with 110000 rois.
Done with 120000 rois.
Done with 130000 rois.
Done with 140000 rois.
Done with 150000 rois.
Done with 160000 rois.
Done with 170000 rois.
Done with 180000 rois.
Done with 190000 rois.
Done with 200000 rois.
Done with 210000 rois.
Done with 220000 rois.
Done with 230000 rois.
Done with 240000 rois.
Done with 250000 rois.
Done with 260000 rois.
Done with 270000 rois.
Done with 280000 rois.
Done with 290000 rois.
Done with 300000 rois.
Done with 310000 rois.
Done with 320000 rois.
Done with 330000 rois.
Done with 340000 rois.
Done with 350000 rois.
Done with 360000 rois.
Done with 370000 rois.
Done with 380000 rois.


## Load mean image

In [15]:
# Mean image stored in the dataset's stats; it is saved with the generated images and
# its shape determines the shape of those images
mn_img = dataset.stats['mean']

## Generate and save images

In [63]:
# Create the output folder for this run: the save string with a time stamp appended,
# placed under the configured save location (os.makedirs fails if it already exists)
image_folder = append_ts(ps['save_str'])
save_folder_path = Path(ps['save_folder']).joinpath(image_folder)
os.makedirs(save_folder_path)

In [64]:
# Write the mean image into the output folder as 'mean.tiff'
mn_image_path = save_folder_path.joinpath('mean.tiff')
imageio.mimwrite(mn_image_path, mn_img)

In [65]:
# Generate one image per regression variable, for both the "before" and "after" results.
#
# Depending on ps['type'], the voxels of each ROI are filled with either:
#   'p-value'   - log10 of the ROI's 'non_zero_p' value for the variable, or
#   'threshold' - entry v_i of element 1 of the ROI's results (presumably the fitted
#                 coefficient - TODO confirm), written only when the p-value is below
#                 ps['threshold'].
# Voxels that are never written keep their initial value of 0.

# Validate the image type once, up front, so a bad setting fails before any work is done
# (and is still caught when there are no ROIs or variables to loop over)
if ps['type'] not in ('p-value', 'threshold'):
    raise ValueError('type not recogonized')

# These settings do not change during the loops, so look them up once
make_p_value_images = ps['type'] == 'p-value'
p_threshold = ps['threshold']

im_shape = mn_img.shape

n_vars = len(one_hot_vars)

before_p_images = [None]*n_vars
after_p_images = [None]*n_vars

for v_i in range(n_vars):
    after_im = np.zeros(im_shape, dtype=np.float32)
    before_im = np.zeros(im_shape, dtype=np.float32)
    for r_i in range(n_rois):
        cur_voxel_inds = rois[r_i].voxel_inds
        after_p = after_stats[r_i]['non_zero_p'][v_i]
        before_p = before_stats[r_i]['non_zero_p'][v_i]
        if make_p_value_images:
            # NOTE(review): a p-value of exactly 0 produces -inf here - confirm that is
            # acceptable for whatever reads these images
            after_im[cur_voxel_inds] = np.log10(after_p)
            before_im[cur_voxel_inds] = np.log10(before_p)
        else:
            # Threshold image: only ROIs passing the p-value cutoff receive a value
            if after_p < p_threshold:
                after_im[cur_voxel_inds] = after_bs_rs[r_i][1][v_i]
            if before_p < p_threshold:
                before_im[cur_voxel_inds] = before_bs_rs[r_i][1][v_i]

    after_p_images[v_i] = after_im
    before_p_images[v_i] = before_im
    print('Done with variable ' + str(v_i))

Done with variable 0
Done with variable 1
Done with variable 2
Done with variable 3
Done with variable 4
Done with variable 5
Done with variable 6
Done with variable 7
Done with variable 8
Done with variable 9
Done with variable 10
Done with variable 11
Done with variable 12
Done with variable 13
Done with variable 14
Done with variable 15


In [66]:
# Write every variable's images to disk: "after" images go into an 'after' subfolder and
# "before" images into a 'before' subfolder of the output folder

after_folder_path = save_folder_path / 'after'
before_folder_path = save_folder_path / 'before'

os.makedirs(after_folder_path)
os.makedirs(before_folder_path)

for v_i in range(n_vars):
    # File names are the variable name plus a suffix saying which results they come from
    var_name = one_hot_vars[v_i]
    after_im_path = after_folder_path / (var_name + '_after.tiff')
    before_im_path = before_folder_path / (var_name + '_before.tiff')

    imageio.mimwrite(after_im_path, after_p_images[v_i])
    imageio.mimwrite(before_im_path, before_p_images[v_i])

In [67]:
# Pickle the parameter dictionary into the output folder, next to the images
param_path = save_folder_path.joinpath('params.pkl')
with param_path.open('wb') as f:
    pickle.dump(ps, f)

In [68]:
one_hot_vars

['beh_before_Q',
 'beh_before_F',
 'beh_before_B',
 'beh_after_Q',
 'beh_after_F',
 'beh_after_B',
 'beh_interact_QQ',
 'beh_interact_QF',
 'beh_interact_QB',
 'beh_interact_FQ',
 'beh_interact_FF',
 'beh_interact_FB',
 'beh_interact_BQ',
 'beh_interact_BF',
 'beh_interact_BB',
 'mean']

In [55]:
save_folder_path

WindowsPath('//dm11/bishoplab/projects/keller_vnc/results/whole_brain_stats/whole_brain_stats_images_2020_01_27_17_12_32_247326')

In [None]:
-np.log(.002)

In [31]:
len(np.where(after_p_images[0] != 0)[0])

9614250

In [33]:
np.min(after_p_images[0])

-0.0019980026

In [35]:
np.log10(.1)

-1.0

In [46]:
before_bs_rs[0]

[array([[-0.00506909,  0.01081913,  0.00747783, ..., -0.00392158,
          0.00539139,  0.01322787],
        [ 0.0002863 ,  0.01168858,  0.00331868, ...,  0.00669039,
         -0.0033717 ,  0.01529356],
        [-0.00324385,  0.00775177,  0.00725069, ...,  0.00022119,
          0.0070295 ,  0.01175862],
        ...,
        [-0.00767275,  0.01340322,  0.0083324 , ...,  0.00101086,
          0.00105604,  0.01406287],
        [-0.00282925,  0.01345026,  0.00492187, ...,  0.00047838,
         -0.00213395,  0.01554288],
        [-0.00208054,  0.01035475,  0.0063716 , ...,  0.00094513,
          0.00542647,  0.0146458 ]]),
 array([-0.00333398,  0.01202381,  0.00692496,  0.00738006,  0.00465503,
         0.0035797 ,  0.00253408, -0.00094464, -0.00492342,  0.        ,
         0.00408508,  0.00793872,  0.00484598,  0.00151458,  0.0005644 ,
         0.01561478])]