In [1]:
import matplotlib
matplotlib.use('agg')
import h5py
import os
import json
import cv2
import time
import math
import optparse
import pandas as pd
import numpy as np
import pylab as pl
import matplotlib as mpl
import seaborn as sns
import pyvttbl as pt
import multiprocessing as mp
import tifffile as tf
from collections import namedtuple
from scipy import stats
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
import scikit_posthocs as sp

from pipeline.python.utils import natural_keys, replace_root, print_elapsed_time
import pipeline.python.traces.combine_runs as cb
import pipeline.python.paradigm.align_acquisition_events as acq
import pipeline.python.visualization.plot_psths_from_dataframe as vis
from pipeline.python.traces.utils import load_TID


In [2]:

#%%
def load_roi_dataframe(roidata_filepath):
    '''
    Load ROI trace data from an HDF5 file into a single pandas DataFrame.

    The file name (without directory) is split on '_': the second token is
    used as the roidata hash and the last token (minus its extension) as the
    trace type.

    Two layouts are handled:
        - first key contains 'roi': every key is read as one ROI's frame, a
          'roi' column holding the key name (leading '/' removed) is added,
          and all frames are concatenated with a fresh integer index.
        - otherwise: the first key is read with pd.read_hdf.

    Args:
        roidata_filepath : (str) path to the HDF5 file.

    Returns:
        (DATA, datakey) -- the loaded DataFrame and the key describing it
        ('<trace_type>_<hash>' for the per-ROI layout, else the first key).

    Raises:
        IndexError -- if the file name has no '_' or the store has no keys.
    '''

    fn_parts = os.path.split(roidata_filepath)[-1].split('_')
    roidata_hash = fn_parts[1]
    trace_type = os.path.splitext(fn_parts[-1])[0]

    df_list = []

    store = pd.HDFStore(roidata_filepath, 'r')
    try:
        datakeys = store.keys()
        per_roi_layout = 'roi' in datakeys[0]
        if per_roi_layout:
            for roi in datakeys:
                roiname = roi[1:] if '/' in roi else roi
                dfr = store[roi]
                dfr['roi'] = pd.Series(np.tile(roiname, (len(dfr.index),)), index=dfr.index)
                df_list.append(dfr)
    finally:
        # Always release the file handle, including on the per-ROI path and
        # when reading a key fails.
        store.close()

    if per_roi_layout:
        DATA = pd.concat(df_list, axis=0, ignore_index=True)
        datakey = '%s_%s' % (trace_type, roidata_hash)
    else:
        print("Found %i datakeys" % len(datakeys))
        datakey = datakeys[0]
        # Store is already closed at this point; read_hdf reopens the file itself.
        DATA = pd.read_hdf(roidata_filepath, datakey)

    return DATA, datakey

#%%

def pyvt_raw_epochXoriXsf(rdata, trans_types, save_fig=False, output_dir='/tmp', fname='boxplot(intensity~epoch_X_config).png'):
    '''
    Build a pyvttbl DataFrame holding, for every trial, the mean 'raw' value
    of the baseline epoch (frames [0:first_on]) and of the stimulus epoch
    (frames [first_on:first_on+nframes_on]), with 'ori' and 'sf' as factors.

    Trials are renamed 'trial00001', 'trial00002', ... by their natural-sorted
    position within their own (ori, sf) condition.

    NOTE: the body indexes the columns 'ori' and 'sf' directly, so
    trans_types is expected to sort to ['ori', 'sf'].

    Args:
        rdata : pandas DataFrame with columns 'ori', 'sf', 'trial', 'raw',
                'nframes_on', 'first_on' (the last two must each hold a
                single unique value).
        trans_types : list of factor column names.
        save_fig : if truthy, save a box plot of intensity by factors + epoch.
        output_dir, fname : passed to pyvttbl's box_plot when save_fig is set.

    Returns:
        pyvttbl DataFrame with fields trans_types + ['trial', 'epoch', 'intensity'].
    '''

    # Make sure trans_types are sorted:
    trans_types = sorted(trans_types, key=natural_keys)

    nframes_vals = list(set(rdata['nframes_on']))
    assert len(nframes_vals)==1, "More than 1 idx found for nframes on... %s" % str(nframes_vals)
    first_vals = list(set(rdata['first_on']))
    assert len(first_vals)==1, "More than 1 idx found for first frame on... %s" % str(first_vals)

    nframes_on = int(round(nframes_vals[0]))
    first_on = int(round(first_vals[0]))

    currdf = rdata[list(trans_types) + ['trial', 'raw']]
    grp = currdf.groupby(list(trans_types) + ['trial'])

    # 1-based rank of each trial within its (ori, sf) condition, computed once
    # per condition instead of once per trial group.
    trial_rank = {}
    records = []
    for k, g in grp:
        cond = (k[0], k[1])
        if cond not in trial_rank:
            cond_trials = sorted(list(set(currdf.loc[(currdf['ori']==k[0])
                                                     & (currdf['sf']==k[1])]['trial'])), key=natural_keys)
            trial_rank[cond] = dict((name, pos + 1) for pos, name in enumerate(cond_trials))

        trace = g['raw']
        trial_label = 'trial%05d' % int(trial_rank[cond][k[2]])
        records.append({'ori': k[0],
                        'sf': k[1],
                        'trial': trial_label,
                        'epoch': 'baseline',
                        'intensity': trace[0:first_on].mean()})
        records.append({'ori': k[0],
                        'sf': k[1],
                        'trial': trial_label,
                        'epoch': 'stimulus',
                        'intensity': trace[first_on:first_on+nframes_on].mean()})

    df = pd.DataFrame(records)
    df = df.sort_values(['epoch', 'ori', 'sf'])
    df = df.reset_index(drop=True)

    # Format pandas df into pyvttbl dataframe:
    df_factors = list(trans_types) + ['trial', 'epoch', 'intensity']

    Trial = namedtuple('Trial', df_factors)
    pdf = pt.DataFrame()
    for row in range(df.shape[0]):
        pdf.insert(Trial(df.loc[row, 'ori'],
                         df.loc[row, 'sf'],
                         df.loc[row, 'trial'],
                         df.loc[row, 'epoch'],
                         df.loc[row, 'intensity'])._asdict())

    if save_fig:
        factor_list = list(trans_types) + ['epoch']
        pdf.box_plot('intensity', factors=factor_list, fname=fname, output_dir=output_dir)

    return pdf
#%%
def pyvt_stimdf_oriXsf(rdata, trans_types, save_fig=False, output_dir='/tmp', fname='dff_boxplot(intensity~epoch_X_config).png'):
    '''
    Build a pyvttbl DataFrame of the per-trial mean 'df' value during the
    stimulus epoch (frames [first_on:first_on+nframes_on]), with 'ori' and
    'sf' as factors.

    NOTE: the body indexes the columns 'ori' and 'sf' directly, so
    trans_types is expected to sort to ['ori', 'sf'].

    Args:
        rdata : pandas DataFrame with columns 'ori', 'sf', 'trial', 'df',
                'nframes_on', 'first_on' (the last two must each hold a
                single unique value).
        trans_types : list of factor column names.
        save_fig : if truthy, save a box plot of dff by the factors.
        output_dir, fname : passed to pyvttbl's box_plot when save_fig is set.

    Returns:
        pyvttbl DataFrame with fields trans_types + ['trial', 'dff'].
    '''

    # Make sure trans_types are sorted:
    trans_types = sorted(trans_types, key=natural_keys)

    nframes_vals = list(set(rdata['nframes_on']))
    assert len(nframes_vals)==1, "More than 1 idx found for nframes on... %s" % str(nframes_vals)
    first_vals = list(set(rdata['first_on']))
    assert len(first_vals)==1, "More than 1 idx found for first frame on... %s" % str(first_vals)

    nframes_on = int(round(nframes_vals[0]))
    first_on = int(round(first_vals[0]))

    currdf = rdata[list(trans_types) + ['trial', 'df']]
    grp = currdf.groupby(list(trans_types) + ['trial'])

    # Only the stimulus-epoch mean reaches the returned table, so the
    # baseline statistics are not computed here.
    records = []
    for k, g in grp:
        records.append({'ori': k[0],
                        'sf': k[1],
                        'trial': k[2],
                        'dff': g['df'][first_on:first_on+nframes_on].mean()})

    df = pd.DataFrame(records)
    df = df.sort_values(trans_types)
    df = df.reset_index(drop=True)

    # Format pandas df into pyvttbl dataframe:
    df_factors = list(trans_types) + ['trial', 'dff']

    Trial = namedtuple('Trial', df_factors)
    pdf = pt.DataFrame()
    for row in range(df.shape[0]):
        pdf.insert(Trial(df.loc[row, 'ori'],
                         df.loc[row, 'sf'],
                         df.loc[row, 'trial'],
                         df.loc[row, 'dff'])._asdict())

    if save_fig:
        factor_list = list(trans_types)
        pdf.box_plot('dff', factors=factor_list, fname=fname, output_dir=output_dir)

    return pdf

#%%
def pyvt_stimdf_configs(rdata, save_fig=False, output_dir='/tmp', fname='boxplot(intensity~epoch_X_config).png'):

    '''
    Build a pyvttbl DataFrame of the per-trial mean 'df' value during the
    stimulus epoch (frames [first_on:first_on+nframes_on]), with 'config'
    as the only factor.

    Args:
        rdata : pandas DataFrame with columns 'config', 'trial', 'df',
                'nframes_on', 'first_on' (the last two must each hold a
                single unique value).
        save_fig : if truthy, save a box plot of dff by config.
        output_dir, fname : passed to pyvttbl's box_plot when save_fig is set.

    Returns:
        pyvttbl DataFrame with fields ['config', 'trial', 'dff'].
    '''

    nframes_vals = list(set(rdata['nframes_on']))
    assert len(nframes_vals)==1, "More than 1 idx found for nframes on... %s" % str(nframes_vals)
    first_vals = list(set(rdata['first_on']))
    assert len(first_vals)==1, "More than 1 idx found for first frame on... %s" % str(first_vals)

    nframes_on = int(round(nframes_vals[0]))
    first_on = int(round(first_vals[0]))

    currdf = rdata[['config', 'trial', 'df']]
    grp = currdf.groupby(['config', 'trial'])

    # One record per (config, trial); baseline statistics are not part of the
    # output, so they are not computed.
    records = []
    for k, g in grp:
        records.append({'config': k[0],
                        'trial': k[1],
                        'dff': g['df'][first_on:first_on+nframes_on].mean()})

    df = pd.DataFrame(records)
    df = df.sort_values(['config'])
    df = df.reset_index(drop=True)

    # Format pandas df into pyvttbl dataframe:
    df_factors = ['config', 'trial', 'dff']

    Trial = namedtuple('Trial', df_factors)
    pdf = pt.DataFrame()
    for row in range(df.shape[0]):
        pdf.insert(Trial(df.loc[row, 'config'],
                         df.loc[row, 'trial'],
                         df.loc[row, 'dff'])._asdict())

    if save_fig:
        factor_list = ['config']
        pdf.box_plot('dff', factors=factor_list, fname=fname, output_dir=output_dir)

    return pdf

#%%
def roidata_to_df_configs(rdata):

    '''
    Reduce frame-wise ROI data to one row per (config, trial).

    For each trial the 'df' trace is split into a baseline epoch
    (frames [0:first_on]) and a stimulus epoch
    (frames [first_on:first_on+nframes_on]).

    Args:
        rdata : pandas DataFrame with columns 'config', 'trial', 'df',
                'nframes_on', 'first_on' (the last two must each hold a
                single unique value).

    Returns:
        pandas DataFrame sorted by 'config' with a fresh integer index and
        columns:
            config, trial : the group keys
            dff           : mean of 'df' over the stimulus epoch
            zscore        : dff divided by the std of 'df' over the baseline epoch
    '''

    nframes_vals = list(set(rdata['nframes_on']))
    assert len(nframes_vals)==1, "More than 1 idx found for nframes on... %s" % str(nframes_vals)
    first_vals = list(set(rdata['first_on']))
    assert len(first_vals)==1, "More than 1 idx found for first frame on... %s" % str(first_vals)

    nframes_on = int(round(nframes_vals[0]))
    first_on = int(round(first_vals[0]))

    currdf = rdata[['config', 'trial', 'df']]

    # Collect plain records and build the frame once, rather than
    # concatenating one single-row DataFrame per trial.
    records = []
    for k, g in currdf.groupby(['config', 'trial']):
        trace = g['df']
        base_std = trace[0:first_on].std()
        stim_mean = trace[first_on:first_on+nframes_on].mean()
        records.append({'config': k[0],
                        'trial': k[1],
                        'dff': stim_mean,
                        'zscore': stim_mean / base_std})

    df = pd.DataFrame(records)
    df = df.sort_values(['config'])
    df = df.reset_index(drop=True)

    return df

#%%

def roidata_to_df_transforms(rdata, trans_types):

    '''
    Reduce frame-wise ROI data to one row per (transform levels..., trial).

    Each trial's 'df' trace is summarised as:
        dff    : mean over frames [first_on:first_on+nframes_on]
        zscore : dff divided by the std over frames [0:first_on]

    Args:
        rdata : pandas DataFrame containing the trans_types columns plus
                'trial', 'df', 'nframes_on', 'first_on' (the last two must
                each hold a single unique value).
        trans_types : list of column names to use as factors; they are
                natural-sorted before use.

    Returns:
        pandas DataFrame sorted by trans_types with a fresh integer index and
        columns 'trial', 'dff', 'zscore' plus one column per transform.
    '''

    nframes_vals = list(set(rdata['nframes_on']))
    assert len(nframes_vals)==1, "More than 1 idx found for nframes on... %s" % str(nframes_vals)
    first_vals = list(set(rdata['first_on']))
    assert len(first_vals)==1, "More than 1 idx found for first frame on... %s" % str(first_vals)

    nframes_on = int(round(nframes_vals[0]))
    first_on = int(round(first_vals[0]))

    # Factor columns are always handled in natural-sorted order:
    trans_types = sorted(trans_types, key=natural_keys)

    keep_cols = np.copy(trans_types).tolist() + ['trial', 'df']
    group_cols = np.copy(trans_types).tolist() + ['trial']
    grouped = rdata[keep_cols].groupby(group_cols)

    row_frames = []
    for row_num, (key, trial_rows) in enumerate(grouped):
        trace = trial_rows['df']
        base_std = trace[0:first_on].std()
        stim_mean = trace[first_on:first_on+nframes_on].mean()

        # Last element of the group key is the trial; the ones before it are
        # the transform levels, in trans_types order.
        row = {'trial': key[-1],
               'dff': stim_mean,
               'zscore': stim_mean / base_std}
        for pos in range(len(key) - 1):
            row[trans_types[pos]] = key[pos]

        row_frames.append(pd.DataFrame(row, index=[row_num]))

    summary = pd.concat(row_frames, axis=0)
    summary = summary.sort_values(trans_types).reset_index(drop=True)

    return summary

#%%

def roidata_to_factors(rdata):

    '''
    Reshape frame-wise ROI data into a wide table of per-trial epoch means.

    Output layout:
        rows    = replications (the trials of a condition, in groupby order)
        columns = 'bas_<config>' and 'stim_<config>' for every config, in
                  sorted config order, holding the mean 'raw' value over the
                  baseline frames [0:first_on] and the stimulus frames
                  [first_on:first_on+nframes_on] respectively.

    Conditions with fewer trials than the largest one are padded with NaN by
    the column-wise concat.

    Args:
        rdata : pandas DataFrame with columns 'config', 'trial', 'raw',
                'nframes_on', 'first_on' (the last two must each hold a
                single unique value).

    Returns:
        pandas DataFrame as described above.
    '''

    nframes_vals = list(set(rdata['nframes_on']))
    assert len(nframes_vals)==1, "More than 1 idx found for nframes on... %s" % str(nframes_vals)
    first_vals = list(set(rdata['first_on']))
    assert len(first_vals)==1, "More than 1 idx found for first frame on... %s" % str(first_vals)

    nframes_on = int(round(nframes_vals[0]))
    first_on = int(round(first_vals[0]))

    currdf = rdata[['config', 'trial', 'raw']]

    # Single pass over the (config, trial) groups; groupby yields them in
    # sorted key order, which makes the row order reproducible.
    baseline_vals = {}
    stimulus_vals = {}
    for k, g in currdf.groupby(['config', 'trial']):
        trace = g['raw']
        baseline_vals.setdefault(k[0], []).append(trace[0:first_on].mean())
        stimulus_vals.setdefault(k[0], []).append(trace[first_on:first_on+nframes_on].mean())

    # Sorted conditions give a reproducible column order (iterating a set of
    # config names does not).
    df_list = []
    for cond in sorted(baseline_vals):
        ntrials = len(baseline_vals[cond])
        df_list.append(pd.DataFrame({'bas_%s' % cond: baseline_vals[cond]},
                                    index=np.arange(0, ntrials)))
        df_list.append(pd.DataFrame({'stim_%s' % cond: stimulus_vals[cond]},
                                    index=np.arange(0, ntrials)))
    df = pd.concat(df_list, axis=1)

    return df

#D = {}
#D[roi] = df.values[0:-1,:]
#mfile_path = os.path.join('/home', 'juliana', 'Downloads', 'rdata1.mat')
#scipy.io.savemat(mfile_path, D)

#%%
def roidata_to_epochXconfig(rdata, save_fig=False, output_dir='/tmp', fname='boxplot(intensity~epoch_X_config).png'):

    '''
    Reformat frame-wise ROI data into long form with one row per
    (config, trial, epoch), suitable as input for an epoch x config ANOVA.

    For each trial the mean 'raw' value is taken over the baseline frames
    [0:first_on] and over the stimulus frames [first_on:first_on+nframes_on].

    Args:
        rdata : pandas DataFrame with columns 'config', 'trial', 'raw',
                'nframes_on', 'first_on' (the last two must each hold a
                single unique value).
        save_fig, output_dir, fname : accepted for signature parity with the
                pyvttbl variants; not used by this function.

    Returns:
        pandas DataFrame sorted by ['epoch', 'config'] with a fresh integer
        index and columns 'config', 'trial', 'epoch' ('baseline' or
        'stimulus') and 'intensity'.
    '''

    nframes_vals = list(set(rdata['nframes_on']))
    assert len(nframes_vals)==1, "More than 1 idx found for nframes on... %s" % str(nframes_vals)
    first_vals = list(set(rdata['first_on']))
    assert len(first_vals)==1, "More than 1 idx found for first frame on... %s" % str(first_vals)

    nframes_on = int(round(nframes_vals[0]))
    first_on = int(round(first_vals[0]))

    currdf = rdata[['config', 'trial', 'raw']]

    # Two records per trial (baseline first, then stimulus); the frame is
    # built once from the full list.
    records = []
    for k, g in currdf.groupby(['config', 'trial']):
        trace = g['raw']
        records.append({'config': k[0],
                        'trial': k[1],
                        'epoch': 'baseline',
                        'intensity': trace[0:first_on].mean()})
        records.append({'config': k[0],
                        'trial': k[1],
                        'epoch': 'stimulus',
                        'intensity': trace[first_on:first_on+nframes_on].mean()})

    df = pd.DataFrame(records)
    df = df.sort_values(['epoch', 'config'])
    df = df.reset_index(drop=True)

    return df


#%%
def pd_to_pyvtt_transforms(df, trans_types):
    '''
    Convert a pandas DataFrame of per-trial responses into a pyvttbl DataFrame.

    Works for any number of transform columns (the previous implementation
    inserted nothing unless there were exactly 1, 2 or 3).

    Args:
        df : pandas DataFrame with the trans_types columns plus 'trial' and
             'dff'. Rows are addressed by the integer labels
             0 .. len(df)-1, so the index must have been reset.
        trans_types : list of factor column names, in the field order wanted
             in the pyvttbl table.

    Returns:
        pyvttbl DataFrame with fields trans_types + ['trial', 'dff'].
    '''

    # Format pandas df into pyvttbl dataframe:
    df_factors = list(trans_types) + ['trial', 'dff']

    Trial = namedtuple('Trial', df_factors)
    pdf = pt.DataFrame()
    for row in range(df.shape[0]):
        pdf.insert(Trial(*[df.loc[row, col] for col in df_factors])._asdict())

    return pdf



#%%
def pyvt_raw_epochXconfig(rdata, save_fig=False, output_dir='/tmp', fname='boxplot(intensity~epoch_X_config).png'):

    '''
    Build the pyvttbl DataFrame used for the split-plot ANOVA on one ROI:
        within-trial factor :  epoch (baseline vs. stimulus)
        between-trial factor :  config (stimulus condition)

    The long-form table (one row per config/trial/epoch with the mean 'raw'
    intensity) comes from roidata_to_epochXconfig(); this function only
    converts it to pyvttbl and optionally saves a box plot.

    Args:
        rdata : pandas DataFrame with columns 'config', 'trial', 'raw',
                'nframes_on', 'first_on' (the last two must each hold a
                single unique value).
        save_fig : if truthy, save a box plot of intensity by config and epoch.
        output_dir, fname : passed to pyvttbl's box_plot when save_fig is set.

    Returns:
        pyvttbl DataFrame with fields ['config', 'trial', 'epoch', 'intensity'].
    '''

    # Shared reformatting step (same epoch means, sort order and index).
    df = roidata_to_epochXconfig(rdata)

    # Format pandas df into pyvttbl dataframe:
    df_factors = ['config', 'trial', 'epoch', 'intensity']

    Trial = namedtuple('Trial', df_factors)
    pdf = pt.DataFrame()
    for row in range(df.shape[0]):
        pdf.insert(Trial(*[df.loc[row, col] for col in df_factors])._asdict())

    if save_fig:
        factor_list = ['config', 'epoch']
        pdf.box_plot('intensity', factors=factor_list, fname=fname, output_dir=output_dir)

    return pdf


#%%
def pyvt_raw_epochXsinglecond(rdata, curr_config='config001'):

    '''
    Build a pyvttbl DataFrame for a single stimulus condition of one ROI,
    with 'epoch' (baseline vs. stimulus) as the only factor.

    Trials of the selected config are renamed 'trial00001', 'trial00002', ...
    by natural-sorted position. Each trial contributes the mean 'raw' value
    over frames [0:first_on] (baseline) and
    [first_on:first_on+nframes_on] (stimulus).

    Args:
        rdata : pandas DataFrame with columns 'config', 'trial', 'raw',
                'nframes_on', 'first_on' (the last two must each hold a
                single unique value).
        curr_config : config value whose trials are kept.

    Returns:
        pyvttbl DataFrame with fields ['trial', 'epoch', 'intensity'].
    '''

    nframes_vals = list(set(rdata['nframes_on']))
    assert len(nframes_vals)==1, "More than 1 idx found for nframes on... %s" % str(nframes_vals)
    first_vals = list(set(rdata['first_on']))
    assert len(first_vals)==1, "More than 1 idx found for first frame on... %s" % str(first_vals)

    nframes_on = int(round(nframes_vals[0]))
    first_on = int(round(first_vals[0]))

    # Keep only the rows belonging to the requested stimulus config:
    subset = rdata[['config', 'trial', 'raw']]
    subset = subset[subset['config']==curr_config]

    by_trial = subset.groupby(['config', 'trial'])
    trial_list = sorted(list(set(subset['trial'])), key=natural_keys)

    epoch_frames = []
    for pair_num, (key, trial_rows) in enumerate(by_trial):
        print(key)
        trace = trial_rows['raw']
        epoch_means = [('baseline', trace[0:first_on].mean()),
                       ('stimulus', trace[first_on:first_on+nframes_on].mean())]
        trial_label = 'trial%05d' % int(trial_list.index(key[1]) + 1)

        # Baseline row gets the even index, stimulus row the following odd one.
        for offset, (epoch_name, epoch_mean) in enumerate(epoch_means):
            epoch_frames.append(pd.DataFrame({'config': key[0],
                                              'trial': trial_label,
                                              'epoch': epoch_name,
                                              'intensity': epoch_mean},
                                             index=[2 * pair_num + offset]))

    long_df = pd.concat(epoch_frames, axis=0)
    long_df = long_df.sort_values(['epoch', 'config']).reset_index(drop=True)

    # Format pandas df into pyvttbl dataframe:
    df_factors = ['trial', 'epoch', 'intensity']

    Trial = namedtuple('Trial', df_factors)
    pdf = pt.DataFrame()
    for row in range(long_df.shape[0]):
        pdf.insert(Trial(*[long_df.loc[row, col] for col in df_factors])._asdict())

    return pdf


#%%

def extract_apa_anova2(factor, aov, values = ['F', 'mse', 'eta', 'p', 'df']):
    '''
    Pull selected statistics for one or more factors out of an ANOVA result.

    Args:
        factor : a single key of aov, or a list of such keys. Anything that
                 is not a list is treated as one key (so a tuple such as
                 ('epoch',) is a single key).
        aov : mapping from factor key to a dict of statistics that includes
              at least 'ss' and 'sse'; must also expose an attribute D.
        values : names of the statistics to copy for each factor.

    Returns:
        For each factor, a dict with the requested statistics that are
        present, plus:
            'dim'    : aov.D
            'eta2_p' : partial eta squared, ss / (ss + sse)
        If exactly one factor was processed that dict is returned directly;
        otherwise a dict keyed by factor is returned.
    '''

    if not isinstance(factor, list):
        factor = [factor]

    results = {}
    for fac in factor:
        stats_for_fac = aov[fac]

        # items() works on both list- and view-returning dicts.
        fmtresults = dict((key, result) for key, result in stats_for_fac.items()
                          if key in values)

        fmtresults['dim'] = aov.D

        # Calculate partial-eta2:
        fmtresults['eta2_p'] = stats_for_fac['ss'] / ( stats_for_fac['ss'] + stats_for_fac['sse'] )

        results[fac] = fmtresults

    if len(results) == 1:
        # Single factor: unwrap without indexing into keys().
        results = next(iter(results.values()))

    return results

#%%
import statsmodels.api as sm
from statsmodels.formula.api import ols

def LM_mixed(roi, rdata, output_dir = '/tmp', asdict=True):
    '''
    Fit two models of intensity on epoch and config for one ROI and print
    their summaries.

        1. OLS with categorical epoch, config and their interaction; an ANOVA
           table is computed from it with typ=3.
        2. Mixed linear model grouped by trial, with a random intercept and
           variance components for epoch and config.

    Args:
        roi, output_dir, asdict : accepted but not used by this function.
        rdata : frame-wise ROI data, reformatted via roidata_to_epochXconfig().

    Returns:
        None -- results are only printed.
    '''

    # One row per (config, trial, epoch), each factor in its own column.
    long_df = roidata_to_epochXconfig(rdata)

    ols_formula = 'intensity ~ C(epoch) + C(config) + C(epoch):C(config)'
    ols_fit = ols(ols_formula, data=long_df).fit()
    table = sm.stats.anova_lm(ols_fit, typ=3)  # computed with typ=3; result is not used further
    print(ols_fit.summary())

    variance_components = {'epoch': '0 + C(epoch)', 'config': '0 + config'}
    mixed_formula = 'intensity ~ epoch + config + epoch*config'
    mixed_model = sm.MixedLM.from_formula(mixed_formula, long_df,
                                          vc_formula=variance_components,
                                          groups=long_df['trial'],
                                          re_formula='1')

    mixed_fit = mixed_model.fit()
    print(mixed_fit.summary())




#%%
def splitplot_anova2_pyvt(roi, rdata, output_dir='/tmp', asdict=True):
    '''
    Run a split-plot ANOVA on one ROI with pyvttbl and report the epoch effect.

        dependent variable : 'intensity'
        subject            : 'trial'
        within factor      : 'epoch'
        between factor     : 'config'

    Rows of rdata containing NaN are dropped first. The full ANOVA report is
    printed and written to '<output_dir>/visual_anova_results_<roi>.txt'.

    Args:
        roi : ROI name, used only in the report file name.
        rdata : frame-wise data for this ROI (see pyvt_raw_epochXconfig).
        output_dir : directory for the report file.
        asdict : if exactly True, return the result dict; otherwise a tuple.

    Returns:
        dict of epoch-effect statistics from extract_apa_anova2(), or the
        tuple (F, p) when asdict is not True.
    '''
    table = pyvt_raw_epochXconfig(rdata.dropna(), save_fig=False)

    # Calculate ANOVA split-plot:
    aov = table.anova('intensity',
                      sub='trial',
                      wfactors=['epoch'],
                      bfactors=['config'])
    print(aov)

    report_path = os.path.join(output_dir, 'visual_anova_results_%s.txt' % roi)
    with open(report_path, 'wb') as report:
        report.write(str(aov))

    results_epoch = extract_apa_anova2(('epoch',), aov)

    if asdict is True:
        return results_epoch
    return results_epoch['F'], results_epoch['p']


#%%
def id_visual_cells_mp(DATA, output_dir='/tmp', nprocs=4):
    '''
    Run the per-ROI split-plot ANOVA (factors = epoch, config) in parallel.

    The natural-sorted ROI list is cut into nprocs contiguous chunks; each
    chunk is handled by its own process, which pushes a {roi: result} dict
    onto a shared queue.

    Args:
        DATA : pandas DataFrame with a 'roi' column (all ROIs).
        output_dir : passed through to the per-ROI ANOVA call.
        nprocs : number of worker processes to start.

    Returns:
        dict mapping roi name -> per-ROI ANOVA result. If a worker fails
        part-way, the ROIs it had not finished are missing from the dict
        and a warning is printed.
    '''

    roi_list = sorted(list(set(DATA['roi'])), key=natural_keys)
    print("Calculating split-plot ANOVA (factors=epoch, config) for %i rois." % len(roi_list))

    t_eval_mp = time.time()

    def worker(roi_list, DATA, output_dir, out_q):
        """
        Worker function is invoked in a process. 'roi_list' is a list of
        roi names to evaluate [rois00001, rois00002, etc.]. Results are placed
        in a dict that is pushed to a queue.
        """
        outdict = {}
        try:
            for roi in roi_list:
                print(roi)
                rdata = DATA[DATA['roi']==roi]
                # NOTE(review): splitplot_anova2 is not defined in the visible
                # part of this file (only splitplot_anova2_pyvt is) -- confirm
                # it exists elsewhere.
                outdict[roi] = splitplot_anova2(roi, rdata, output_dir=output_dir, asdict=True)
        finally:
            # Always report back, even after an exception: the parent expects
            # exactly one dict per process and would otherwise block forever.
            out_q.put(outdict)

    # Each process gets "chunksize' filenames and a queue to put his out-dict into:
    out_q = mp.Queue()
    chunksize = int(math.ceil(len(roi_list) / float(nprocs)))
    procs = []

    for i in range(nprocs):
        p = mp.Process(target=worker,
                       args=(roi_list[chunksize * i:chunksize * (i + 1)],
                                       DATA,
                                       output_dir,
                                       out_q))
        procs.append(p)
        print("Starting: %s" % (p,))
        p.start()

    # Collect all results into single results dict. We should know how many dicts to expect:
    # (the queue is drained before joining so workers can flush their results)
    resultdict = {}
    for i in range(nprocs):
        resultdict.update(out_q.get())

    # Wait for all worker processes to finish
    for p in procs:
        p.join()
        print("Finished: %s" % (p,))
        if p.exitcode != 0:
            print("WARNING: %s exited with code %s; its results may be incomplete." % (p, p.exitcode))

    print_elapsed_time(t_eval_mp)

    return resultdict




#%%
def id_visual_cells(DATA, save_figs=False, output_dir='/tmp'):
    '''
    For each ROI, do split-plot ANOVA --
        between-groups factor :  config
        within-groups factor :  epoch
    Raw intensity is used to avoid dependence of trial-epoch values.
    ROIs are processed serially in natural-sorted order.

    Args:
        DATA : pandas DataFrame with a 'roi' column (all ROIs).
        save_figs : accepted but not used by this function.
        output_dir : passed through to the per-ROI ANOVA call.

    Returns:
        dict() -- one entry for every ROI (no p-value filtering is applied
        here), holding whatever the per-ROI ANOVA call returns with
        asdict=True.
    '''

    roi_list = sorted(list(set(DATA['roi'])), key=natural_keys)
    print("Calculating split-plot ANOVA (factors=epoch, config) for %i rois." % len(roi_list))

    responsive_rois = {}
    for roi in roi_list:
        print(roi)
        roi_rows = DATA[DATA['roi']==roi]
        # NOTE(review): splitplot_anova2 is not defined in the visible part of
        # this file (only splitplot_anova2_pyvt is) -- confirm it exists elsewhere.
        responsive_rois[roi] = splitplot_anova2(roi, roi_rows, output_dir=output_dir, asdict=True)

    return responsive_rois


#%%
def plot_box_raw(DATA, roi_list, output_dir='/tmp'):
    '''
    Save one box plot of raw intensity by config and epoch for each ROI.

    Args:
        DATA : pandas DataFrame with a 'roi' column (all ROIs).
        roi_list : ROI names to plot.
        output_dir : directory passed to pyvttbl's box_plot; files are named
                     '<roi>_boxplot(intensity~epoch_X_config).png'.
    '''

    for roi in roi_list:
        roi_table = pyvt_raw_epochXconfig(DATA[DATA['roi']==roi], save_fig=False)
        roi_table.box_plot('intensity',
                           factors=['config', 'epoch'],
                           fname='%s_boxplot(intensity~epoch_X_config).png' % roi,
                           output_dir=output_dir)

#%%

def selectivity_KW(rdata, post_hoc='dunn', asdict=True):
    '''
    Test one ROI for stimulus selectivity with a Kruskal-Wallis test on the
    per-trial 'dff' values grouped by 'config', followed by a pairwise
    post-hoc test.

    Args:
        rdata : frame-wise data for one ROI (see roidata_to_df_configs).
        post_hoc : 'dunn' or 'conover' -- which scikit_posthocs test to run.
        asdict : if exactly True, return a dict; otherwise a tuple.

    Returns:
        dict with keys 'H', 'p' (Kruskal-Wallis statistic and p-value),
        'post_hoc' (the test name) and 'p_rank' (post-hoc result),
        or the tuple (H, p, post-hoc result) when asdict is not True.

    Raises:
        ValueError -- if post_hoc is not one of the supported names.
    '''

    # Validate up front: previously an unknown name ran the whole analysis
    # and then failed on the undefined post-hoc result.
    if post_hoc not in ('dunn', 'conover'):
        raise ValueError("Unsupported post_hoc test %r (expected 'dunn' or 'conover')" % (post_hoc,))

    # Get standard dataframe (not pyvttbl):
    df = roidata_to_df_configs(rdata)

    # Format dataframe and do KW test:
    groupedconfigs = {}
    for grp in df['config'].unique():
        groupedconfigs[grp] = df[df['config']==grp]['dff'].values
    args = groupedconfigs.values()
    H, p = stats.kruskal(*args)

    # Do post-hoc test:
    if post_hoc == 'dunn':
        pc = sp.posthoc_dunn(df, val_col='dff', group_col='config')
    else:
        pc = sp.posthoc_conover(df, val_col='dff', group_col='config')

    # Save ROI info:
    posthoc_results = {'H': H,
                       'p': p,
                       'post_hoc': post_hoc,
                       'p_rank': pc}
    if asdict is True:
        return posthoc_results
    else:
        return posthoc_results['H'], posthoc_results['p'], pc

#%%
def id_selective_cells_mp(DATA, nprocs=4):
    """Run the Kruskal-Wallis selectivity test for every ROI, split across processes.

    The unique values of DATA['roi'] are sorted with natural_keys, cut into
    `nprocs` contiguous chunks, and each chunk is handled by its own process,
    which calls selectivity_KW(rdata, post_hoc='dunn', asdict=True) per ROI.

    Parameters
    ----------
    DATA : pandas.DataFrame with a 'roi' column.
    nprocs : int
        Number of worker processes (values below 1 are treated as 1).

    Returns
    -------
    dict
        {roi name: selectivity_KW result}. If a worker fails part-way, the ROIs
        it had not finished are absent and a warning is printed.
    """
    nprocs = max(1, int(nprocs))

    roi_list = sorted(list(set(DATA['roi'])), key=natural_keys)
    print("Calculating KW selectivity test for %i rois." % len(roi_list))

    t_eval_mp = time.time()

    # NOTE(review): a nested function can only be used as a Process target
    # when processes are forked -- confirm the platform/start method.
    def worker(roi_list, DATA, out_q):
        """
        Worker function is invoked in a process. 'roi_list' is a list of
        roi names to evaluate [rois00001, rois00002, etc.]. Results are placed
        in a dict that is pushed to a queue.
        """
        outdict = {}
        try:
            for roi in roi_list:
                print(roi)
                rdata = DATA[DATA['roi']==roi]
                outdict[roi] = selectivity_KW(rdata, post_hoc='dunn', asdict=True)
        finally:
            # Always hand back whatever was computed: the parent waits for
            # exactly one dict per process and would otherwise block forever
            # if this worker raised.
            out_q.put(outdict)

    # Each process gets 'chunksize' roi names and a queue to put its out-dict into:
    out_q = mp.Queue()
    chunksize = int(math.ceil(len(roi_list) / float(nprocs)))
    procs = []

    for i in range(nprocs):
        p = mp.Process(target=worker,
                       args=(roi_list[chunksize * i:chunksize * (i + 1)],
                             DATA,
                             out_q))
        procs.append(p)
        p.start()

    # Collect all results into a single dict (one dict per process). This must
    # happen before join(): a process that has queued data does not exit until
    # the data has been consumed.
    resultdict = {}
    for i in range(nprocs):
        resultdict.update(out_q.get())

    # Wait for all worker processes to finish
    for p in procs:
        p.join()
        print("Finished: %s" % p)
        if p.exitcode != 0:
            print("WARNING: %s exited with code %s; results for some of its ROIs may be missing." % (p.name, p.exitcode))

    print_elapsed_time(t_eval_mp)

    return resultdict

#%%
def id_selective_cells(DATA, roi_list, topn=10, test_normal=False, post_hoc='dunn', save_figs=False, output_dir='/tmp'):
    """Kruskal-Wallis selectivity test (with post-hoc comparison) for each ROI.

    Parameters
    ----------
    DATA : pandas.DataFrame with a 'roi' column; per-ROI rows are converted
        with roidata_to_df_configs into a frame with 'config' and 'dff'.
    roi_list : iterable of ROI names to test.
    topn : int
        Figures are only written for the first `topn` entries of roi_list.
    test_normal : bool
        If True, run a normality test on the dff values first; the KW test is
        only run when normality is rejected (p < 0.05).
    post_hoc : {'dunn', 'conover'}
        Pairwise post-hoc test run with scikit_posthocs.
    save_figs : bool
        Write box plots, probability plots and post-hoc heat maps to output_dir.
    output_dir : str
        Directory for the figures.

    Returns
    -------
    dict
        {roi: {'H', 'p', 'post_hoc', 'p_rank'}}. For an ROI where the KW test
        was skipped (test_normal=True and normality not rejected) 'H' and 'p'
        are NaN and 'p_rank' is None, instead of values carried over from the
        previously processed ROI.

    Raises
    ------
    ValueError
        If `post_hoc` is not a supported test name.

    Notes
    -----
    The box-plot tick labels read the module-level `stimconfigs` dict.
    """
    if post_hoc not in ('dunn', 'conover'):
        raise ValueError("Unsupported post_hoc test %r (expected 'dunn' or 'conover')" % (post_hoc,))

    ph_results = {}

    for ridx,roi in enumerate(roi_list):
        rdata = DATA[DATA['roi']==roi]

        # Get standard dataframe (not pyvttbl):
        df = roidata_to_df_configs(rdata)

        # Figures are limited to the first `topn` ROIs:
        make_figs = ridx < topn and save_figs

        if make_figs:
            print(roi)
            # One column of dff values per config, ordered by descending median:
            grped = df.groupby('config')
            df2 = pd.DataFrame({col:vals['dff'] for col,vals in grped})
            meds = df2.median().sort_values(ascending=False)
            df2 = df2[meds.index]
            pl.figure(figsize=(10,5))
            ax = sns.boxplot(data=df2)
            pl.title(roi)
            pl.ylabel('df/f')
            ax.set_xticklabels(['%i deg\n%.2f cpd\n%s' % (stimconfigs[t.get_text()]['rotation'],
                                                          stimconfigs[t.get_text()]['frequency'],
                                                          t.get_text()) for t in ax.get_xticklabels()])

            figname = 'box_mediandff_%s.png' % roi
            pl.savefig(os.path.join(output_dir, figname))
            pl.close()

        normality = False
        if test_normal:
            k2, pn = stats.mstats.normaltest(df['dff'])
            if pn < 0.05:
                print("Normal test: p < 0.05, k=%.2f" % k2)
                normality = False
            else:
                print("Normal test: p > 0.05, k=%.2f" % k2)
                normality = True

            # Check for normality:
            if make_figs:
                pl.figure()
                qq_res = stats.probplot(df['dff'], dist="norm", plot=pl)
                pl.title('P-P plot %s' % roi)
                pl.text(-2, 0.3, 'p=%s' % str(pn))
                figname = 'PPplot_%s.png' % roi
                # Save before show(): an interactive show() can leave an empty
                # canvas behind for a later savefig().
                pl.savefig(os.path.join(output_dir, figname))
                pl.show()
                pl.close()

        # Per-ROI defaults, so a skipped test can never report the previous
        # ROI's statistics (NaN compares False against any threshold):
        H, p, pc = np.nan, np.nan, None

        if normality is False:
            # Format dataframe and do KW test (one sample per config):
            samples = [df[df['config']==grp]['dff'].values for grp in df['config'].unique()]
            H, p = stats.kruskal(*samples)

            # Do post-hoc test:
            if post_hoc == 'dunn':
                pc = sp.posthoc_dunn(df, val_col='dff', group_col='config')
            else:
                pc = sp.posthoc_conover(df, val_col='dff', group_col='config')

            if make_figs:
                # Plot heatmap of p-values from post-hoc test:
                pl.figure(figsize=(10,8))
                pl.title('%s test, %s' % (post_hoc, roi))
                cmap = ['1', '#fb6a4a',  '#08306b',  '#4292c6', '#c6dbef']
                heatmap_args = {'cmap': cmap, 'linewidths': 0.25, 'linecolor': '0.5',
                                'clip_on': False, 'square': True,
                                'cbar_ax_bbox': [0.90, 0.35, 0.02, 0.3]}
                sp.sign_plot(pc, **heatmap_args)
                figname = 'pvalues_%s_%s.png' % (roi, post_hoc)
                pl.savefig(os.path.join(output_dir, figname))
                pl.close()

        # Save ROI info:
        ph_results[roi] = {'H': H,
                           'p': p,
                           'post_hoc': post_hoc,
                           'p_rank': pc}

    return ph_results

#%%

def selectivity_ANOVA2(roi, rdata, trans_types, output_dir='/tmp'):
    """Between-factors ANOVA of 'dff' over the stimulus transforms for one ROI.

    Parameters
    ----------
    roi : str
        ROI name; used in the name of the results text file.
    rdata : rows of the ROI data frame for this ROI.
    trans_types : list
        Factor names, passed as `bfactors` to the pyvttbl ANOVA.
    output_dir : str
        Directory the ANOVA table is written to, as
        'selectivity_2wayanova_results_<roi>.txt'. (The function used to ignore
        this argument and write to the global `selective_resultsdir`.)

    Returns
    -------
    Whatever extract_apa_anova2 returns for the fitted ANOVA.
    """
    df = roidata_to_df_transforms(rdata, trans_types)
    pdf = pd_to_pyvtt_transforms(df, trans_types)

    # Calculate ANOVA with the transforms as between-subject factors:
    aov = pdf.anova('dff', sub='trial',
                       bfactors=trans_types)
    #print(aov)

    # Save the printable ANOVA table; the with-block closes the file.
    aov_results_fpath = os.path.join(output_dir, 'selectivity_2wayanova_results_%s.txt' % roi)
    with open(aov_results_fpath, 'w') as f:
        f.write(str(aov))

    #etas = get_effect_sizes(aov, factor_a='epoch', factor_b='config')
    # Identify which, if any, factors are significant:
#    factor_types = aov.keys()
#    res_epoch = {}
#    for factor in factor_types:
#        res_epoch[factor] = extract_apa_anova2(factor, aov)

    # NOTE(review): `factor_types` is not assigned anywhere in this function
    # (its only assignment is in the commented-out lines above), so this call
    # relies on a module-level `factor_types` existing at call time -- confirm
    # the intended value before relying on this function.
    res_epoch = extract_apa_anova2(factor_types, aov)

    return res_epoch


def id_selectivity_anova_mp(roi_list, DATA, trans_types, output_dir='/tmp', nprocs=4):
    """Run selectivity_ANOVA2 for every ROI in roi_list, split across processes.

    roi_list is cut into `nprocs` contiguous chunks; each chunk is handled by
    its own process, which calls
    selectivity_ANOVA2(roi, rdata, trans_types, output_dir=output_dir) per ROI.

    Parameters
    ----------
    roi_list : list of ROI names.
    DATA : pandas.DataFrame with a 'roi' column.
    trans_types : list of factor names for the ANOVA.
    output_dir : str, forwarded to selectivity_ANOVA2.
    nprocs : int
        Number of worker processes (values below 1 are treated as 1).

    Returns
    -------
    dict
        {roi name: selectivity_ANOVA2 result}. If a worker fails part-way, the
        ROIs it had not finished are absent and a warning is printed.
    """
    nprocs = max(1, int(nprocs))

    print("Calculating %i-way ANOVA for %i rois (factors: %s)" % (len(trans_types), len(roi_list), str(trans_types)))

    t_eval_mp = time.time()

    # NOTE(review): a nested function can only be used as a Process target
    # when processes are forked -- confirm the platform/start method.
    def worker(roi_list, DATA, trans_types, output_dir, out_q):
        """
        Worker function is invoked in a process. 'roi_list' is a list of
        roi names to evaluate [rois00001, rois00002, etc.]. Results are placed
        in a dict that is pushed to a queue.
        """
        outdict = {}
        try:
            for roi in roi_list:
                print(roi)
                rdata = DATA[DATA['roi']==roi]
                outdict[roi] = selectivity_ANOVA2(roi, rdata, trans_types, output_dir=output_dir)
        finally:
            # Always hand back whatever was computed: the parent waits for
            # exactly one dict per process and would otherwise block forever
            # if this worker raised.
            out_q.put(outdict)

    # Each process gets 'chunksize' roi names and a queue to put its out-dict into:
    out_q = mp.Queue()
    chunksize = int(math.ceil(len(roi_list) / float(nprocs)))
    procs = []

    for i in range(nprocs):
        p = mp.Process(target=worker,
                       args=(roi_list[chunksize * i:chunksize * (i + 1)],
                             DATA,
                             trans_types,
                             output_dir,
                             out_q))
        procs.append(p)
        print("Starting: %s" % p)
        p.start()

    # Collect all results into a single dict (one dict per process). This must
    # happen before join(): a process that has queued data does not exit until
    # the data has been consumed.
    resultdict = {}
    for i in range(nprocs):
        print("Getting results: %s" % i)
        resultdict.update(out_q.get())

    # Wait for all worker processes to finish
    for p in procs:
        p.join()
        print("Finished: %s" % p)
        if p.exitcode != 0:
            print("WARNING: %s exited with code %s; results for some of its ROIs may be missing." % (p.name, p.exitcode))

    print_elapsed_time(t_eval_mp)

    return resultdict


def id_selectivity_anova(roi_list, DATA, trans_types, output_dir='/tmp'):
    """Run selectivity_ANOVA2 on each ROI in turn (single process).

    Each ROI name is printed, its rows are selected from DATA via the 'roi'
    column, and the value returned by selectivity_ANOVA2 is stored under the
    ROI name.

    Returns
    -------
    dict : {roi name: selectivity_ANOVA2 result}
    """
    anova_by_roi = {}

    for roi_name in roi_list:
        print(roi_name)
        roi_rows = DATA[DATA['roi'] == roi_name]
        anova_by_roi[roi_name] = selectivity_ANOVA2(roi_name, roi_rows, trans_types,
                                                    output_dir=output_dir)

    return anova_by_roi

#%%
def uint16_to_RGB(img):
    """Rescale an image to 8 bits and expand it to three identical channels.

    The image is divided by its own maximum, scaled to 0-255, truncated to
    uint8 and converted with cv2.COLOR_GRAY2BGR (for a gray input all three
    channels are equal, so channel order does not matter).

    Parameters
    ----------
    img : numpy.ndarray
        Single-channel image.

    Returns
    -------
    numpy.ndarray of dtype uint8 with a trailing channel axis of length 3.
    An all-zero input gives an all-zero output (the plain division would
    produce 0/0 = NaN before the uint8 cast).
    """
    peak = img.max()
    if peak == 0:
        # Nothing to normalise by; keep the image black instead of dividing by zero.
        im = np.zeros(img.shape, dtype=np.float64)
    else:
        im = img.astype(np.float64) / peak
    im = (255 * im).astype(np.uint8)
    return cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)

#%%
def assign_OSI(DATA, roi_list, stimconfigs):
    """Compute a selectivity index and an orientation selectivity index per ROI.

    Parameters
    ----------
    DATA : pandas.DataFrame with a 'roi' column; per-ROI rows are converted
        with roidata_to_df_configs into a frame with 'config' and 'dff'.
    roi_list : iterable of ROI names; the characters after the first three are
        parsed as a 1-based integer index.
    stimconfigs : dict
        {config name: {'rotation': degrees, 'frequency': spatial frequency, ...}}.

    Returns
    -------
    dict
        {roi: {'ridx': 0-based ROI index,
               'SI': (Rmost - Rleast) / (Rmost + Rleast) over mean dff per config,
               'OSI': |sum_k R_k exp(2i theta_k)| / sum_k R_k, evaluated over
                      the configs that share the spatial frequency of the
                      best-driving config,
               'ori': rotation of the best-driving config}}
    """
    # Group config names by spatial frequency, each group sorted by rotation.
    # This depends only on stimconfigs, so it is done once rather than per ROI.
    configs_by_sf = {}
    for config in stimconfigs.keys():
        configs_by_sf.setdefault(stimconfigs[config]['frequency'], []).append(config)
    for sf in configs_by_sf:
        configs_by_sf[sf].sort(key=lambda x: stimconfigs[x]['rotation'])

    selectivity = {}
    for roi in roi_list:
        ridx = int(roi[3:]) - 1

        rdata = DATA[DATA['roi']==roi]

        # Get standard dataframe (not pyvttbl):
        df = roidata_to_df_configs(rdata)

        # Mean response per config, best config first:
        stimdf_means = df.groupby(['config'])['dff'].mean()
        ordered_configs = stimdf_means.sort_values(ascending=False).index
        best_config = ordered_configs[0]
        Rmost = stimdf_means[best_config]
        Rleast = stimdf_means[ordered_configs[-1]]
        SI = (Rmost - Rleast) / (Rmost + Rleast)

        # If > 1 SF, use the one of the best-driving config, and take the
        # orientations from that same set of configs:
        orientation_list = configs_by_sf[stimconfigs[best_config]['frequency']]
        # Rotations are stored in degrees; np.exp needs radians.
        thetas = np.deg2rad([stimconfigs[cfg]['rotation'] for cfg in orientation_list])
        responses = np.array([stimdf_means[cfg] for cfg in orientation_list], dtype=float)

        OSI = np.abs(np.sum(responses * np.exp(2j * thetas)) / np.sum(responses))

        selectivity[roi] = {'ridx': ridx,
                            'SI': SI,
                            'OSI': OSI,
                            'ori': stimconfigs[best_config]['rotation']}

    return selectivity

#%%
def get_reference_config(Cmax_overall, trans_types, transform_dict):
    """Pick the reference value of each view transform (xpos, ypos, size, sf).

    Parameters
    ----------
    Cmax_overall : sequence
        Values of the best-driving condition, ordered like `trans_types`.
    trans_types : sequence of str
        Names of the transforms that were varied.
    transform_dict : dict
        {transform name: list of values}; the first value is used as the
        reference for 'xpos', 'ypos' or 'size' when that transform was not varied.

    Returns
    -------
    dict
        Always has 'xpos', 'ypos' and 'size'; has 'sf' only when 'sf' is in
        `trans_types`.

    Raises
    ------
    KeyError
        If 'xpos', 'ypos' or 'size' is neither varied nor in transform_dict.
    """
    # Accept any iterable of names (e.g. a dict keys view), not just a list:
    trans_types = list(trans_types)

    Cref = {}
    for param in ('xpos', 'ypos', 'size'):
        if param in trans_types:
            # Varied: take the value from the best-driving condition.
            Cref[param] = Cmax_overall[trans_types.index(param)]
        else:
            # Not varied: fall back to the first listed value.
            Cref[param] = transform_dict[param][0]

    # 'sf' is only reported when it was varied; no fallback is looked up,
    # because a fallback value would never be returned.
    if 'sf' in trans_types:
        Cref['sf'] = Cmax_overall[trans_types.index('sf')]

    return Cref

#%% SELECTIVITY -- calculate a sparseness measure:

# Case:  Transformations in xpos, ypos
# Take as "reference" position, the x- and y-position eliciting the max response

def calc_sparseness(df, trans_types, transform_dict):
    """Sparseness of the mean responses at the reference view of the best condition.

    Mean 'dff' is computed per combination of `trans_types`. The combination
    with the largest mean defines the reference xpos/ypos/size/sf (via
    get_reference_config); the means are then restricted to that reference
    view and the sparseness

        S = (1 - (sum_i r_i / n)^2 / (sum_i r_i^2 / n)) / (1 - 1/n)

    is computed over the n remaining responses r_i.

    Parameters
    ----------
    df : pandas.DataFrame with a 'dff' column and one column per transform.
    trans_types : list of column names to group by.
    transform_dict : dict forwarded to get_reference_config.

    Returns
    -------
    dict
        {'S': sparseness, 'object_responses': the restricted mean responses}.
        S is 0 when every response is negative, and NaN when it is undefined
        (fewer than two responses, or all responses zero).
    """
    stimdf_means = df.groupby(trans_types)['dff'].mean()
    ordered_configs = stimdf_means.sort_values(ascending=False).index
    if isinstance(ordered_configs, pd.MultiIndex):
        ordered_configs = ordered_configs.tolist()
    Cmax_overall = ordered_configs[0]

    Cref = get_reference_config(Cmax_overall, trans_types, transform_dict)

    # Keep only the responses measured at the reference value of each varied transform:
    object_resp_df = stimdf_means.copy()
    for level in ('xpos', 'ypos', 'size', 'sf'):
        if level in trans_types:
            object_resp_df = object_resp_df.xs(Cref[level], level=level)

    responses = np.asarray(object_resp_df.values, dtype=float)
    nobjects = len(responses)

    # TODO:  what to do if stim_df values are negative??
    if all(responses < 0):
        S = 0
    else:
        t1a = (responses / nobjects).sum() ** 2
        t1b = (responses ** 2 / nobjects).sum()
        if nobjects < 2 or t1b == 0:
            # 1 - 1/n is zero for a single response, and t1a/t1b is 0/0 when
            # all responses are zero: the index is undefined in both cases.
            S = np.nan
        else:
            # 1.0/nobjects: plain 1/nobjects is integer division under
            # Python 2, which silently turned the normalisation into 1.
            S = (1 - (t1a / t1b)) / (1 - (1.0 / nobjects))

    sparseness_ref = {'S': S, 'object_responses': object_resp_df}

    return sparseness_ref

#%%
def assign_sparseness_index_mp(roi_list, DATA, trans_types, transform_dict, nprocs=4):
    """Compute calc_sparseness for every ROI in roi_list, split across processes.

    roi_list is cut into `nprocs` contiguous chunks; each chunk is handled by
    its own process, which converts the ROI's rows with
    roidata_to_df_transforms and passes the result to calc_sparseness.

    Parameters
    ----------
    roi_list : list of ROI names.
    DATA : pandas.DataFrame with a 'roi' column.
    trans_types : list of transform names.
    transform_dict : dict forwarded to calc_sparseness.
    nprocs : int
        Number of worker processes (values below 1 are treated as 1).

    Returns
    -------
    dict
        {roi name: calc_sparseness result}. If a worker fails part-way, the
        ROIs it had not finished are absent and a warning is printed.
    """
    nprocs = max(1, int(nprocs))

    print("Calculating SPARSENESS index for %i rois." % len(roi_list))

    t_eval_mp = time.time()

    # NOTE(review): a nested function can only be used as a Process target
    # when processes are forked -- confirm the platform/start method.
    def worker(roi_list, DATA, trans_types, transform_dict, out_q):
        """
        Worker function is invoked in a process. 'roi_list' is a list of
        roi names to evaluate [rois00001, rois00002, etc.]. Results are placed
        in a dict that is pushed to a queue.
        """
        outdict = {}
        try:
            for roi in roi_list:
                print(roi)
                rdata = DATA[DATA['roi']==roi]
                df = roidata_to_df_transforms(rdata, trans_types)
                outdict[roi] = calc_sparseness(df, trans_types, transform_dict)
        finally:
            # Always hand back whatever was computed: the parent waits for
            # exactly one dict per process and would otherwise block forever
            # if this worker raised.
            out_q.put(outdict)

    # Each process gets 'chunksize' roi names and a queue to put its out-dict into:
    out_q = mp.Queue()
    chunksize = int(math.ceil(len(roi_list) / float(nprocs)))
    procs = []

    for i in range(nprocs):
        p = mp.Process(target=worker,
                       args=(roi_list[chunksize * i:chunksize * (i + 1)],
                             DATA,
                             trans_types,
                             transform_dict,
                             out_q))
        procs.append(p)
        p.start()

    # Collect all results into a single dict (one dict per process). This must
    # happen before join(): a process that has queued data does not exit until
    # the data has been consumed.
    resultdict = {}
    for i in range(nprocs):
        resultdict.update(out_q.get())

    # Wait for all worker processes to finish
    for p in procs:
        p.join()
        print("Finished: %s" % p)
        if p.exitcode != 0:
            print("WARNING: %s exited with code %s; results for some of its ROIs may be missing." % (p.name, p.exitcode))

    print_elapsed_time(t_eval_mp)

    return resultdict


#%%
#
#roi = 'roi00006'
#rdata = DATA[DATA['roi']==roi]
#df = roidata_to_df_transforms(rdata, trans_types)
#
#stimdf_means = df.groupby(trans_types)['dff'].mean()
#

#%%
def extract_options(options):
    """Parse a list of command-line style arguments into an optparse Values object.

    Parameters
    ----------
    options : list of str
        Arguments such as ['-D', '/path', '-i', 'ANIMAL', ...].

    Returns
    -------
    optparse.Values
        Attributes: rootdir, animalid, session, acquisition, trace_type,
        run_list, traceid_list, nruns, slurm, multiproc, nprocesses, combined,
        filter_pupil, pupil_radius_min, pupil_radius_max, pupil_dist_thr.
        Values given on the command line are strings (no `type` is declared);
        defaults keep the Python type they are written with below.
    """
    # (flags, add_option keyword arguments), in the order they are registered.
    # The table is built inside the function so the list defaults of the
    # 'append' options are fresh objects on every call.
    option_table = [
        # Data location:
        (('-D', '--root'),
         dict(action='store', dest='rootdir',
              default='/nas/volume1/2photon/data',
              help='data root dir (dir containing all animalids) [default: /nas/volume1/2photon/data, /n/coxfs01/2pdata if --slurm]')),
        (('-i', '--animalid'),
         dict(action='store', dest='animalid', default='', help='Animal ID')),

        # Specific session/run for the current animal:
        (('-S', '--session'),
         dict(action='store', dest='session', default='',
              help='session dir (format: YYYMMDD_ANIMALID')),
        (('-A', '--acq'),
         dict(action='store', dest='acquisition', default='FOV1',
              help="acquisition folder (ex: 'FOV1_zoom3x') [default: FOV1]")),
        (('-T', '--trace-type'),
         dict(action='store', dest='trace_type', default='raw',
              help="trace type [default: 'raw']")),
        (('-R', '--run'),
         dict(dest='run_list', default=[], nargs=1, action='append',
              help="run ID in order of runs")),
        (('-t', '--traceid'),
         dict(dest='traceid_list', default=[], nargs=1, action='append',
              help="trace ID in order of runs")),
        (('-n', '--nruns'),
         dict(action='store', dest='nruns', default=1,
              help="Number of consecutive runs if combined")),

        # Execution environment:
        (('--slurm',),
         dict(action='store_true', dest='slurm', default=False,
              help="set if running as SLURM job on Odyssey")),
        (('--par',),
         dict(action='store_true', dest='multiproc', default=False,
              help="set if want to run MP on roi stats, when possible")),
        (('--nproc',),
         dict(action='store', dest='nprocesses', default=4,
              help="N processes if running in par (default=4)")),
        (('--combo',),
         dict(action='store_true', dest='combined', default=False,
              help="Set if using combined runs with same default name (blobs_run1, blobs_run2, etc.)")),

        # Pupil filtering info:
        (('--no-pupil',),
         dict(action="store_false", dest="filter_pupil", default=True,
              help="Set flag NOT to filter PSTH traces by pupil threshold params")),
        (('-s', '--radius-min'),
         dict(action="store", dest="pupil_radius_min", default=25,
              help="Cut-off for smnallest pupil radius, if --pupil set [default: 25]")),
        (('-B', '--radius-max'),
         dict(action="store", dest="pupil_radius_max", default=65,
              help="Cut-off for biggest pupil radius, if --pupil set [default: 65]")),
        (('-d', '--dist'),
         dict(action="store", dest="pupil_dist_thr", default=5,
              help="Cut-off for pupil distance from start, if --pupil set [default: 5]")),
    ]

    parser = optparse.OptionParser()
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    # Positional leftovers are not used.
    parsed_values, _leftover_args = parser.parse_args(options)

    return parsed_values


  from pandas.core import datetools


In [3]:

# Arguments for this notebook run, in the form extract_options() expects:
options = ['-D', '/mnt/odyssey', '-i', 'CE077', '-S', '20180425', '-A', 'FOV1_zoom1x',
           '-T', 'np_subtracted', '--no-pupil',
           '-R', 'gratings_run2', '-t', 'traces001',
           '-n', '1']
#
#options = ['-D', '/mnt/odyssey', '-i', 'CE077', '-S', '20180425', '-A', 'FOV1_zoom1x',
#           '-T', 'np_subtracted', '--no-pupil',
#           '-R', 'blobs_run1', '-t', 'traces002',
#           '-n', '1']

#options = ['-D', '/mnt/odyssey', '-i', 'CE084', '-S', '20180511', '-A', 'FOV1_zoom1x',
#           '-T', 'np_subtracted', '--no-pupil',
#           '-R', 'gratings_run1', '-t', 'traces002',
#           '-n', '1']

#%%

options = extract_options(options)

rootdir = options.rootdir
animalid = options.animalid
session = options.session
acquisition = options.acquisition
slurm = options.slurm
if slurm is True:
    # --slurm overrides whatever root dir was given:
    rootdir = '/n/coxfs01/2p-data'

trace_type = options.trace_type

run_list = options.run_list
traceid_list = options.traceid_list

# Pupil thresholds arrive as strings when given on the command line:
filter_pupil = options.filter_pupil
pupil_radius_max = float(options.pupil_radius_max)
pupil_radius_min = float(options.pupil_radius_min)
pupil_dist_thr = float(options.pupil_dist_thr)
pupil_max_nblinks = 0

multiproc = options.multiproc
nprocesses = int(options.nprocesses)
combined = options.combined
nruns = int(options.nruns)

acquisition_dir = os.path.join(rootdir, animalid, session, acquisition)
if combined is False:
    # Single run: the trace folder is '<traceid>_<trace_hash>' under the run's
    # 'traces' dir, with the hash read from the run's traceids json.
    assert len(run_list) > 0 and len(traceid_list) > 0, "Specify at least one run (-R) and one trace ID (-t)."
    runfolder = run_list[0]
    traceid = traceid_list[0]
    with open(os.path.join(acquisition_dir, runfolder, 'traces', 'traceids_%s.json' % runfolder), 'r') as f:
        tdict = json.load(f)
    tracefolder = '%s_%s' % (traceid, tdict[traceid]['trace_hash'])
    traceid_dir = os.path.join(rootdir, animalid, session, acquisition, runfolder, 'traces', tracefolder)
else:
    # Combined runs: run names (and trace IDs) are joined with '_' in run order.
    assert len(run_list) == nruns, "Incorrect runs or number of runs (%i) specified!\n%s" % (nruns, str(run_list))
    runfolder = '_'.join(run_list)
    if len(traceid_list)==1:
        # One trace ID given: it applies to every run.
        traceid = '_'.join([traceid_list[0]] * nruns)
    else:
        # One trace ID per run, in run order (previously `traceid` was left
        # undefined in this case and the next line failed with a NameError).
        assert len(traceid_list) == nruns, "Expected 1 or %i trace IDs, got: %s" % (nruns, str(traceid_list))
        traceid = '_'.join(traceid_list)
    traceid_dir = os.path.join(rootdir, animalid, session, acquisition, runfolder, traceid)


print(traceid_dir)
assert os.path.exists(traceid_dir), "Specified traceid-dir does not exist!"


#%% # Load ROIDATA file:
print("Loading ROIDATA file...")

# Candidate files: hdf5 files in traceid_dir whose name contains 'ROIDATA' and
# the requested trace type. Sorted so the choice is reproducible when several
# match; fail with a readable message when none does.
roidf_candidates = sorted(fn for fn in os.listdir(traceid_dir)
                          if fn.endswith('hdf5') and 'ROIDATA' in fn and trace_type in fn)
assert len(roidf_candidates) > 0, "No ROIDATA hdf5 file for trace type '%s' found in: %s" % (trace_type, traceid_dir)
roidf_fn = roidf_candidates[0]
roidata_filepath = os.path.join(traceid_dir, roidf_fn) #'ROIDATA_098054_626d01_raw.hdf5')
DATA, datakey = load_roi_dataframe(roidata_filepath)

# Transform values and per-object transforms found in DATA; trans_types is
# materialised as a list because later code calls .index() on it.
transform_dict, object_transformations = vis.get_object_transforms(DATA)
trans_types = list(object_transformations.keys())

#%% Set filter params:

# Pupil filter parameters: thresholds from the options when filtering is on,
# otherwise whatever set_pupil_params builds for create_empty=True.
if filter_pupil is False:
    pupil_params = acq.set_pupil_params(create_empty=True)
elif filter_pupil is True:
    pupil_params = acq.set_pupil_params(create_empty=False,
                                        dist_thr=pupil_dist_thr,
                                        radius_max=pupil_radius_max,
                                        radius_min=pupil_radius_min)


/mnt/odyssey/CE077/20180425/FOV1_zoom1x/gratings_run2/traces/traces001_0b89cd
Loading ROIDATA file...
[0, 45, 90, 135, 180, 225, 270, 315]


In [4]:


#%%  Create output dir for ROI selection:
# =============================================================================

print("Creating OUTPUT DIRS for ROI analyses...")

# Drop the first character of the data key when it contains a '/'
# (presumably the leading '/' of an HDFStore key -- confirm):
if '/' in datakey:
    datakey = datakey[1:]

# Layout under the trace dir:
#   sorted_<datakey>/anova_results/{responsive_tests,selectivity_tests}
#   sorted_<datakey>/figures/tolerance
sort_dir = os.path.join(traceid_dir, 'sorted_%s' % datakey)
sort_figdir = os.path.join(sort_dir, 'figures')
sort_resultsdir = os.path.join(sort_dir, 'anova_results')
responsive_resultsdir = os.path.join(sort_resultsdir, 'responsive_tests')
selective_resultsdir = os.path.join(sort_resultsdir, 'selectivity_tests')
tolerance_figdir = os.path.join(sort_figdir, 'tolerance')

# Create whatever is missing (parents are created along the way):
for _needed_dir in (sort_figdir, responsive_resultsdir, selective_resultsdir, tolerance_figdir):
    if not os.path.exists(_needed_dir):
        os.makedirs(_needed_dir)
del _needed_dir

#%% Get stimulus config info (used when assigning ROI selectivity):
# =============================================================================

rundir = os.path.join(rootdir, animalid, session, acquisition, runfolder)

# Combined runs keep the stimulus configs next to the traces; a single run
# keeps them in its 'paradigm' folder.
stimconfigs_fpath = (os.path.join(traceid_dir, 'stimulus_configs.json')
                     if combined is True
                     else os.path.join(rundir, 'paradigm', 'stimulus_configs.json'))

with open(stimconfigs_fpath, 'r') as f:
    stimconfigs = json.load(f)

print("Loaded %i stimulus configurations." % len(stimconfigs.keys()))


Creating OUTPUT DIRS for ROI analyses...
Loaded 8 stimulus configurations.


In [5]:
# All ROI names in natural order; the first one is used as the example below.
roi_list = sorted(set(DATA['roi']), key=natural_keys)
roi = roi_list[0]
print(roi)

roi00001


In [8]:
# Rows of DATA that belong to the example ROI:
rdata = DATA.loc[DATA['roi'] == roi]

In [9]:

# Table for the example ROI, built from rows without missing values:
pdf = pyvt_raw_epochXconfig(rdata.dropna(), save_fig=False)

# Split-plot ANOVA on 'intensity': 'epoch' within trials, 'config' between trials.
aov = pdf.anova('intensity', sub='trial', wfactors=['epoch'], bfactors=['config'])


  return list(array(list(zeros((p-len(b))))+b)+1.)


In [10]:
    # Show the ANOVA tables computed in the previous cell (between- and within-subjects effects).
    print(aov)


intensity ~ epoch * config

TESTS OF BETWEEN-SUBJECTS EFFECTS

Measure: intensity
     Source          Type III     df       MS         F     Sig.    et2_G   Obs.     SE      95% CI    lambda   Obs.  
                        SS                                                                                      Power 
Between Subjects   43274744.962   79                                                                                  
config              1767227.917    7   252461.131   0.438   0.875   0.034     10   244.202   478.636    0.426   0.064 
Error              41507517.045   72   576493.292                                                                     

TESTS OF WITHIN SUBJECTS EFFECTS

Measure: intensity
    Source                             Type III      eps      df         MS         F     Sig.    et2_G   Obs.     SE      95% CI    lambda   Obs.  
                                          SS                                                                            