# Initialization

In [8]:
import numpy as np
import pandas as pd
from ipywidgets import interact
import matplotlib.pyplot as plt
import seaborn as sns
import pystan
import pickle
from scipy.stats import gamma, norm

# Load the group behavioral table and drop excluded trials.
# The file is tab-separated; the literal string 'n/a' is parsed as missing (NaN).
data = pd.read_csv('../data/derivatives/behavior/group_data.tsv', sep='\t', 
                   na_values='n/a')
# Columns that mark a trial for exclusion.
# NOTE(review): presumably 0/1 flags -- confirm against the derivatives spec.
exclusions = ['no_response', 'error', 'post_error', 'fast_rt']
# Keep only rows whose exclusion columns sum to zero across the row.
data = data[data[exclusions].sum(axis=1) == 0]

# Participant ids in order of first appearance in the table (pandas `unique`
# does not sort), so this order can differ from sorted/groupby order.
subjects = list(data.participant_id.unique())

# Global seaborn theme for every figure below.
sns.set(style='whitegrid', font_scale=2)

# Weibull Models

In [9]:
def weibull(x, alpha, sigma, shift):
    """Evaluate the shifted (three-parameter) Weibull density at ``x``.

    With ``z = (x - shift) / sigma`` the density is
    ``(alpha / sigma) * z ** (alpha - 1) * exp(-z ** alpha)`` for ``z >= 0``
    and ``0`` for ``z < 0`` (points to the left of the shift).

    Parameters
    ----------
    x : scalar or array-like
        Points at which to evaluate the density.
    alpha : scalar
        Shape parameter.
    sigma : scalar
        Scale parameter.
    shift : scalar
        Location (onset) parameter; the density is zero below it.

    Returns
    -------
    Density values with the shape of ``x`` (a scalar for scalar ``x``).
    NaN inputs propagate as NaN.
    """
    z = (np.asarray(x, dtype=np.float64) - shift) / sigma
    below_support = z < 0
    # A negative base raised to a fractional power is NaN, so evaluate those
    # points at a harmless placeholder and overwrite them with 0 afterwards.
    z_safe = np.where(below_support, 1.0, z)
    # true_divide keeps the ratio exact even for integer alpha/sigma on Python 2.
    p1 = np.true_divide(alpha, sigma)
    p2 = np.power(z_safe, alpha - 1)
    p3 = np.exp(-np.power(z_safe, alpha))
    # [()] unwraps a 0-d result so scalar input still yields a scalar.
    return np.where(below_support, 0.0, p1 * p2 * p3)[()]

## Weibull Intuition

In [10]:
def plot_weibull(shape, scale, shift):
    """Draw the Weibull density for one (shape, scale, shift) setting.

    The curve is evaluated from ``shift`` up to 1.75 in steps of 0.001 and
    shown on a fixed 0-1.75 x-axis so successive slider settings are
    comparable.
    """
    upper = 1.75
    grid = np.arange(shift, upper, .001, dtype=np.float64)
    density = weibull(grid, shape, scale, shift)
    plt.plot(grid, density)
    plt.xlim((0, upper))
    plt.show()

# One slider per parameter, given as (min, max, step).
interact(plot_weibull, shape=(1, 5, .01), scale=(0, 2, .01), shift=(0, 2, .01));

## Prior Intuition & Determination

### Build distribution of individual subject MAPs

First, we compile the individual-subject Stan model.

In [11]:
# Compile the single-subject Stan program found at this path. Compilation
# builds C++ code (see the INFO log emitted below), so this cell is slow.
# `model` is rebound to the hierarchical model in a later cell.
model = pystan.StanModel(file='../stan_models/single_subject_non-additive_weibull.stan')

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_8b64186d7c72474f92436047d86f728a NOW.


Next, we gather the MAP estimates of each subject's shift, scale, and shape parameters, split by condition.

In [13]:
# Fit every subject on their own and collect the optimizer output.
# `maps` ends up in the same order as `subjects`.
maps = []
for subject in subjects:
    print(subject)  # progress indicator while the fits run
    sub_data = data[data.participant_id == subject]
    # Response times of this subject, one series per condition.
    rt_i = sub_data.response_time[sub_data.trial_type == 'incongruent']
    rt_c = sub_data.response_time[sub_data.trial_type == 'congruent']
    data_in = dict(Ni=len(rt_i), Nc=len(rt_c), rt_c=rt_c, rt_i=rt_i)

    # Fixed seed so the point estimates are reproducible.
    op = model.optimizing(data=data_in, seed=8)
    maps.append(op)
print('Done!')

sub-hc001
sub-hc002
sub-hc003
sub-hc004
sub-hc005
sub-hc006
sub-hc007
sub-hc008
sub-hc009
sub-hc010
sub-hc011
sub-hc012
sub-hc013
sub-hc014
sub-hc015
sub-hc016
sub-hc017
sub-hc018
sub-hc019
sub-hc020
sub-hc021
sub-hc022
sub-hc023
sub-hc024
sub-hc025
sub-hc026
sub-hc027
sub-hc028
sub-hc029
sub-hc030
sub-hc031
sub-hc032
sub-hc033
sub-hc034
sub-hc035
sub-hc036
sub-hc037
sub-hc042
sub-hc044
sub-hc045
sub-pp001
sub-pp002
sub-pp003
sub-pp004
sub-pp005
sub-pp006
sub-pp007
sub-pp008
sub-pp009
sub-pp010
sub-pp011
sub-pp012
sub-pp013
sub-pp014
sub-pp015
sub-pp016
sub-hc038
sub-hc041
sub-hc047
Done!


Next, we browse the MAP-fitted distributions overlaid on each subject's data as a sanity check on how well the model can fit.

In [14]:
def plot_subject_fit(subject):
    """Overlay one subject's RT histograms with their single-subject MAP fits.

    For each condition the normalized histogram of response times and the
    Weibull density implied by that subject's entry in ``maps`` are drawn in
    the same color (incongruent first, congruent second).
    """
    fig, ax = plt.subplots(1, 1, figsize=(16, 8))
    palette = ['#e41a1c', '#377eb8']
    conditions = ['incongruent', 'congruent']
    # `maps` was filled in `subjects` order, so the list position is the key.
    estimates = maps[subjects.index(subject)]
    trials = data[data.participant_id == subject]

    for color, condition in zip(palette, conditions):
        # Parameter names carry the condition's first letter: *_i / *_c.
        suffix = condition[0]
        observed = trials[trials.trial_type == condition].response_time
        shift = estimates['shift_%s' % suffix]
        grid = np.arange(shift, 1.75, .01)
        sns.distplot(observed, color=color, ax=ax, kde=False,
                     norm_hist=True)
        shape = estimates['shape_%s' % suffix]
        scale = estimates['scale_%s' % suffix]
        plt.plot(grid, weibull(grid, shape, scale, shift), color=color)

    plt.legend(conditions)
    plt.xlim((0, 1.75))
    plt.ylim((0, 4))
    plt.show()

# Dropdown over all subject ids.
interact(plot_subject_fit, subject=subjects);

Finally, we look at the distribution of MAP estimates for each parameter to get a sense of the reasonable range of expected values.

In [18]:
def plot_map_dist(param):
    """Plot the across-subject distribution of one MAP parameter per condition.

    ``param`` is the parameter stem ('shift', 'scale' or 'shape'); the
    condition's first letter is appended to form the key looked up in each
    subject's entry of ``maps``.
    """
    fig, ax = plt.subplots(1, 1, figsize=(16, 8))
    palette = ['#e41a1c', '#377eb8']
    conditions = ['incongruent', 'congruent']
    for color, condition in zip(palette, conditions):
        key = '%s_%s' % (param, condition[0])
        estimates = [subject_map[key] for subject_map in maps]
        sns.distplot(estimates, color=color, bins=20)
    plt.legend(conditions)
    plt.xlabel(param)
    plt.show()

# Dropdown over the three parameter stems.
interact(plot_map_dist, param=['shift', 'scale', 'shape']);

### Visualizing Prior Distribution

For the hierarchical model, we are not going to model each condition with a separate distribution. Instead, we will take a regression approach.

First, we look at the priors for the scale parameter.

In [19]:
def plot_scale_prior(shape1, scale1, shape2, scale2):
    """Show two gamma hyperpriors and the gamma prior their means imply.

    Panel 1: gamma(shape1, scale=scale1).
    Panel 2: gamma(shape2, scale=scale2).
    Panel 3: a gamma whose shape is the mean of panel 1's distribution and
    whose scale is the mean of panel 2's distribution.
    All three are evaluated on the same grid from 0 to 7 in steps of 0.01.
    """
    fig, panels = plt.subplots(1, 3, figsize=(16, 8))
    grid = np.arange(0, 7, .01, dtype=np.float64)

    shape_hyper = gamma(shape1, scale=scale1)
    scale_hyper = gamma(shape2, scale=scale2)
    implied = gamma(shape_hyper.mean(), scale=scale_hyper.mean())

    for panel, dist in zip(panels, (shape_hyper, scale_hyper, implied)):
        panel.plot(grid, dist.pdf(grid))
    plt.show()

# One slider per hyperparameter, given as (min, max, step).
interact(plot_scale_prior, shape1=(0, 3, .1), scale1=(0, 4, .1), 
         shape2=(0, 3, .1), scale2=(0, 4, .1));

Shape prior

In [7]:
def plot_shape_prior(shape1, scale1, shape2, scale2):
    """Show two gamma hyperpriors and the gamma prior their means imply.

    Panel 1: gamma(shape1, scale=scale1).
    Panel 2: gamma(shape2, scale=scale2).
    Panel 3: a gamma whose shape is the mean of panel 1's distribution and
    whose scale is the mean of panel 2's distribution.
    All three are evaluated on the same grid from 0 to 7 in steps of 0.01.
    """
    fig, panels = plt.subplots(1, 3, figsize=(16, 8))
    grid = np.arange(0, 7, .01, dtype=np.float64)

    shape_hyper = gamma(shape1, scale=scale1)
    scale_hyper = gamma(shape2, scale=scale2)
    implied = gamma(shape_hyper.mean(), scale=scale_hyper.mean())

    for panel, dist in zip(panels, (shape_hyper, scale_hyper, implied)):
        panel.plot(grid, dist.pdf(grid))
    plt.show()

# One slider per hyperparameter; the scale sliders use a finer 0.01 step.
interact(plot_shape_prior, shape1=(0, 3, .1), scale1=(0, 4, .01), 
         shape2=(0, 3, .1), scale2=(0, 4, .01));

## Hierarchical Model Fitting 

### Prep Data

In [4]:
# Number of distinct subjects in the cleaned data.
ns = len(data.participant_id.unique())
# Per-trial subject index: category codes are 0-based and follow the sorted
# participant ids; +1 turns them into 1-based indices.
ll = data.participant_id.astype('category').cat.codes + 1
rt = data.response_time
# Per-trial condition code taken from the (alphabetically ordered) categories.
tt = data.trial_type.astype('category').cat.codes

# Fastest response time per (subject, condition) cell. groupby sorts its keys,
# so the per-subject vectors below follow sorted participant ids, matching `ll`.
min_rt_by_cell = data.groupby(['participant_id', 'trial_type']).response_time.min()
min_rts = np.array(min_rt_by_cell)
# Pick each condition by label rather than by stride: positional slicing
# ([1::2] / [::2]) silently misaligns if any subject lacks a condition or a
# third trial type is present.
min_rt_i = min_rt_by_cell.xs('incongruent', level='trial_type').values
min_rt_c = min_rt_by_cell.xs('congruent', level='trial_type').values

data_in = {'Ns': ns, 'll': ll, 'Nt': len(rt), 'rt': rt, 'tt': tt, 
           'min_rt_i': min_rt_i, 'min_rt_c': min_rt_c}

### Compile & Fit Model

In [5]:
# Compile the hierarchical Stan program under an explicit model name (the
# name shows up as the prefix of the compiled module in the log below).
# This rebinds `model`, replacing the single-subject model compiled earlier.
model = pystan.StanModel(file='../stan_models/weibull_hierarchical_additive.stan',
                         model_name='weibull_hierarchical')

INFO:pystan:COMPILING THE C++ CODE FOR MODEL weibull_hierarchical_8ced4885fd3a5093d70202d34ed6ffdc NOW.


In [6]:
# Starting values for the optimizer, keyed by parameter name.
# NOTE(review): keys presumably mirror the parameters block of the Stan
# model -- confirm against weibull_hierarchical_additive.stan.
# Scalar entries are group-level values; list entries hold one identical
# starting value per subject (length len(subjects)).
init_dict = {'scale_hyp1': 2, 'scale_hyp2': 0.5, 'shape_hyp1': 2, 'shape_hyp2': 0.5,
             'scale_base': [.2] * len(subjects), 
             'shape_base': [2] * len(subjects),
             'scale_group_beta': 0.1, 'scale_beta': [0.1] * len(subjects),
             'shape_group_beta': 0.1, 'shape_beta': [0.1] * len(subjects),
             'shift_base_i': [0.1] * len(subjects), 'shift_base_c': [0.1] * len(subjects)}
# Point estimate for the hierarchical model from a fixed seed and the
# explicit starting values; the plotting cells below read `mapp`.
mapp = model.optimizing(data=data_in, seed=5, init=init_dict)

In [None]:
# Same starting values as used for the optimizer cell, redefined here so
# this cell can run on its own.
init_dict = {'scale_hyp1': 2, 'scale_hyp2': 0.5, 'shape_hyp1': 2, 'shape_hyp2': 0.5,
             'scale_base': [.2] * len(subjects), 
             'shape_base': [2] * len(subjects),
             'scale_group_beta': 0.1, 'scale_beta': [0.1] * len(subjects),
             'shape_group_beta': 0.1, 'shape_beta': [0.1] * len(subjects),
             'shift_base_i': [0.1] * len(subjects), 'shift_base_c': [0.1] * len(subjects)}
n_chains = 10
# Every chain starts from the same init_dict (the list repeats one object).
# NOTE(review): iter=100 is small; with PyStan's default warmup of iter // 2
# this presumably leaves about 50 retained draws per chain -- confirm this is
# meant as a quick smoke run rather than the final fit.
fit = model.sampling(data=data_in, iter=100, chains=n_chains, 
                     init=[init_dict] * n_chains, seed=10)

In [None]:
# Prints the literal string 'h'.
# NOTE(review): looks like a leftover scratch / kernel-liveness check --
# confirm and remove if so.
print('h')

In [10]:
# Bundle the compiled model, the MAP estimate and the sampling result so they
# are stored (and later restored) together.
# NOTE(review): PyStan warns (see output below) that the StanModel must be
# unpickled before the fit; a plain dict does not guarantee entry order on
# older Pythons -- confirm the bundle loads cleanly.
model_fit = {'model': model, 'mapp': mapp, 'fit': fit}
# Pickle data is a byte stream, so the file must be opened in binary mode;
# the `with` block also guarantees the handle is flushed and closed.
with open('../stan_models/weibull_hierarchical.pkl', 'wb') as pkl_file:
    pickle.dump(model_fit, pkl_file)

The relevant StanModel instance must be pickled along with this fit object.
When unpickling the StanModel must be unpickled first.
  rv = reduce(self.proto)


### Plot the Posteriors

In [None]:
def plot_posterior(param):
    """Plot the posterior draws of ``param`` as a density and a draw sequence.

    Scalar parameters produce one figure; parameters with a second dimension
    produce one figure per column. Each figure has the density on the left
    and the draws of that same column, in extraction order, on the right.

    NOTE(review): ``fit.extract`` is called with its defaults; PyStan 2
    documents that the default ``permuted=True`` shuffles and merges the
    chains, so the right-hand panel is presumably not a true per-chain
    trace -- confirm before using it as a convergence diagnostic.
    """
    samples = fit.extract(pars=[param])[param]
    print(samples.shape)

    # Treat a scalar parameter as a single column so both cases share one
    # plotting path (no empty leading figure, and a trace for scalars too).
    if len(samples.shape) > 1:
        columns = [samples[:, j] for j in range(samples.shape[1])]
    else:
        columns = [samples]

    for draws in columns:
        f, axs = plt.subplots(1, 2, figsize=(16, 8))
        sns.distplot(draws, ax=axs[0])
        axs[0].set_title('%s Density' % param)

        # Plot only this column's draws, not the whole sample matrix.
        axs[1].plot(draws)
        axs[1].set_title('%s Traceplot' % param)

    plt.show()

# Trailing semicolon suppresses the function repr in the cell output,
# matching the other interact cells in this notebook.
interact(plot_posterior, param=init_dict.keys());

<function __main__.plot_posterior>

### Plot the MAP Fits

In [18]:
def plot_subject_fit(subject):
    """Overlay one subject's RT histograms with the hierarchical MAP fits.

    Congruent trials use the subject's base shape/scale; incongruent trials
    add the subject's beta offsets. Each condition's shift is the subject's
    shift fraction times that subject's fastest RT in the condition.
    """
    plt.close('all')
    f, ax = plt.subplots(1, 1, figsize=(16, 8))
    # Same condition -> color mapping as the single-subject plots earlier in
    # this notebook (incongruent red, congruent blue).
    colors = {'incongruent': '#e41a1c', 'congruent': '#377eb8'}
    conditions = ['congruent', 'incongruent']
    # min_rt_c / min_rt_i come from a groupby (sorted keys) and the trial
    # index `ll` from category codes (sorted categories), so per-subject
    # vectors follow SORTED participant ids. `subjects` is in order of first
    # appearance, which differs for this dataset, so index into the sorted
    # list instead. NOTE(review): assumes the Stan model indexes its
    # per-subject parameters through `ll` -- confirm in the .stan file.
    sub_ix = sorted(subjects).index(subject)
    sub_data = data[data.participant_id == subject]

    for c in conditions:

        if c == 'congruent':
            shape = mapp['shape_base'][sub_ix]
            scale = mapp['scale_base'][sub_ix]
            shift = mapp['shift_base_c'][sub_ix] * min_rt_c[sub_ix]
        else:
            shape = mapp['shape_base'][sub_ix] + mapp['shape_beta'][sub_ix]
            scale = mapp['scale_base'][sub_ix] + mapp['scale_beta'][sub_ix]
            shift = mapp['shift_base_i'][sub_ix] * min_rt_i[sub_ix]

        rt = sub_data[sub_data.trial_type == c].response_time
        x = np.arange(shift, 1.75, .01)
        sns.distplot(rt, color=colors[c], ax=ax, kde=False,
                     norm_hist=True)

        plt.plot(x, weibull(x, shape, scale, shift), color=colors[c])

    plt.legend(conditions)
    plt.xlim((0, 1.75))
    plt.ylim((0, 4))
    plt.show()

# Dropdown over all subject ids.
interact(plot_subject_fit, subject=subjects);

In [62]:
def plot_map_dist(param):
    """Show one entry of the hierarchical MAP estimate ``mapp``.

    Entries with an empty shape are printed; all others are reduced to
    their distinct values and drawn as a distribution plot.
    """
    plt.close('all')
    estimate = mapp[param]
    if not estimate.shape:
        # Single value: nothing to draw a distribution of.
        print(estimate)
        return

    distinct = np.unique(estimate)
    fig, ax = plt.subplots(1, 1, figsize=(16, 8))
    sns.distplot(distinct)
    plt.xlabel(param)
    plt.show()

# Dropdown over every entry in the MAP estimate.
interact(plot_map_dist, param=mapp.keys());