# Initialization

In [7]:
# add custom functions to path
import sys
sys.path.append("../src")

%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
from ipywidgets import interact, fixed
import matplotlib.pyplot as plt
import seaborn as sns
import pystan
import pickle
from scipy.stats import gamma, norm
from stan import fit_model, plot_weibull_subject_fit, plot_map_estimates
from stan import plot_posterior

# Read the group-level behavioral table; 'n/a' cells are parsed as missing values.
data = pd.read_csv(
    '../data/derivatives/behavior/group_data.tsv',
    sep='\t',
    na_values='n/a',
)

# Keep only trials whose exclusion flags sum to zero across all four columns.
exclusions = ['no_response', 'error', 'post_error', 'fast_rt']
data = data.loc[data[exclusions].sum(axis=1).eq(0)]

# Alphabetically ordered participant ids that survive the exclusions.
subjects = sorted(data.participant_id.unique())
print(subjects)

# Global plotting theme for every figure below.
sns.set(style='whitegrid', font_scale=2)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
['sub-hc001', 'sub-hc002', 'sub-hc003', 'sub-hc004', 'sub-hc005', 'sub-hc006', 'sub-hc007', 'sub-hc008', 'sub-hc009', 'sub-hc010', 'sub-hc011', 'sub-hc012', 'sub-hc013', 'sub-hc014', 'sub-hc015', 'sub-hc016', 'sub-hc017', 'sub-hc018', 'sub-hc019', 'sub-hc020', 'sub-hc021', 'sub-hc022', 'sub-hc023', 'sub-hc024', 'sub-hc025', 'sub-hc026', 'sub-hc027', 'sub-hc028', 'sub-hc029', 'sub-hc030', 'sub-hc031', 'sub-hc032', 'sub-hc033', 'sub-hc034', 'sub-hc035', 'sub-hc036', 'sub-hc037', 'sub-hc038', 'sub-hc041', 'sub-hc042', 'sub-hc044', 'sub-hc045', 'sub-hc047', 'sub-pp001', 'sub-pp002', 'sub-pp003', 'sub-pp004', 'sub-pp005', 'sub-pp006', 'sub-pp007', 'sub-pp008', 'sub-pp009', 'sub-pp010', 'sub-pp011', 'sub-pp012', 'sub-pp013', 'sub-pp014', 'sub-pp015', 'sub-pp016']


# Weibull Model

Main parameters with interpretations from Rouder 2005:
- shape: Change in the structure of central processing, such as a different overall processing strategy.
- scale: Differences in speed of central processing given similar structure.
- shift: Differences in peripheral processing (motor, visual, etc.).

<img src="../models/weibull_hierarchical/weibull_hierarchical.png">

In [9]:
def weibull(x, alpha, sigma, shift):
    """Evaluate the shifted (three-parameter) Weibull probability density.

    With ``z = (x - shift) / sigma`` the density is::

        (alpha / sigma) * z ** (alpha - 1) * exp(-z ** alpha)    for z >= 0
        0                                                        for z <  0

    Points below ``shift`` lie outside the support and return 0. Evaluating
    the formula there would give NaN (fractional ``alpha``) or a negative
    value (integer ``alpha``).

    Parameters
    ----------
    x : float or array-like
        Point(s) at which to evaluate the density.
    alpha : float
        Shape parameter.
    sigma : float
        Scale parameter.
    shift : float
        Location of the lower bound of the support.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        A NumPy scalar for scalar ``x``, otherwise an array shaped like ``x``.
        NaN inputs propagate as NaN.
    """
    z = (np.asarray(x, dtype=np.float64) - shift) / sigma
    below_support = z < 0
    # Substitute a harmless placeholder below the support so that powers of
    # negative numbers are never computed; those entries are zeroed afterwards.
    z_safe = np.where(below_support, 1.0, z)
    p1 = alpha / sigma
    p2 = np.power(z_safe, alpha - 1)
    p3 = np.exp(-np.power(z_safe, alpha))
    density = np.where(below_support, 0.0, p1 * p2 * p3)
    # np.where yields a 0-d array for scalar input; unwrap it to a NumPy scalar.
    return density[()] if density.ndim == 0 else density

## Weibull Intuition

In [10]:
def plot_weibull(shape, scale, shift):
    """Draw the shifted Weibull density for a single parameter setting.

    The curve is evaluated with ``weibull`` on a grid running from ``shift``
    up to (but excluding) 1.75 in steps of .001, and is displayed on an
    x-axis fixed to the interval [0, 1.75].
    """
    upper = 1.75
    grid = np.arange(shift, upper, .001, dtype=np.float64)
    density = weibull(grid, shape, scale, shift)
    plt.plot(grid, density)
    plt.xlim((0, upper))
    plt.show()
    
# One slider per Weibull parameter, each given as (min, max, step).
# NOTE(review): the scale slider can reach 0, where the density divides by
# zero -- consider a small positive lower bound.
interact(
    plot_weibull,
    shape=(1, 5, .01),
    scale=(0, 2, .01),
    shift=(0, 2, .01),
);

## Prior Intuition & Determination

### Build distribution of individual subject MAPs

First, we compile the individual-subject Stan model.

In [11]:
# Build the single-subject Weibull model from its Stan source file.
model = pystan.StanModel(
    file='../stan_models/single_subject_non-additive_weibull.stan'
)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_8b64186d7c72474f92436047d86f728a NOW.


Next, we gather the MAP (maximum a posteriori) estimates of each subject's shift, scale, and shape parameters, split by condition.

In [13]:
# One optimizer result per subject, stored in the same order as `subjects`
# so that later cells can look a subject up by its position in that list.
maps = []
for subject in subjects:
    print(subject)
    sub_data = data.loc[data.participant_id == subject]
    # Response times split by condition (congruent first, then incongruent).
    rt_c, rt_i = (
        sub_data.loc[sub_data.trial_type == condition, 'response_time']
        for condition in ('congruent', 'incongruent')
    )
    data_in = {'Ni': len(rt_i), 'Nc': len(rt_c), 'rt_c': rt_c, 'rt_i': rt_i}

    # Same seed for every subject.
    maps.append(model.optimizing(data=data_in, seed=8))
print('Done!')

sub-hc001
sub-hc002
sub-hc003
sub-hc004
sub-hc005
sub-hc006
sub-hc007
sub-hc008
sub-hc009
sub-hc010
sub-hc011
sub-hc012
sub-hc013
sub-hc014
sub-hc015
sub-hc016
sub-hc017
sub-hc018
sub-hc019
sub-hc020
sub-hc021
sub-hc022
sub-hc023
sub-hc024
sub-hc025
sub-hc026
sub-hc027
sub-hc028
sub-hc029
sub-hc030
sub-hc031
sub-hc032
sub-hc033
sub-hc034
sub-hc035
sub-hc036
sub-hc037
sub-hc042
sub-hc044
sub-hc045
sub-pp001
sub-pp002
sub-pp003
sub-pp004
sub-pp005
sub-pp006
sub-pp007
sub-pp008
sub-pp009
sub-pp010
sub-pp011
sub-pp012
sub-pp013
sub-pp014
sub-pp015
sub-pp016
sub-hc038
sub-hc041
sub-hc047
Done!


Next, as a sanity check of fit quality, we overlay each subject's MAP Weibull fit on their response-time data.

In [14]:
def plot_subject_fit(subject):
    """Overlay one subject's response-time histograms with their MAP Weibull fits.

    For each condition (incongruent, congruent) a normalized histogram of the
    subject's response times is drawn, together with the Weibull density
    evaluated at that subject's MAP shape/scale/shift for the condition.

    Parameters
    ----------
    subject : str
        Participant id; must be an element of the global ``subjects`` list.

    Raises
    ------
    ValueError
        If ``subject`` is not in ``subjects`` (raised by ``list.index``).
    """
    f, ax = plt.subplots(1, 1, figsize=(16, 8))
    colors = ['#e41a1c', '#377eb8']
    conditions = ['incongruent', 'congruent']
    # `maps` is looked up by the subject's position in `subjects`.
    mapp = maps[subjects.index(subject)]
    sub_data = data[data.participant_id == subject]

    for color, condition in zip(colors, conditions):
        # Parameter keys end in the condition's first letter ('i' or 'c').
        suffix = condition[0]
        shape = mapp['shape_%s' % suffix]
        scale = mapp['scale_%s' % suffix]
        shift = mapp['shift_%s' % suffix]

        rt = sub_data[sub_data.trial_type == condition].response_time
        sns.distplot(rt, color=color, ax=ax, kde=False, norm_hist=True)

        x = np.arange(shift, 1.75, .01)
        # Label the curve itself so the legend does not depend on the order
        # in which matplotlib happens to enumerate bars and lines.
        ax.plot(x, weibull(x, shape, scale, shift), color=color,
                label=condition)

    ax.legend()
    ax.set_xlim((0, 1.75))
    ax.set_ylim((0, 4))
    plt.show()
    
# Selector over all subjects; the chosen id is passed to plot_subject_fit.
interact(
    plot_subject_fit,
    subject=subjects,
);

Finally, we look at the distributions of the MAP parameter estimates across subjects to get a sense of the reasonable range of each parameter.

In [18]:
def plot_map_dist(param):
    """Plot the across-subject distribution of one MAP parameter per condition.

    Parameters
    ----------
    param : str
        Parameter name prefix ('shift', 'scale' or 'shape' in the call below).
        The values are read from ``maps`` under the keys ``'<param>_i'`` and
        ``'<param>_c'``.
    """
    f, ax = plt.subplots(1, 1, figsize=(16, 8))
    colors = ['#e41a1c', '#377eb8']
    conditions = ['incongruent', 'congruent']
    for color, condition in zip(colors, conditions):
        # Parameter keys end in the condition's first letter ('i' or 'c').
        values = [m['%s_%s' % (param, condition[0])] for m in maps]
        # Attach the label to the plotted artists so the legend does not rely
        # on the order in which matplotlib enumerates bars and lines.
        sns.distplot(values, color=color, bins=20, ax=ax, label=condition)
    ax.legend()
    ax.set_xlabel(param)
    plt.show()

# Selector over the three Weibull parameters.
interact(
    plot_map_dist,
    param=['shift', 'scale', 'shape'],
);

### Visualizing Prior Distribution

For the hierarchical model, we are not going to model each condition with a separate distribution. Instead, we will use a regression approach.

First, we look at the priors for the scale parameter.

In [19]:
def plot_scale_prior(shape1, scale1, shape2, scale2):
    """Show two gamma hyperpriors and the gamma prior built from their means.

    Left panel: gamma(shape1, scale=scale1). Middle panel:
    gamma(shape2, scale=scale2). Right panel: a gamma whose shape is the mean
    of the left distribution and whose scale is the mean of the middle one.
    All densities are drawn on the grid 0 <= x < 7 in steps of .01.

    NOTE(review): the body is identical to ``plot_shape_prior``.
    """
    _, panels = plt.subplots(1, 3, figsize=(16, 8))
    grid = np.arange(0, 7, .01, dtype=np.float64)

    shape_hyper = gamma(shape1, scale=scale1)
    scale_hyper = gamma(shape2, scale=scale2)
    implied = gamma(shape_hyper.mean(), scale=scale_hyper.mean())

    for panel, dist in zip(panels, (shape_hyper, scale_hyper, implied)):
        panel.plot(grid, dist.pdf(grid))
    plt.show()

# Sliders for the two gamma hyperpriors, each given as (min, max, step).
# NOTE(review): the sliders can reach 0, which is not a valid gamma shape or
# scale -- consider a small positive lower bound.
interact(
    plot_scale_prior,
    shape1=(0, 3, .1),
    scale1=(0, 4, .1),
    shape2=(0, 3, .1),
    scale2=(0, 4, .1),
);

Next, we look at the priors for the shape parameter.

In [7]:
def plot_shape_prior(shape1, scale1, shape2, scale2):
    """Plot two gamma hyperpriors next to the gamma prior their means define.

    Panels, left to right:
      1. gamma(shape1, scale=scale1)
      2. gamma(shape2, scale=scale2)
      3. gamma(mean of panel 1, scale=mean of panel 2)
    Every density is evaluated on 0 <= x < 7 with a step of .01.

    NOTE(review): the body is identical to ``plot_scale_prior``.
    """
    _, (ax_first, ax_second, ax_implied) = plt.subplots(1, 3, figsize=(16, 8))
    support = np.arange(0, 7, .01, dtype=np.float64)

    first = gamma(shape1, scale=scale1)
    ax_first.plot(support, first.pdf(support))

    second = gamma(shape2, scale=scale2)
    ax_second.plot(support, second.pdf(support))

    # The implied prior takes its shape and scale from the hyperprior means.
    implied = gamma(first.mean(), scale=second.mean())
    ax_implied.plot(support, implied.pdf(support))
    plt.show()

# Sliders for the two gamma hyperpriors, each given as (min, max, step).
# NOTE(review): the sliders can reach 0, which is not a valid gamma shape or
# scale -- consider a small positive lower bound.
interact(
    plot_shape_prior,
    shape1=(0, 3, .1),
    scale1=(0, 4, .01),
    shape2=(0, 3, .1),
    scale2=(0, 4, .01),
);

## Hierarchical Model Fitting 

### Prepare Data

In [2]:
# Number of subjects, and a 1-based subject index for every trial
# (category codes start at 0, hence the + 1).
ns = len(data.participant_id.unique())
ll = data.participant_id.astype('category').cat.codes + 1
rt = data.response_time
# Integer code per trial derived from the trial_type categories.
tt = data.trial_type.astype('category').cat.codes

# Fastest response per subject and condition, as a table with one row per
# participant_id and one column per trial_type. Picking columns by name keeps
# conditions and subjects aligned even if a subject is missing a condition or
# another trial_type is present, which positional stride slicing would
# silently get wrong.
min_rt_table = (data.groupby(['participant_id', 'trial_type'])
                    .response_time.min()
                    .unstack('trial_type'))
if min_rt_table[['incongruent', 'congruent']].isnull().values.any():
    raise ValueError('every subject needs at least one congruent and one '
                     'incongruent trial to compute minimum response times')
min_rt_i = np.array(min_rt_table['incongruent'])
min_rt_c = np.array(min_rt_table['congruent'])

data_in = {'Ns': ns, 'll': ll, 'Nt': len(rt), 'rt': rt, 'tt': tt,
           'min_rt_i': min_rt_i, 'min_rt_c': min_rt_c}

### Compile & Fit Model

In [3]:
# Initial values passed to fit_model: scalars for the group-level
# parameters, one entry per subject for the subject-level ones.
init_dict = {'k_beta0_scale': 2, 'theta_beta0_scale': 0.5, 'k_beta0_shape': 2, 
             'theta_beta0_shape': 0.5, 'beta0_scale': [.2] * len(subjects), 
             'beta0_shape': [2] * len(subjects),
             'mu_beta1_scale': 0.2, 'beta1_scale': [0.1] * len(subjects), 'sigma_beta1_scale': 1,
             'mu_beta1_shape': 0.2, 'beta1_shape': [0.5] * len(subjects), 'sigma_beta1_shape': 1}
# Keep every initialized parameter plus the two shift parameters.
# list(...) is required because dict.keys() is a view on Python 3 and cannot
# be concatenated with a list; on Python 2 it is a harmless copy.
keep_params = list(init_dict.keys()) + ['beta0_shift', 'beta1_shift']
model_fit = fit_model('weibull_hierarchical', data_in, n_iter=100, n_chains=3,
                      init=init_dict, seed=7, keep_params=keep_params)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL weibull_hierarchical_4cc0713bf9a173743a2ab57a55c24b69 NOW.


Starting Model Fit...
Compiling Model...
Compiling took 0 min. 38 sec.
Computing MAP Estimates...
Finding MAP estimates took 0 min. 39 sec.
Sampling from Posterior...
Drawing 100 Posterior Samples took 68 min. 20 sec.
Extracting Samples...
Extracting samples took 0 min. 0 sec.
Extracting Fit Summary...
Extracting fit summary took 0 min. 31 sec.
Pickling Model Fit...
Pickling model fit took 0 min. 0 sec.
Total Time: took 70 min. 10 sec.
Finished


### Plot Results

In [3]:
# Reload the pickled fit from disk.
# Pickles must be read in binary mode (text mode fails on Python 3), and the
# context manager closes the file handle once loading is done.
# NOTE: unpickling can execute arbitrary code -- only load files you created.
with open('../models/weibull_hierarchical/weibull_hierarchical.pkl', 'rb') as pkl_file:
    model_fit = pickle.load(pkl_file)

#### Plot the Posteriors

In [8]:
# Selectors for the parameter (every key of the MAP dictionary) and the
# subject; the fit object and the full subject list are passed through
# unchanged via fixed().
interact(
    plot_posterior,
    param=model_fit['map'].keys(),
    model_fit=fixed(model_fit),
    subject=subjects,
    subjects=fixed(subjects),
);

#### Plot the MAP Fits

In [10]:
# Selector for the subject; the fit object, the behavioral data and the
# full subject list are passed through unchanged via fixed().
interact(
    plot_weibull_subject_fit,
    model_fit=fixed(model_fit),
    behavior=fixed(data),
    subject=subjects,
    subjects=fixed(subjects),
);