# Parameter recovery of the hierarchical DDM with starting point bias

In [1]:
import numpy as np
import pandas as pd
import rlssm
import seaborn as sns

## Simulate group data

In [2]:
from rlssm.random import simulate_hier_ddm

In [3]:
# Seed NumPy's global RNG so that "Restart & Run All" regenerates the same
# synthetic data set on every run.
# NOTE(review): assumes simulate_hier_ddm draws from NumPy's global RNG -- confirm.
np.random.seed(2021)

# Simulate 200 trials for each of 15 participants. The gen_mu_* / gen_sd_*
# arguments are presumably the group-level mean and SD from which each
# participant's drift, threshold, non-decision time (ndt) and relative
# starting point (rel_sp) are generated -- verify against the rlssm docs.
# NOTE(review): the saved per-participant summary table in this notebook lists
# only 10 participants although n_participants=15 here; the stored outputs look
# stale and should be refreshed by re-running the notebook.
data = simulate_hier_ddm(n_trials=200,
                         n_participants=15,
                         gen_mu_drift=.6, gen_sd_drift=.3,
                         gen_mu_threshold=.5, gen_sd_threshold=.1,
                         gen_mu_ndt=-1.2, gen_sd_ndt=.05,
                         gen_mu_rel_sp=.1, gen_sd_rel_sp=.05)

In [4]:
# Peek at the first five rows of the simulated data.
data.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,drift,threshold,ndt,rel_sp,rt,accuracy
participant,trial,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,1,0.983582,0.978231,0.280752,0.551736,0.357752,1.0
1,1,0.983582,0.978231,0.280752,0.551736,0.355752,1.0
1,1,0.983582,0.978231,0.280752,0.551736,0.608752,1.0
1,1,0.983582,0.978231,0.280752,0.551736,0.399752,0.0
1,1,0.983582,0.978231,0.280752,0.551736,0.578752,1.0


In [5]:
# Per-participant summary statistics, restricted to response time and accuracy.
by_participant = data.groupby('participant')
by_participant[['rt', 'accuracy']].describe()

Unnamed: 0_level_0,rt,rt,rt,rt,rt,rt,rt,rt,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy,accuracy
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
participant,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
1,200.0,0.499472,0.161841,0.304752,0.372502,0.460752,0.572502,1.091752,200.0,0.775,0.41863,0.0,1.0,1.0,1.0,1.0
2,200.0,0.456176,0.182551,0.277111,0.335861,0.400611,0.498861,1.511111,200.0,0.51,0.501154,0.0,0.0,1.0,1.0,1.0
3,200.0,0.486097,0.192538,0.275247,0.356997,0.426247,0.548497,1.400247,200.0,0.645,0.479714,0.0,0.0,1.0,1.0,1.0
4,200.0,0.450853,0.154547,0.281233,0.336733,0.400733,0.524233,1.193233,200.0,0.775,0.41863,0.0,1.0,1.0,1.0,1.0
5,200.0,0.478677,0.161718,0.281397,0.359397,0.430397,0.555647,1.077397,200.0,0.76,0.428155,0.0,1.0,1.0,1.0,1.0
6,200.0,0.497351,0.181062,0.292506,0.380506,0.436006,0.563756,1.582506,200.0,0.655,0.476561,0.0,0.0,1.0,1.0,1.0
7,200.0,0.522194,0.196174,0.298539,0.384289,0.469039,0.616539,1.684539,200.0,0.68,0.467647,0.0,0.0,1.0,1.0,1.0
8,200.0,0.530402,0.205284,0.282492,0.373742,0.465492,0.638742,1.184492,200.0,0.71,0.454901,0.0,0.0,1.0,1.0,1.0
9,200.0,0.440167,0.152475,0.269202,0.332202,0.387702,0.499202,1.264202,200.0,0.585,0.493958,0.0,0.0,1.0,1.0,1.0
10,200.0,0.536524,0.222326,0.285469,0.378969,0.490969,0.621969,1.603469,200.0,0.655,0.476561,0.0,0.0,1.0,1.0,1.0


## Initialize the model

In [6]:
# Hierarchical (two-level) DDM with the starting-point bias enabled.
model = rlssm.DDModel(
    hierarchical_levels=2,
    starting_point_bias=True,
)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_334b674f19a64fdd819bde85b5c747b0 NOW.


## Fit

In [7]:
# sampling parameters
n_iter = 5000
n_chains = 2
n_thin = 1

# bayesian model, change default priors:
drift_priors = {'mu_mu':1, 'sd_mu':1, 'mu_sd':0, 'sd_sd':1}
threshold_priors = {'mu_mu':0, 'sd_mu':1, 'mu_sd':0, 'sd_sd':1}
ndt_priors = {'mu_mu':-1.5, 'sd_mu':.1, 'mu_sd':0, 'sd_sd':.1}
rel_sp_priors = {'mu_mu':0, 'sd_mu':.5, 'mu_sd':0, 'sd_sd':.8}

In [8]:
# Fit the model to the simulated data with the custom priors defined above.
#
# NOTE(review): the saved run of this cell ended with
# `RuntimeError: Initialization failed.`. The sampler is now given a fixed
# seed so that the run (including any initialization failure) is reproducible
# and can be debugged; if this seed still fails to initialize, try another
# seed or supply explicit initial values.
# NOTE(review): assumes `fit` forwards extra keyword arguments (thin, iter,
# chains, seed, verbose) to the PyStan sampling call -- confirm.
model_fit = model.fit(
    data,
    drift_priors=drift_priors,
    threshold_priors=threshold_priors,
    ndt_priors=ndt_priors,
    rel_sp_priors=rel_sp_priors,
    thin=n_thin,
    iter=n_iter,
    chains=n_chains,
    seed=2021,
    verbose=False)

RuntimeError: Initialization failed.

### Get Rhat

In [None]:
# Summary statistics of the Rhat values stored on the fit.
rhat_values = model_fit.rhat
rhat_values.describe()

### Calculate WAIC

In [None]:
# Display the `waic` attribute of the fitted model.
model_fit.waic

## Posteriors

In [None]:
# Summary statistics of the posterior samples stored on the fit.
posterior_samples = model_fit.samples
posterior_samples.describe()

In [None]:
# Global seaborn styling for every figure produced below
# (seaborn itself is imported in the notebook's top import cell).
sns.set(context="talk",
        style="white",
        palette="husl",
        rc={'figure.figsize': (15, 8)})

Here we plot the estimated posterior distributions against the generating parameters, to see whether the model parameters are recovered well:

In [None]:
# Posterior distributions with HDI intervals.
g = model_fit.plot_posteriors(height=5, show_intervals='HDI')

# Mean of each generating parameter across the simulated data, computed once
# instead of on every loop iteration.
generating_means = data[['drift', 'threshold', 'ndt', 'rel_sp']].mean().values

# Mark the generating value on each panel with a dashed grey vertical line.
# NOTE(review): assumes the panels come in the order drift, threshold, ndt,
# rel_sp and that there are no more than four of them -- confirm.
for panel_idx, ax in enumerate(g.axes.flatten()):
    ax.axvline(generating_means[panel_idx], color='grey', linestyle='--')

## Posterior predictives

### Ungrouped

In [None]:
# Summary of 100 posterior predictive samples, ignoring any grouping.
pp_summary = model_fit.get_posterior_predictives_summary(
    n_posterior_predictives=100,
)
pp_summary

In [None]:
# Mean posterior predictives (100 samples) with HDI intervals;
# the trailing semicolon suppresses the returned object's repr.
model_fit.plot_mean_posterior_predictives(
    n_posterior_predictives=100,
    figsize=(20, 8),
    show_intervals='HDI',
);

In [None]:
# Quantile posterior predictives (100 samples), drawn with kind='shades';
# the trailing semicolon suppresses the returned object's repr.
model_fit.plot_quantiles_posterior_predictives(
    n_posterior_predictives=100,
    kind='shades',
);

### Grouped

In [None]:
# Posterior predictive summary (100 samples) grouped by participant,
# requesting the .3, .5 and .7 quantiles.
model_fit.get_grouped_posterior_predictives_summary(
    grouping_vars=['participant'],
    quantiles=[.3, .5, .7],
    n_posterior_predictives=100,
)

In [None]:
# Mean posterior predictives (100 samples) per participant;
# the trailing semicolon suppresses the returned object's repr.
model_fit.plot_mean_grouped_posterior_predictives(
    grouping_vars=['participant'],
    n_posterior_predictives=100,
    figsize=(20, 8),
);