# Model recovery example with stimulus coding for the drift rate, $v$
#### Here, $A_v = v$ and $B_v = -v$

In [1]:
import hddm
from patsy import dmatrix  # for generation of (regression) design matrices
import numpy as np         # for basic matrix operations
from pandas import Series  # to manipulate data-frames generated by hddm



In [2]:
# import sys
# sys.stdout = open('ModelRecoveryOutput_test_stim_coding.txt', 'w')

In [3]:
# Simulation size: number of simulated subjects, and number of trials
# generated for each condition level (separately for each stimulus).
n_subjects, trials_per_level = 10, 150

In [4]:
# Generative DDM parameters for stimulus A, one dict per condition level.
# Only the drift rate v differs between levels; a, t and z are held fixed.
# All drift rates are positive for stimulus A.
level1a = dict(v=.1, a=2, t=.3, z=0.5)
level2a = dict(v=.8, a=2, t=.3, z=0.5)
level3a = dict(v=.3, a=2, t=.3, z=0.5)

# The three levels in order, kept together for later comparison with the
# recovered estimates.
all_params_stimA = [level1a, level2a, level3a]

In [5]:
# Generative DDM parameters for stimulus B, one dict per condition level.
# Same a, t and z as stimulus A; the drift rates are the stimulus-A values
# with the sign flipped.
level1b = dict(v=-.1, a=2, t=.3, z=0.5)
level2b = dict(v=-.8, a=2, t=.3, z=0.5)
level3b = dict(v=-.3, a=2, t=.3, z=0.5)

# The three levels in order, kept together for later comparison with the
# recovered estimates.
all_params_stimB = [level1b, level2b, level3b]

In [None]:
# Bare reference to the data generator: evaluating it as the last expression
# of a notebook cell displays the function object without calling it.
hddm.generate.gen_rand_data

In [6]:
# Simulate one data set per stimulus from the generative parameters above.
# Each call takes a mapping from condition label to that level's parameter
# dict and returns a (data, params) pair.
# Stimulus A is generated before stimulus B so the order of the generator
# calls is unchanged.
_sim_size = dict(size=trials_per_level, subjs=n_subjects)

_levels_a = {'level1': level1a, 'level2': level2a, 'level3': level3a}
data_a, params_a = hddm.generate.gen_rand_data(_levels_a, **_sim_size)

_levels_b = {'level1': level1b, 'level2': level2b, 'level3': level3b}
data_b, params_b = hddm.generate.gen_rand_data(_levels_b, **_sim_size)


In [7]:
# Preview the first 10 rows of the simulated stimulus-A data.
data_a.head(10)

Unnamed: 0,rt,response,subj_idx,condition
0,0.846241,1.0,0,level3
1,1.632241,0.0,0,level3
2,1.192241,1.0,0,level3
3,1.284241,1.0,0,level3
4,0.816241,0.0,0,level3
5,1.380241,1.0,0,level3
6,0.886241,0.0,0,level3
7,1.219241,0.0,0,level3
8,2.433241,1.0,0,level3
9,0.632241,1.0,0,level3


In [8]:
# Preview the first 10 rows of the simulated stimulus-B data.
data_b.head(10)

Unnamed: 0,rt,response,subj_idx,condition
0,0.58712,0.0,0,level3
1,1.45712,0.0,0,level3
2,0.58812,0.0,0,level3
3,1.11612,0.0,0,level3
4,1.31312,0.0,0,level3
5,1.71312,1.0,0,level3
6,1.78012,1.0,0,level3
7,0.86612,0.0,0,level3
8,1.94512,0.0,0,level3
9,1.29812,1.0,0,level3


In [9]:
# Tag every simulated trial with the stimulus that produced it:
# stimulus A = 1.0, stimulus B = 2.0 (float codes).
# A scalar assignment broadcasts over all rows of the frame it is assigned
# to, so each column follows its own frame's index and never depends on the
# index of the other data set.
data_a['stimulus'] = 1.0
data_b['stimulus'] = 2.0

In [83]:
# Stack the stimulus-A and stimulus-B trials into a single data set with a
# fresh 0..n-1 index.
# pandas.concat is used instead of DataFrame.append, which was deprecated in
# pandas 1.4 and removed in pandas 2.0; with ignore_index=True the resulting
# frame is the same.
from pandas import concat

mydata = concat([data_a, data_b], ignore_index=True)

In [84]:
# Display the merged data set (stimulus-A rows followed by stimulus-B rows).
mydata

Unnamed: 0,rt,response,subj_idx,condition,stimulus
0,0.846241,1.0,0,level3,1.0
1,1.632241,0.0,0,level3,1.0
2,1.192241,1.0,0,level3,1.0
3,1.284241,1.0,0,level3,1.0
4,0.816241,0.0,0,level3,1.0
5,1.380241,1.0,0,level3,1.0
6,0.886241,0.0,0,level3,1.0
7,1.219241,0.0,0,level3,1.0
8,2.433241,1.0,0,level3,1.0
9,0.632241,1.0,0,level3,1.0


In [80]:
# IPython help syntax (not plain Python): show the documentation for
# hddm.HDDMStimCoding.
?hddm.HDDMStimCoding

In [75]:
# params_full, params = hddm.generate.gen_rand_params(cond_dict={'v': [-1, 1], 'z': [.5, .5]})
# data, params_subj = hddm.generate.gen_rand_data(params=params_full, size=1000)
# test_stim_coding = hddm.HDDMStimCoding(data, stim_col='condition', split_param='v', 
#                                        depends_on={'v':'condition'})
# test_stim_coding.sample(10000, burn=5000)

  self.__name__ = input['__name__']


 [-----------------100%-----------------] 10000 of 10000 complete in 131.5 sec

  return reshape(newshape, order=order)
  return reshape(newshape, order=order)
  return reshape(newshape, order=order)
  return reshape(newshape, order=order)


<pymc.MCMC.MCMC at 0x7f717e432320>

In [76]:
# NOTE(review): in the visible notebook, `params` is assigned only by the
# commented-out gen_rand_params cell above, so this cell needs that code to
# have been run; the output shown below appears to come from such a run.
params

{'a': 1.9941090771615857,
 'st': 0,
 'sv': 0,
 'sz': 0,
 't': 0.40346793356946653,
 'v(c0)': -1,
 'v(c1)': 1,
 'z(c0)': 0.5,
 'z(c1)': 0.5}

In [None]:
#set up the v link function to flip v for each stim
def v_link_func(x, data=mydata):
    """Return ``x`` with its sign flipped on stimulus-2 trials.

    ``x`` must carry a pandas index whose labels select the matching rows of
    ``data`` (presumably the drift-rate values produced by the regression --
    confirm against HDDMRegressor). Trials whose ``stimulus`` code is the
    first level (1) are multiplied by +1, trials with the second level (2)
    by -1.

    ``data`` defaults to ``mydata`` as it exists when this cell is run
    (default values are evaluated once, at definition time), so re-run this
    cell after rebuilding ``mydata``.
    """
    # Label-based lookup of each trial's stimulus code. ``.loc`` is used
    # because ``.ix`` was deprecated in pandas 0.20 and removed in pandas 1.0.
    stimulus = data.stimulus.loc[x.index]
    # Intercept-free design with a custom contrast: one column holding +1 for
    # the first stimulus level and -1 for the second.
    sign = np.asarray(dmatrix('0 + C(s,[[1],[-1]])', {'s': stimulus}))
    return x * sign

In [None]:
# Regression specification for the drift rate v.
# v is unconstrained, so it does not need the inverse-logit transform
# (although that could be useful if the data had been simulated that way).
# If the data are response coded, v does need a link function, but only to
# give the drift rates for stimuli a & b their + & - signs, respectively.
v_reg = dict(model='v ~ 1 + C(condition)', link_func=v_link_func)

In [None]:
# The full regression description: the list of regression specifications
# passed to the regressor model (here only the drift-rate regression).
reg_descr = [v_reg]

In [None]:
# Display the generative parameters of both stimuli for reference.
all_params_stimA, all_params_stimB

In [None]:
# Build and fit the regression model on the merged data.
# informative=True is only for accuracy-coded data, so it is switched off.
m_reg = hddm.HDDMRegressor(
    mydata,
    reg_descr,
    include='z',
    informative=False,
)
# Draw 1000 samples with burn=20.
m_reg.sample(1000, burn=20)

In [None]:
# NOTE(review): `stats` is only assigned in the following cell
# (stats = m_reg.gen_stats()), so that cell must be run before this one.
stats

In [None]:
# Compare the fitted estimates with the generative model: compute the summary
# table, then show only the rows for the parameters of interest.
# Alternative: m_reg.print_stats(['v_Intercept'])
stats = m_reg.gen_stats()
_rows_of_interest = [
    'a',
    't',
    'z',
    'v_Intercept',
    'v_C(condition)[T.level2]',
    'v_C(condition)[T.level3]',
]
stats[stats.index.isin(_rows_of_interest)]

In [None]:
# test_stim_coding.gen_stats()

In [None]:
# Display the generative parameters of both stimuli, with labels, so they can
# be compared with the estimates.
'gen_param stim A: ', all_params_stimA, 'gen_param stim B: ', all_params_stimB