In [1]:
import numpy as np
import pandas as pd
from pandas import read_csv
import json

import os, sys
from os.path import dirname


ROOT_DIR = '.'


In [2]:
#import data
raw_data = read_csv(os.path.join(ROOT_DIR,'data','data.csv'))
data = raw_data.copy()

# data rejections
reject = read_csv(os.path.join(ROOT_DIR,'data','reject.csv'))
data = data[data.subject.isin(reject.query('reject==0').subject)]

## Sort data.
f = lambda x: x.subject + '_' + x.rune
data['stimulus'] = np.unique(data.apply(f, 1), return_inverse=True)[-1] + 1
data = data.sort_values(['stimulus','exposure']).reset_index(drop=True)

data['outcome'] = data.outcome.replace({10:1, 1:0, -1:1, -10:0})
data['valence'] = data.valence.replace({'win':1, 'lose':0})

cols_trt = ['session','stimulus']
cols_ic = ['block','stimulus']



In [11]:
data_i = data[data.session == 2]
## Define metadata.
print(data_i.stimulus.nunique())
print(data_i.exposure.nunique())
np.unique(data_i[data_i.exposure==1].subject, return_inverse=True)[-1] + 1

2328
12


array([ 1,  1,  1, ..., 97, 97, 97])

In [4]:
## data
for i in [1,2,3]:
    data_i = data[data.session == i]
    ## Define metadata.
    N = data_i.stimulus.nunique()
    E = data_i.exposure.nunique()
    S = np.unique(data_i[data_i.exposure==1].subject, return_inverse=True)[-1] + 1

    ## Prepare task variables.
    R = data_i.pivot_table('outcome','exposure','stimulus').fillna(0).values

    ## Prepare response variables.
    Y = data_i.pivot_table('choice','exposure','stimulus').fillna(0).values.astype(int)

    ## Prepare valance variables.

    V = data_i.pivot_table('valence','exposure','stimulus').fillna(0).values.astype(int)
    
    C = data_i.pivot_table('choice','exposure','stimulus').notnull().values.astype(int)
    C = np.where(data_i.pivot_table('rt','exposure','stimulus',dropna=False).values < 0.2, 0, C)

    #print sanity check
    print(E,N)
    print(R.shape)
    print(Y.shape)
    print(V.shape)
    print(C.shape)

    model_data = dict(N=N, 
                  E=E, 
                  C=C.tolist(),
                  S=S.tolist(), 
                  Y=Y.tolist(), 
                  V=V.tolist(),
                  R=R.tolist())

#     json_data = json.dumps(model_data)

#     f = open("stan_data/s{}_data.json".format(i),"w")
#     f.write(json_data)
#     f.close()
    
#     data_i.to_csv("stan_data/s{}_dataframe.csv".format(i), index=False)



12 2640
(12, 2640)
(12, 2640)
(12, 2640)
(12, 2640)
12 2328
(12, 2328)
(12, 2328)
(12, 2328)
(12, 2328)
12 2376
(12, 2376)
(12, 2376)
(12, 2376)
(12, 2376)


In [3]:
########### reliability

In [13]:
## Restrict to participants with all data available.
data_trt = data.groupby('subject').filter(lambda x: x.session.nunique() == 3)

## Sort data.
f = lambda x: x.subject + '_' + str(x.session) + '_' + x.rune
data_trt.loc[:,'stimulus'] = np.unique(data_trt.apply(f, 1), return_inverse=True)[-1] + 1
data_trt.sort_values(['stimulus','exposure'], inplace=True)
data_trt.reset_index(drop=True, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


In [93]:
for i,j in [[1,2],[2,3],[1,3]]:
    data_ij = data_trt.query('session == {} or session == {}'.format(i,j))
    
    ## Define metadata.
    N = data_ij.stimulus.nunique() 
    E = data_ij.exposure.nunique()
    print(E)

12
12
12


In [88]:
## test retest
for i,j in [[1,2],[2,3],[1,3]]:
    data_ij = data_trt.query('session == {} or session == {}'.format(i,j))
    
    ## Define metadata.
    N = data_ij.stimulus.nunique() 
    E = data_ij.exposure.nunique()
    S = np.unique(data_ij.query(f'exposure==1').subject, return_inverse=True)[-1] + 1
    M = np.unique(data_ij.query(f'exposure==1').session, return_inverse=True)[-1] + 1


    ## Prepare task variables.
    R = data_ij.pivot_table('outcome','exposure', cols_trt).fillna(0).values.reshape(E,1,N).astype(int).swapaxes(0,1)[0]

    ## Prepare response variables.
    Y = data_ij.pivot_table('choice','exposure', cols_trt).fillna(0).values.reshape(E,1,N).astype(int).swapaxes(0,1)[0]

    ## Prepare valance variables.
    V = data_ij.pivot_table('valence','exposure',cols_trt).fillna(0).values.reshape(E,1,N).swapaxes(0,1)[0]
    
    C = data_ij.fillna(99).pivot_table('rt','exposure',cols_trt).values.reshape(E,1,N).swapaxes(0,1)[0]
    C = np.invert(np.logical_or(np.isnan(C), C < 0.2)).astype(int)

    #print sanity check
    print(E,N)
    print(S.shape)
    print(R.shape)
    print(Y.shape)
    print(V.shape)
    print(C.shape)

    model_data = dict(N=N, 
                  E=E, 
                  C=C.tolist(),
                  S=S.tolist(), 
                  M=M.tolist(), 
                  Y=Y.tolist(), 
                  V=V.tolist(),
                  R=R.tolist())

    
#     json_data = json.dumps(model_data)

#     f = open("stan_data/trt_{}_{}_data.json".format(i,j),"w")
#     f.write(json_data)
#     f.close()    
    



12 4368
(4368,)
(12, 4368)
(12, 4368)
(12, 4368)
(12, 4368)
12 4368
(4368,)
(12, 4368)
(12, 4368)
(12, 4368)
(12, 4368)
12 4368
(4368,)
(12, 4368)
(12, 4368)
(12, 4368)
(12, 4368)


In [None]:
### split half

In [78]:
## Restrict to participants with all data available.
data_ic = data.groupby('subject').filter(lambda x: x.session.nunique() == 3)

## Sort data.
f = lambda x: x.subject + '_' + str(x.session) + '_' + x.rune
data_ic.loc[:,'stimulus'] = np.unique(data_ic.apply(f, 1), return_inverse=True)[-1] + 1
data_ic.sort_values(['stimulus','exposure'], inplace=True)
data_ic.reset_index(drop=True, inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


In [90]:
## internal consistency
for ses in [1,2,3]:

    data_ij = data_ic.query('session == {}'.format(ses))
    ## Define metadata.
    N = data_ij.stimulus.nunique()
    E = data_ij.exposure.nunique()
    S = np.unique(data_ij.query(f'exposure==1').subject, return_inverse=True)[-1] + 1
    M = np.unique(data_ij.query(f'exposure==1').block, return_inverse=True)[-1] + 1


    ## Prepare task variables.
    R = data_ij.pivot_table('outcome','exposure', cols_ic).fillna(0).values.reshape(E,1,N).astype(int).swapaxes(0,1)[0]

    ## Prepare response variables.
    Y = data_ij.pivot_table('choice','exposure', cols_ic).fillna(0).values.reshape(E,1,N).astype(int).swapaxes(0,1)[0]

    ## Prepare valance variables.
    V = data_ij.pivot_table('valence','exposure', cols_ic).fillna(0).values.reshape(E,1,N).swapaxes(0,1)[0]
    
    C = data_ij.fillna(99).pivot_table('rt','exposure', cols_ic).values.reshape(E,1,N).swapaxes(0,1)[0]
    C = np.invert(np.logical_or(np.isnan(C), C < 0.2)).astype(int)

    #print sanity check
    print(E,N)
    print(S.shape)
    print(M.shape)
    print(R.shape)
    print(Y.shape)
    print(V.shape)
    print(C.shape)

    model_data = dict(N=N, 
                  E=E, 
                  C=C.tolist(),
                  S=S.tolist(), 
                  M=M.tolist(), 
                  Y=Y.tolist(), 
                  V=V.tolist(),
                  R=R.tolist())

    
#     json_data = json.dumps(model_data)

#     f = open("stan_data/ic_s_{}_data.json".format(ses),"w")
#     f.write(json_data)
#     f.close()    
    



12 2184
(2184,)
(2184,)
(12, 2184)
(12, 2184)
(12, 2184)
(12, 2184)
12 2184
(2184,)
(2184,)
(12, 2184)
(12, 2184)
(12, 2184)
(12, 2184)
12 2184
(2184,)
(2184,)
(12, 2184)
(12, 2184)
(12, 2184)
(12, 2184)


In [75]:
# # Opening JSON file
# r = open('stan_data/m1_s1_data.json')
  
# # a dictionary
# dd = json.load(r)

# #reformat
# dd['S'] = np.array(dd['S'])
# dd['Y'] = np.array(dd['Y'])
# dd['R'] = np.array(dd['R'])
# dd['V'] = np.array(dd['V'])
