# SST (Spatial Stress Test) Analysis

fMRI Data Collected 2015-2016

In [None]:
import glob
import os
import os.path as op
import re

import matplotlib
import matplotlib.pyplot as plt
import moss
import numpy as np
import pandas as pd
import scipy as sp
import seaborn as sns
from scipy import stats

#widget ability
from IPython.html.widgets import interact, interactive, fixed
from IPython.html import widgets
from IPython.display import clear_output, display, HTML

# Gather project info & functions
from sst_setup_project import *

# for plotting: seaborn 'whitegrid' style with the 'poster' context,
# and figures rendered inline in the notebook
sns.set(style='whitegrid', context='poster')
%matplotlib inline

### Set up directories & exp-specific information

In [None]:
# Paths used throughout the notebook. Everything except 'navdir' lives
# under ~/Experiments/SST; 'navdir' is an absolute path on a mounted volume.
dirs = {'basedir': op.join(op.expanduser('~'), 'Experiments/SST')}
dirs['datadir'] = op.join(dirs['basedir'], 'data')
dirs.update({
    'analydir': op.join(dirs['basedir'], 'analysis'),
    'subj_info_file': op.join(dirs['datadir'], 'subj_info.csv'),
    'shortcut_file': op.join(dirs['datadir'], 'shortcut_coords.csv'),
    'navdir': '/Volumes/group/awagner/sgagnon/SST/nav_data',
    'order_file': op.join(dirs['basedir'], 'data', 'SST_env_order.csv'),
})

In [None]:
# Collect the experiment-level settings for 'SST' using the paths in `dirs`.
# gather_experiment_info is not defined in this notebook; presumably it
# comes from the `sst_setup_project` star import -- TODO confirm.
proj = gather_experiment_info(exp_name='SST', dirs=dirs)

### Read in subject information

In [None]:
# Load the subject table and keep only rows whose `remove` column is empty.
# .copy() gives an independent frame, so the subid assignment below does not
# write into a filtered slice (avoids SettingWithCopyWarning).
subj_info = pd.read_csv(dirs['subj_info_file'])
subj_info = subj_info[pd.isnull(subj_info.remove)].copy()

# Subject ids are handled as strings everywhere else in the notebook
subj_info[['subid']] = subj_info.subid.astype(str)
subj_info

In [None]:
# Load the shortcut coordinates table and display it
shortcut_coords = pd.read_csv(dirs['shortcut_file'])
shortcut_coords

In [None]:
# Load the order file and store subject ids as strings so they compare
# equal to the string ids used in the other tables
trial_order = pd.read_csv(dirs['order_file'])
trial_order['subid'] = trial_order['subid'].astype(str)
trial_order.head()

In [None]:
# Number of distinct subject ids present in the order file
len(trial_order.subid.unique())

## Read in data files

In [None]:
# Subject ids that will be iterated over when reading the log files
subj_info.subid

In [None]:
# Read every subject's per-run navigation log into one long dataframe `df`
# (one row per log line), with time expressed in seconds relative to the
# scanner trigger. Optionally load the group questionnaire tables as well.
df = pd.DataFrame() # dataframe of subjs x envs
test = True                     # read the navigation logs
questionnaires_shock = False    # read the shock-block questionnaire table
questionnaires_post = False     # read the post-session questionnaire table

n_runs = 12
log_columns = ['time', 'c2', 'command',
               'c3', 'c4', 'c5', 'c6', 'c7', 'c8']
run_frames = []  # one frame per subject/run; concatenated once after the loop

# iterate through subjects
for subid in subj_info.subid:
    print(subid)

    if not test:
        continue

    # Subject folders are zero-padded to two digits (e.g. sst01, sst12)
    subj_dir = 'sst%02d' % int(subid)

    for run_num in range(1, n_runs + 1):
        # locate the log file for this run (first matching session folder)
        pattern = op.join(dirs['navdir'], subj_dir, 'run' + str(run_num),
                          'session_*', 'log.txt')
        matches = glob.glob(pattern)
        if not matches:
            raise IOError('No navigation log found matching: ' + pattern)
        test_file = matches[0]

        # split each log line on tabs and line terminators
        with open(test_file) as f:
            output = [re.split('\t|\r|\n', line) for line in f]
        d2 = pd.DataFrame(output, columns=log_columns)

        # include subid and run
        d2['subid'] = subid
        d2['run'] = run_num

        # runs 1-4 are labelled 'habit', the remaining runs 'shortcut'
        if run_num < 5:
            d2['type'] = 'habit'
        else:
            d2['type'] = 'shortcut'

        # force time to be integer, subtract trigger time from each run
        d2.time = d2.time.astype(int)
        scan_times = d2.loc[d2.command == 'SCAN', 'time'].values
        if len(scan_times) == 0:
            raise ValueError('No SCAN trigger row in ' + test_file)
        d2.time = d2.time - scan_times[0]

        # convert to seconds
        d2.time = d2.time/1000
        d2.time = d2.time - 8 # adjust for tossed volumes (4 TRs, 8 s (2s TR))

        run_frames.append(d2)

# A single concat avoids re-copying the growing frame on every run
if run_frames:
    df = pd.concat(run_frames, ignore_index=True)

# Compiled group data
if questionnaires_shock:
    q_file = op.join(dirs['basedir'], 'data/Quest/Questionnaires_shockblock_group.csv')
    dq_shock = pd.read_csv(q_file, index_col=0)

if questionnaires_post:
    q_file = op.join(dirs['basedir'], 'data/Quest/Questionnaires_group.csv')
    dq_post = pd.read_csv(q_file, index_col=0)
    dq_post['subid'] = dq_post.subid.astype(str)

## Look at data

In [None]:
# Preview the first rows of the combined raw log table
df.head()

### Look at 2D position in space

In [None]:
# Log commands (values of the `command` column) that are kept as events
command_list = ['SCAN', 'ORIENT', 'ASSIGNED', 'NAVIGATE', 'ARRIVED']

In [None]:
# Preview the raw log table before filtering by command
df.head()

In [None]:
# Event table: rows whose command is in command_list, renumbered from 0,
# with the unused log columns and the old row index removed
dp = (df[df.command.isin(command_list)]
      .reset_index()
      .drop(['c2', 'c4', 'c5', 'c6', 'c7', 'c8', 'index'], axis=1))
dp.head()

### Drop non-targets

In [None]:
# Values of the c3 column that count as a target for ARRIVED events
targets = [
    'zzz',
    'Jim_Parsons',
    'Beyonce',
    'Paul_McCartney',
    'Natalie_Portman',
    'Benedict_Cumberbatch',
    'Taylor_Swift',
    'Katy_Perry',
    'Johnny_Depp',
    'Zooey_Deschanel',
    'George_Clooney',
    'Mark_Zuckerberg',
    'Emma_Watson',
]


In [None]:
# Keep a row unless it is an ARRIVED event whose c3 value is not a target;
# reset_index() renumbers the rows and keeps the old labels in an 'index' column
dp = dp[(dp.command != "ARRIVED") | dp.c3.isin(targets)].reset_index()

In [None]:
# Number of NAVIGATE events across all subjects and runs
len(dp[dp.command == "NAVIGATE"].time)

In [None]:
# Number of ARRIVED events (at targets) across all subjects and runs
len(dp[dp.command == "ARRIVED"].time)

In [None]:
# Per-subject count of ARRIVED rows. Uses the DataFrame.groupby method
# (as the other cells do) instead of the deprecated top-level pd.groupby.
dp[dp.command == "ARRIVED"].groupby(by=['subid']).count()

### Get the indices of the NAVIGATE rows and of the row immediately after each (the first ARRIVED)

In [None]:
# Row labels of every NAVIGATE event, followed by the label of the row
# directly after each one (label + 1)
nav_rows = dp[(dp.command == "NAVIGATE")].index
ind = np.concatenate((nav_rows, nav_rows + 1))

In [None]:
# Preview the filtered event table
dp.head()

In [None]:
# Per-subject count of the rows selected by `ind`
dp.loc[ind].groupby(by=['subid']).count()

Do the NAVIGATE and ARRIVED subsets have the same shape?

In [None]:
# Rows picked out by `ind`; print the shape of the NAVIGATE subset and then
# of the ARRIVED subset so the two can be compared
dsub = dp.loc[ind]

for cmd in ("NAVIGATE", "ARRIVED"):
    print(dsub[dsub.command == cmd].shape)

In [None]:
# Walk the NAVIGATE and ARRIVED rows in parallel and print the NAVIGATE
# subject id at every position where the two subject ids disagree
nav_vals = dsub[dsub.command == "NAVIGATE"].subid.values
arr_vals = dsub[dsub.command == "ARRIVED"].subid.values

for pos, arr_subid in enumerate(arr_vals):
    if nav_vals[pos] != arr_subid:
        print(nav_vals[pos])

In [None]:
# Navigation duration = ARRIVED time minus NAVIGATE time, paired by position
arrive_times = dsub[dsub.command == "ARRIVED"].time.values
start_times = dsub[dsub.command == "NAVIGATE"].time.values
nav_duration = arrive_times - start_times
len(nav_duration)

In [None]:
# Start every row at duration 0, then give each NAVIGATE row its computed
# navigation time; this needs nav_duration to hold exactly one value per
# NAVIGATE row, in row order
dp['duration'] = 0
dp.loc[dp.command == 'NAVIGATE', 'duration'] = nav_duration

In [None]:
# Keep four kinds of rows: each NAVIGATE, the row right after it (the first
# ARRIVED), each ORIENT and each ASSIGNED. Row order follows `ind`, and
# reset_index() stores the previous labels as an extra column.
nav_rows = dp[(dp.command == "NAVIGATE")].index
orient_rows = dp[(dp.command == "ORIENT")].index
assigned_rows = dp[(dp.command == "ASSIGNED")].index

ind = np.concatenate((nav_rows,
                      nav_rows + 1, #first arrived
                      orient_rows,
                      assigned_rows))
dp = dp.loc[ind].reset_index()

In [None]:
# Fixed durations assigned to the non-navigation events
for event, fixed_duration in (('ORIENT', 11), ('ASSIGNED', 8), ('ARRIVED', 1)):
    dp.loc[dp.command == event, 'duration'] = fixed_duration

In [None]:
# Preview the event table with the duration column filled in
dp.head()

In [None]:
# Per-subject row counts divided by 4 (four event types are kept:
# ORIENT, ASSIGNED, NAVIGATE, ARRIVED) -- presumably the number of trials
# per subject; TODO confirm
dp.groupby(by=['subid']).count()/4

In [None]:
# Boxplot of the NAVIGATE durations pooled over all subjects and runs
plt.boxplot(dp[(dp.command == 'NAVIGATE')].duration.values)

In [None]:
# NAVIGATE rows only, without the bookkeeping columns, saved as a group CSV.
# drop() without inplace returns a new frame, so nothing is modified in
# place on a slice of dp (avoids SettingWithCopyWarning).
dpnavdur = dp[(dp.command == 'NAVIGATE')].drop(['level_0', 'index', 'c3'], axis=1)
dpnavdur.to_csv('/Volumes/group/awagner/sgagnon/SST/analysis/nav/group_nav_durations.csv', index=False)

#### Distribution of min navigation times

In [None]:
# Column-wise minimum per subject over the NAVIGATE rows; show the
# resulting minimum durations as an array
dp[(dp.command == 'NAVIGATE')].groupby(['subid']).min().reset_index().duration.values

In [None]:
# Preview the event table before environment labels are added
dp.head()

## Add environment labels to df

In [None]:
# Preview the order table that supplies the environment labels
trial_order.head()

In [None]:
# Remove the two leftover index columns, then put the events in
# chronological order within each subject and run and renumber the rows.
# NOTE(review): DataFrame.sort(columns=...) only exists in older pandas;
# newer versions spell this sort_values(by=...) -- confirm the pandas
# version before changing it.
dp = dp.drop(['level_0', 'index'], axis=1)
dp = dp.sort(columns=['subid', 'run', 'time']).reset_index()
dp.head()

In [None]:
# ORIENT rows of dp, as an independent copy so that later edits to
# orient_onsets never write into a slice of dp (avoids
# SettingWithCopyWarning); the row labels still match dp's.
orient_onsets = dp.loc[dp.command == 'ORIENT'].copy()
orient_onsets = orient_onsets.drop(['index'], axis=1)
orient_onsets.head()

In [None]:
# Number the ORIENT onsets in row order: the count goes up by one while the
# run value stays the same as on the previous row, and restarts at 1
# whenever the run value changes (and on the very first row).
orient_onsets['trial'] = 0 # init trial number

prev_run = None
trial_num = 0
for row in orient_onsets.index:
    this_run = orient_onsets.loc[row, 'run']
    if prev_run is not None and this_run == prev_run:
        trial_num = trial_num + 1
    else:
        trial_num = 1
    orient_onsets.loc[row, 'trial'] = trial_num
    prev_run = this_run

In [None]:
# Preview the ORIENT onsets with their within-run trial numbers
orient_onsets.head()

In [None]:
# reset_index() moves the row labels (which match dp's) into an 'index'
# column so they survive the merge. No `on=` is given, so pandas joins on
# the columns the two frames have in common, using its default inner join.
orient_onsets = orient_onsets.reset_index().merge(trial_order)
orient_onsets.head()

In [None]:
# Preview dp before the env/rep/trial columns are added
dp.head()

In [None]:
# Add env / rep / trial columns to dp. Values are written only on the ORIENT
# row of each trial (located through the 'index' column of orient_onsets)
# and then forward-filled onto the rows that follow it.
label_cols = ('env', 'rep', 'trial')

for col in label_cols:
    dp[col] = np.nan

for i in orient_onsets.index:
    index_val = orient_onsets.loc[i, 'index']
    dp.loc[index_val, 'env'] = 'env' + orient_onsets.loc[i, 'env'].astype(str)
    for col in ('rep', 'trial'):
        dp.loc[index_val, col] = orient_onsets.loc[i, col]

for col in label_cols:
    dp[col].fillna(method='ffill', inplace=True)
dp.head()

In [None]:
# Sanity check: the number of env labels built from orient_onsets and the
# number of dp rows they address should be equal
env_labels = 'env' + orient_onsets.env.astype(str)
print(len(env_labels))
print(len(dp.loc[orient_onsets['index'], 'env']))

## Generate onsets 

In [43]:
# Write one design file per subject, <output_dir>/sstNN/design/nav_cond.csv,
# with one row per event: run, condition (the command), onset, duration, value.
output_filename = 'nav_cond.csv'
output_dir = '/Volumes/group/awagner/sgagnon/SST/data'

for sub in subj_info.subid:
    print(sub)

    dsub = dp[(dp.subid == sub)]
    ddesign = pd.DataFrame({'run': dsub.run,
                            'condition': dsub.command,
                            'onset': dsub.time,
                            'duration': dsub.duration,
                            'value': 1})

    # model some things as impulse rather than boxcar:
    is_impulse = ddesign.condition.isin(['ASSIGNED', 'ARRIVED'])
    ddesign.loc[is_impulse, 'duration'] = 0

    # subject folders are zero-padded to two digits (sst01 ... sst19)
    sub_output_dir = op.join(output_dir, 'sst%02d' % int(sub), 'design')
    print(sub_output_dir)

    if not os.path.exists(sub_output_dir):
        os.makedirs(sub_output_dir)

    ddesign.to_csv(op.join(sub_output_dir, output_filename), index=False)

1
/Volumes/group/awagner/sgagnon/SST/data/sst01/design
2
/Volumes/group/awagner/sgagnon/SST/data/sst02/design
3
/Volumes/group/awagner/sgagnon/SST/data/sst03/design
4
/Volumes/group/awagner/sgagnon/SST/data/sst04/design
5
/Volumes/group/awagner/sgagnon/SST/data/sst05/design
6
/Volumes/group/awagner/sgagnon/SST/data/sst06/design
7
/Volumes/group/awagner/sgagnon/SST/data/sst07/design
9
/Volumes/group/awagner/sgagnon/SST/data/sst09/design
10
/Volumes/group/awagner/sgagnon/SST/data/sst10/design
11
/Volumes/group/awagner/sgagnon/SST/data/sst11/design
12
/Volumes/group/awagner/sgagnon/SST/data/sst12/design
13
/Volumes/group/awagner/sgagnon/SST/data/sst13/design
14
/Volumes/group/awagner/sgagnon/SST/data/sst14/design
15
/Volumes/group/awagner/sgagnon/SST/data/sst15/design
16
/Volumes/group/awagner/sgagnon/SST/data/sst16/design
18
/Volumes/group/awagner/sgagnon/SST/data/sst18/design
19
/Volumes/group/awagner/sgagnon/SST/data/sst19/design


In [44]:
# Preview the final event table (with env, rep and trial columns)
dp.head()

Unnamed: 0,index,time,command,c3,subid,run,type,duration,env,rep,trial
0,1218,0.278,ORIENT,oriented_to_env,1,1,habit,11.0,env1,1,1
1,1827,11.308,ASSIGNED,zzz,1,1,habit,8.0,env1,1,1
2,0,19.294,NAVIGATE,started_navigation,1,1,habit,49.284,env1,1,1
3,609,68.578,ARRIVED,zzz,1,1,habit,1.0,env1,1,1
4,1219,82.554,ORIENT,oriented_to_env,1,1,habit,11.0,env8,1,2


In [45]:
# Write one design file per subject, <output_dir>/sstNN/design/nav_cond_byenv.csv,
# where each condition label is command_type_env_repN.
output_filename = 'nav_cond_byenv.csv'
output_dir = '/Volumes/group/awagner/sgagnon/SST/data'

for sub in subj_info.subid:
    print(sub)

    # .copy() so the new column is added to an independent frame rather than
    # to a slice of dp (this is what raised SettingWithCopyWarning)
    dsub = dp[(dp.subid == sub)].copy()
    dsub['command_detailed'] = dsub.command + '_' + dsub['type'] + '_' + dsub.env + '_rep' + dsub.rep.map(int).map(str)
    ddesign = pd.DataFrame({'run': dsub.run,
                            'condition': dsub.command_detailed,
                            'onset': dsub.time,
                            'duration': dsub.duration,
                            'value': 1})

    # model some things as impulse rather than boxcar. The mask is built from
    # the raw command: `condition` now holds detailed labels such as
    # 'ASSIGNED_habit_env1_rep1', which would never equal 'ASSIGNED'/'ARRIVED'.
    # ddesign shares dsub's row labels, so the mask aligns row for row.
    is_impulse = dsub.command.isin(['ASSIGNED', 'ARRIVED'])
    ddesign.loc[is_impulse, 'duration'] = 0

    # subject folders are zero-padded to two digits (sst01 ... sst19)
    sub_output_dir = op.join(output_dir, 'sst%02d' % int(sub), 'design')
    print(sub_output_dir)

    if not os.path.exists(sub_output_dir):
        os.makedirs(sub_output_dir)

    ddesign.to_csv(op.join(sub_output_dir, output_filename), index=False)

1
/Volumes/group/awagner/sgagnon/SST/data/sst01/design
2
/Volumes/group/awagner/sgagnon/SST/data/sst02/design
3
/Volumes/group/awagner/sgagnon/SST/data/sst03/design
4
/Volumes/group/awagner/sgagnon/SST/data/sst04/design
5
/Volumes/group/awagner/sgagnon/SST/data/sst05/design
6
/Volumes/group/awagner/sgagnon/SST/data/sst06/design
7
/Volumes/group/awagner/sgagnon/SST/data/sst07/design
9
/Volumes/group/awagner/sgagnon/SST/data/sst09/design
10
/Volumes/group/awagner/sgagnon/SST/data/sst10/design
11
/Volumes/group/awagner/sgagnon/SST/data/sst11/design
12
/Volumes/group/awagner/sgagnon/SST/data/sst12/design
13
/Volumes/group/awagner/sgagnon/SST/data/sst13/design
14
/Volumes/group/awagner/sgagnon/SST/data/sst14/design
15
/Volumes/group/awagner/sgagnon/SST/data/sst15/design
16
/Volumes/group/awagner/sgagnon/SST/data/sst16/design
18
/Volumes/group/awagner/sgagnon/SST/data/sst18/design
19
/Volumes/group/awagner/sgagnon/SST/data/sst19/design


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


## Model only the "orient" period, to remove its variance from the model

In [None]:
# Show the ORIENT rows. The event type is stored in dp's `command` column;
# dp has no `condition` column (that name only exists in the design frames).
dp.loc[dp.command == 'ORIENT']

In [None]:
# Write one design file per subject, <output_dir>/sstNN/design/nav_ORIENT.csv,
# containing only the ORIENT events.
output_filename = 'nav_ORIENT.csv'
output_dir = '/Volumes/group/awagner/sgagnon/SST/data'

# The event type is stored in dp's `command` column (dp has no `condition`)
dp_orient = dp.loc[dp.command == 'ORIENT']

for sub in subj_info.subid:
    print(sub)

    dsub = dp_orient[(dp_orient.subid == sub)]
    ddesign = pd.DataFrame({'run': dsub.run,
                            'condition': dsub.command,
                            'onset': dsub.time,
                            'duration': dsub.duration,
                            'value': 1})

    # subject folders are zero-padded to two digits (sst01 ... sst19)
    sub_output_dir = op.join(output_dir, 'sst%02d' % int(sub), 'design')
    print(sub_output_dir)

    if not os.path.exists(sub_output_dir):
        os.makedirs(sub_output_dir)

    ddesign.to_csv(op.join(sub_output_dir, output_filename), index=False)