In [1]:
import os
import glob as glob
import pandas as pd
import numpy as np

import events prep

In [5]:
data_dir = '/home/finc/Dropbox/Projects/SelfReg/data/behavioral/raw/'
files = np.sort(os.listdir(data_dir))

sub = 's358'
sub = files[0]

file_path = glob.glob(f'{data_dir}{sub}/*surveyMedley.csv')[0]
file_name = os.path.basename(file_path)

In [6]:
#df= pd.read_csv(glob.glob(f'{data_dir}{files[0]}/*surveyMedley.csv')[0])
df = pd.read_csv(file_path)
df.columns

Index(['rt', 'key_press', 'trial_type', 'trial_index', 'time_elapsed',
       'internal_node_id', 'subject', 'text', 'block_duration',
       'timing_post_trial', 'trial_id', 'stimulus', 'possible_responses',
       'stim_duration', 'survey', 'item_coding', 'item_text', 'options',
       'response', 'coded_response', 'exp_id'],
      dtype='object')

In [7]:
start_time = df.query('trial_id == "fmri_trigger_wait"').iloc[-1]['time_elapsed']
df.time_elapsed-=start_time
# correct start time for problematic scans
df.time_elapsed-=get_timing_correction(file_name)



In [8]:
def get_timing_correction(filey, TR=680, n_TRs=14):
    problematic_files = ['s568_MotorStop.csv', 's568_Stroop.csv', 
                         's568_SurveyMedley.csv', 's568_DPX.csv',
                         's568_Discount.csv',
                         's556_MotorStop.csv', 's556_Stroop.csv', 
                         's556_SurveyMedley.csv', 's556_DPX.csv',
                         's556_Discount.csv',
                         's561_WATT.csv', 's561_ANT.csv', 
                         's561_TwoByTwo.csv', 's561_CCT.csv',
                         's561_StopSignal.csv',]
    tr_correction = TR * n_TRs
    if filey in problematic_files:
        return tr_correction
    else:
        return 0


def get_drop_columns(df, columns=None, use_default=True):
    default_cols = ['block_duration', 'correct_response', 'exp_stage', 
                    'feedback_duration', 'possible_responses', 
                   'rt', 'stim_duration', 'text', 'time_elapsed',
                   'timing_post_trial', 'trial_num']
    drop_columns = []
    if columns is not None:
        drop_columns = columns
    if use_default == True:
        drop_columns = set(default_cols) | set(drop_columns)
    drop_columns = set(df.columns) & set(drop_columns)
    return drop_columns

def get_junk_trials(df):
    junk = pd.Series(False, df.index)
    if 'correct' in df.columns:
        junk = np.logical_or(junk,np.logical_not(df.correct))
    if 'rt' in df.columns:
        junk = np.logical_or(junk,df.rt < 50)
    return junk

def get_movement_times(df):
    """
    time elapsed is evaluated at the end of a trial, so we have to subtract
    timing post trial and the entire block duration to get the time when
    the trial started. Then add the reaction time to get the time of movement
    """
    trial_time = df.time_elapsed - df.block_duration - df.timing_post_trial + \
                 df.rt
    return trial_time

def get_trial_times(df):
    """
    time elapsed is evaluated at the end of a trial, so we have to subtract
    timing post trial and the entire block duration to get the time when
    the trial started
    """
    trial_time = df.time_elapsed - df.block_duration - df.timing_post_trial
    return trial_time

def create_survey_event(df, duration=None):
    columns_to_drop = get_drop_columns(df, 
                                       use_default=False,
                                       columns = ['block_duration',
                                                  'trial_index',     #added 
                                                  'internal_node_id', #added
                                                  'exp_id', #added
                                                  'key_press',
                                                  'options',
                                                  'response',
                                                  #'rt',
                                                  'stim_duration',
                                                  'stimulus', #added
                                                  'text',
                                                  'time_elapsed',
                                                  'timing_post_trial',
                                                  'trial_id',
                                                  'trial_type'])
    events_df = df[df['time_elapsed']>0]
    # add junk regressor
    events_df.loc[:,'junk'] = get_junk_trials(df)
    # add duration and response regressor
    if duration is None:
        events_df.insert(0,'duration',events_df.stim_duration)
    else:
        events_df.insert(0,'duration',duration)
        
    events_df.insert(0,'response_time',events_df.rt-events_df.rt[events_df.rt>0].mean())
    # time elapsed is at the end of the trial, so have to remove the block 
    # duration
    events_df.insert(0,'onset',get_trial_times(df))
    # add motor onsets
    events_df.insert(0,'movement_onset',get_movement_times(df))
    # convert milliseconds to seconds
    events_df.loc[:,['response_time','onset','duration',
                     'movement_onset']]/=1000
    # drop unnecessary columns
    events_df = events_df.drop(columns_to_drop, axis=1)
    return events_df

In [9]:
df_clean = create_survey_event(df)
df_clean = df_clean.dropna(axis=0)
df_clean.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


Unnamed: 0,movement_onset,onset,response_time,duration,rt,subject,possible_responses,survey,item_coding,item_text,coded_response,junk
25,7.143,1.004,0.887381,8.5,6139,s061,6689718277,brief,reverse,Pleasure and fun sometimes keep me from gettin...,2.0,False
26,18.44,10.21,2.978381,8.5,8230,s061,66897182,upps,forward,"Sometimes when I feel bad, I can't seem to sto...",2.0,False
27,24.125,19.415,-0.541619,8.5,4710,s061,6689718277,brief,forward,I am good at resisting temptation.,3.0,False
28,34.366,28.688,0.426381,8.5,5678,s061,6689718277,grit,reverse,New ideas and projects sometimes distract me f...,3.0,False
29,42.763,37.894,-0.382619,8.5,4869,s061,6689718277,future_time,forward,There are only limited possibilities in my fut...,3.0,False


In [10]:
df_clean['trial_type'] = df_clean['item_text'].map(get_items_order())

NameError: name 'get_items_order' is not defined

In [145]:
df_clean

Unnamed: 0,movement_onset,onset,response_time,duration,rt,subject,possible_responses,survey,item_coding,item_text,coded_response,junk,trial_type
25,7.143,1.004,0.887381,8.5,6139,s061,6689718277,brief,reverse,Pleasure and fun sometimes keep me from gettin...,2.0,False,Q17
26,18.44,10.21,2.978381,8.5,8230,s061,66897182,upps,forward,"Sometimes when I feel bad, I can't seem to sto...",2.0,False,Q35
27,24.125,19.415,-0.541619,8.5,4710,s061,6689718277,brief,forward,I am good at resisting temptation.,3.0,False,Q09
28,34.366,28.688,0.426381,8.5,5678,s061,6689718277,grit,reverse,New ideas and projects sometimes distract me f...,3.0,False,Q01
29,42.763,37.894,-0.382619,8.5,4869,s061,6689718277,future_time,forward,There are only limited possibilities in my fut...,3.0,False,Q30
30,51.173,47.304,-1.382619,8.5,3869,s061,6689718277,brief,forward,I refuse things that are bad for me.,4.0,False,Q14
31,63.017,56.441,1.324381,8.5,6576,s061,6689718277,grit,reverse,I have difficulty maintaining my focus on proj...,2.0,False,Q06
32,69.397,65.716,-1.570619,8.5,3681,s061,6689718277,future_time,forward,I could do anything I want in the future.,3.0,False,Q27
33,80.873,75.057,0.564381,8.5,5816,s061,6689718277,brief,forward,People would say that I have iron self-discipl...,4.0,False,Q16
34,92.696,84.195,3.249381,8.5,8501,s061,66897182,upps,forward,I tend to act without thinking when I am reall...,2.0,False,Q40


In [40]:
df.columns

Index(['rt', 'key_press', 'trial_type', 'trial_index', 'time_elapsed',
       'internal_node_id', 'subject', 'text', 'block_duration',
       'timing_post_trial', 'trial_id', 'stimulus', 'possible_responses',
       'stim_duration', 'survey', 'item_coding', 'item_text', 'options',
       'response', 'coded_response', 'exp_id'],
      dtype='object')

In [41]:
glob('../behavioral_data/raw/*/*')

TypeError: 'module' object is not callable