# Clean Task Data
This script takes the raw response data from the PsychoPy program that presented the task to participants in the MRI scanner and creates a trial-level dataframe with all the trials, response time, accuracy, and trial type information. It also excludes trials that we won't be using in our analysis, such as outliers, incorrect responses, and rotated trials.

## 1: Set up environment and get task metadata

In [None]:
rm(list=ls())
library(reshape2)
library(plyr)

##Set path to CSV files with raw task data
data_dir<-"/Users/steventompson/Git/tompson_netlearn_fmri/data/subj_data/raw_task_data"

########################################################################################################
########################################################################################################
### Step 1: Get metadata for each file in data folder ###
########################################################################################################
########################################################################################################


##List the files in the data folder (bare file names, no directory part)
filenames <- list.files(data_dir)

# One metadata row per raw data file. Each file name is split on "_" and read
# as subj<ID>_log<Cond>_run<N>.csv:
#   pID       - numeric ID left after stripping "subj" from the first piece
#   Cond      - second piece with "log" stripped
#   Run       - numeric run number left after stripping "run" and ".csv"
#   StartTime - file modification time as numeric seconds since the epoch
#               (presumably a proxy for when the run started -- confirm)
# All columns are preallocated so the frame has the same shape even when the
# folder is empty.
metaData <- data.frame(
  "pID" = rep(NA, length(filenames)),
  "Cond" = rep(NA, length(filenames)),
  "Run" = rep(NA, length(filenames)),
  "StartTime" = rep(NA, length(filenames))
)

# seq_along() yields an empty sequence for an empty folder, whereas
# 1:length(filenames) would iterate over c(1, 0).
for (i in seq_along(filenames)) {
  file1 <- strsplit(filenames[i], "_")
  metaData$pID[i] <- as.numeric(gsub("subj", "", file1[[1]][[1]]))
  metaData$Cond[i] <- gsub("log", "", file1[[1]][[2]])
  # Escape the dot and anchor the extension so only a literal trailing ".csv"
  # is removed.
  metaData$Run[i] <- as.numeric(gsub("run|\\.csv$", "", file1[[1]][[3]]))
  # list.files() returns bare names, so the directory must be prepended;
  # otherwise file.info() looks in the working directory and returns NA.
  metaData$StartTime[i] <- as.numeric(
    file.info(file.path(data_dir, filenames[i]))$mtime
  )
}

## 2: Load and merge subject task data into single dataframe
Loop over each subject and run, load the raw task data file, and append its trial-level data to the combined dataframe.

In [None]:
########################################################################################################
########################################################################################################
### Step 2: Combine each run's data into single file ###
########################################################################################################
########################################################################################################

# Read every raw file, count its missed responses, tag its rows with the
# condition taken from metaData, and drop rows without a trial number.
# The per-file frames are collected in a preallocated list and row-bound once
# at the end instead of growing combData inside the loop.
run_data <- vector("list", length(filenames))
missed <- integer(length(filenames))

for (i in seq_along(filenames)) {
  #set file name and read the data for that file into R
  fileName <- filenames[i]
  subData <- read.csv(
    file.path(data_dir, fileName),
    header = TRUE,
    stringsAsFactors = FALSE
  )

  #Count total number of missing values; the raw file stores a missed response
  #as the quoted string 'NA'. na.rm keeps a genuine NA in resp_raw from
  #turning the whole count into NA.
  missed[i] <- sum(as.character(subData$resp_raw) == "'NA'", na.rm = TRUE)

  #Add condition variable and remove extra rows (rows with no trial number)
  subData$Cond <- metaData$Cond[i]
  subData <- subset(subData, subset = !is.na(subData$trialNum))

  run_data[[i]] <- subData
}

metaData$missedResp <- missed

# Bind all runs in a single call. The leading empty data frame mirrors the
# original starting point and makes an empty file list yield an empty frame.
# Row names of the result are whatever rbind() generates and carry no meaning.
combData <- do.call(rbind, c(list(data.frame()), run_data))

#Convert variables to remove unnecessary characters and fix data types
combData$pID <- as.numeric(combData$pID)
combData$resp_raw <- gsub("[']", "", combData$resp_raw) #strip quote characters
combData$rt_raw <- as.numeric(gsub("[']", "", combData$rt_raw)) #strip quotes, make numeric
combData$rt_raw <- combData$rt_raw * 1000 #change RT to milliseconds

#remove temporary variables; only names that actually exist are removed, so
#the cleanup does not warn when a loop never ran
rm(list = intersect(
  c("i", "file1", "subData", "fileName", "run_data", "missed"),
  ls()
))

In [None]:
#check summary data for each subject to determine who to exclude

print('# of Trials')
#number of trials: rows with a non-missing walk value, per subject
print(tapply(combData$walk, combData$pID, function(x) sum(!is.na(x))))
print('')
print('# of Trials with No Response')
#number of trials with no response: rows with a missing RT, per subject
print(tapply(combData$rt_raw, combData$pID, function(x) sum(is.na(x))))
print('')
print('Percent Correct Trials')
#proportion correct: correct trials divided by that subject's own number of
#rows, so subjects with fewer than 2000 trials are not under-reported
print(tapply(
  combData$correct_raw,
  combData$pID,
  function(x) sum(x, na.rm = TRUE) / length(x)
))


# Recorded output from an earlier run, kept for reference. In that run the
# proportion correct was computed as (number correct) / 2000 for every
# subject. Subject 13 has only 1000 trials, so its 0.3640 below corresponds to
# 728 correct trials, i.e. 0.728 of 1000 under the per-subject denominator
# used above (assuming one row per trial).
#[1] "# of Trials"
#   1    2    4    5    8    9   11   12   13   15   16   18   19   20   21   23 
#2000 2000 2000 2000 2000 2000 2000 2000 1000 2000 2000 2000 2000 2000 2000 2000 
#  24   25   26   27   28   29   30   31   32   33   34   35   36   37   39 
#2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 
#[1] ""
#[1] "# of Trials with No Response"
#   1    2    4    5    8    9   11   12   13   15   16   18   19   20   21   23 
#  98   45   35   55   86   69   82   56   32  314   54   81   77  669   32  274 
#  24   25   26   27   28   29   30   31   32   33   34   35   36   37   39 
#  65   38   61   59 1339   34   56   88   82   50  273   88   41  334   73 
#[1] ""
#[1] "Percent Correct Trials"
#     1      2      4      5      8      9     11     12     13     15     16 
#0.7540 0.9230 0.8975 0.9010 0.9005 0.9085 0.8535 0.8845 0.3640 0.6020 0.9025 
#    18     19     20     21     23     24     25     26     27     28     29 
#0.8705 0.8610 0.4290 0.9580 0.7220 0.8960 0.9280 0.7190 0.8875 0.2390 0.9295 
#    30     31     32     33     34     35     36     37     39 
#0.8950 0.8590 0.7935 0.9025 0.7565 0.8320 0.8385 0.6340 0.9120 

## 3: Identify transition and non-transition trials
Add variable for node type to each trial based on the location of the image in the network and the preceding trial(s).

In [None]:
########################################################################################################
########################################################################################################
### Step 3: Add variables to each trial ###
########################################################################################################
########################################################################################################

# Transition coding
# -----------------
# Two consecutive trials (trial - 1 -> trial) form a transition pair when the
# walk moves between nodes 0<->9 or 4<->5, in either direction. For each pair:
#   pre        = 1 on the first trial of the pair (pre-transition node)
#   transition = 1 on the second trial of the pair (transition node)
#   grouping   = "transition" on the second trial, but only when the first
#                trial of the pair is not itself flagged as a transition;
#                every other trial keeps the default "x"
# NOTE(review): an earlier version of this comment listed the pairs as
# 0<->14, 4<->5, 9<->10, which is not what the code tests. Confirm which set
# matches the graph used in this task.
# The counts mentioned in earlier notes (times an image appeared in the last
# 10 trials, trials since the image was last shown) are not computed here.

# Add pre / transition / grouping columns to the trials of one condition.
#
# cond_data: data frame of trials for a single condition with columns pID,
#            trialNum and walk (walk holds the node shown on each trial).
# Returns cond_data with the three columns added. Trials are matched to their
# predecessor by trialNum within each pID, and are visited in row order.
label_transitions <- function(cond_data) {
  #set default values (rep() so an empty condition subset is also accepted)
  cond_data$pre <- rep(0, nrow(cond_data))
  cond_data$transition <- rep(0, nrow(cond_data))
  cond_data$grouping <- rep("x", nrow(cond_data))

  for (subj in unique(cond_data$pID)) {
    # walk and trialNum never change inside the loop, so look them up once
    is_subj <- cond_data$pID == subj
    subj_trials <- cond_data$trialNum[is_subj]
    subj_nodes <- cond_data$walk[is_subj]

    for (trial in subj_trials) {
      # Trial 1 has no predecessor and so cannot complete a transition pair
      if (trial > 1) {
        node <- subj_nodes[subj_trials == trial]
        pre_node <- subj_nodes[subj_trials == (trial - 1)]

        crossed <- (pre_node == 0 & node == 9) | (pre_node == 9 & node == 0) |
          (pre_node == 4 & node == 5) | (pre_node == 5 & node == 4)

        if (crossed) {
          prev_rows <- is_subj & cond_data$trialNum == (trial - 1)
          this_rows <- is_subj & cond_data$trialNum == trial

          cond_data$pre[prev_rows] <- 1
          cond_data$transition[this_rows] <- 1

          # Only label the trial when the previous trial was not itself a
          # transition, so back-to-back crossings (e.g. 4 -> 5 -> 4) label
          # the first crossing only.
          if (cond_data$transition[prev_rows] != 1) {
            cond_data$grouping[prev_rows] <- "x"
            cond_data$grouping[this_rows] <- "transition"
          }
        }
      }
    }
  }

  cond_data
}

#Split into NS and Soc datasets and code each one with the same rules
combDataNS <- label_transitions(subset(combData, subset = combData$Cond == "NS"))
combDataSoc <- label_transitions(subset(combData, subset = combData$Cond == "Soc"))

#Recombine, NS rows first and Soc rows second
combData <- rbind(combDataNS, combDataSoc)

In [None]:
# Write the combined trial-level data frame to a CSV file in the subject data
# folder (write.csv defaults are used, so row names are written as well)
savepath <- "/Users/steventompson/Git/tompson_netlearn_fmri/data/subj_data"
savename <- "tompson_netlearn_fmri_trial_data.csv"

write.csv(combData, file = file.path(savepath, savename))


Steven Tompson | 2019