In [None]:
###############-------------------------#############################-------------------------###########################-------
# DATA ORGANIZING & EXPLORATION #
###############-------------------------#############################-------------------------###########################-------

# This notebook contains some steps and functions needed to clean the data and explore the choice behavior of 
# human participants who performed a modified version of a volatile two-armed bandit task (Browning et al., 2015; Gagne et al., 2020)


In [None]:
## load the packages 
library(ggplot2)
library(tidyverse)
library(Hmisc)
library(readr)
library(dichromat)

## 1. Data Cleaning

In [None]:
# Read every file found in the task-data folder and stack them into one table
path <- "C:/Users/selin/OneDrive/Documents/GitHub/SelinProject_SVV/Jan26/dataJan26"
task_files <- list.files(path, full.names = TRUE)
PilotData <- bind_rows(lapply(task_files, read_csv))

In [None]:
# Find those who missed 3 or more trials in a row within a block
#
# A trial counts as missed when its `response` is the string "None". Only task
# trials (TrialNo > 0) are considered. Three consecutive misses appear as at
# least two successive differences of 1 between the missed trial numbers, which
# is what the run-length test below looks for.
# NOTE(review): this assumes the rows of a block are ordered by TrialNo --
# confirm against the task files.

TooManyMissed <- data.frame(SubjectID = character(), Block = character(), NumberOfMissed = numeric())

AllPIDs <- unique(PilotData$PID)

for (i in seq_along(AllPIDs)) {
  dataTemp <- PilotData %>% filter(PID == AllPIDs[i], TrialNo > 0)
  BlockNames <- unique(dataTemp$Block_name)
  for (j in seq_along(BlockNames)) {
    dataTemp0 <- dataTemp %>% filter(Block_name == BlockNames[j])
    # which() drops NA comparisons, so a missing response is not counted as a miss
    # and cannot turn the condition below into NA
    MissedTrials <- dataTemp0$TrialNo[which(dataTemp0$response == "None")]
    result <- rle(diff(MissedTrials))
    if (any(result$lengths >= 2 & result$values == 1, na.rm = TRUE)) {
      TooManyMissed <- TooManyMissed %>%
        add_row(SubjectID = as.character(AllPIDs[i]),
                Block = as.character(BlockNames[j]),
                NumberOfMissed = length(MissedTrials))
    }
  }
}
print(TooManyMissed)
unique(TooManyMissed$SubjectID) # 9 people

# Exclude those with too many missing trials
PilotData <- PilotData %>% filter(!(PID %in% TooManyMissed$SubjectID))

In [None]:
# Too many of the same (button) response in a row

#' Flag subject/block pairs with too many identical responses in a row
#'
#' Only task trials (TrialNo > 0) are inspected. Within each subject and block
#' the `response` column is run-length encoded in row order; the pair is
#' reported when any run is strictly longer than `maxnum`.
#'
#' @param data A data frame with columns PID, TrialNo, Block_name and response.
#' @param maxnum The longest run of identical responses that is still accepted.
#' @return A data frame with one row per flagged subject/block pair and the
#'   columns SubjectID (character), Block (character) and NumberSame (the
#'   length of the longest run in that block). Zero rows if nothing is flagged.
RespInARow <- function(data, maxnum) {
  subject_ids <- unique(data$PID)
  flagged <- list()

  for (i in seq_along(subject_ids)) {
    # which() drops rows where the comparison is NA, like dplyr::filter() does
    subject_rows <- data[which(data$PID == subject_ids[i] & data$TrialNo > 0), , drop = FALSE]
    block_names <- unique(subject_rows$Block_name)

    for (j in seq_along(block_names)) {
      block_rows <- subject_rows[which(subject_rows$Block_name == block_names[j]), , drop = FALSE]
      run_lengths <- rle(as.character(block_rows$response))$lengths

      if (any(run_lengths > maxnum)) {
        # Report the longest run itself; the threshold is `maxnum`, not a fixed
        # number, so the value is defined for every choice of `maxnum`
        flagged[[length(flagged) + 1]] <- data.frame(
          SubjectID = as.character(subject_ids[i]),
          Block = as.character(block_names[j]),
          NumberSame = as.numeric(max(run_lengths)),
          stringsAsFactors = FALSE
        )
      }
    }
  }

  if (length(flagged) == 0) {
    return(data.frame(SubjectID = character(), Block = character(), NumberSame = numeric(),
                      stringsAsFactors = FALSE))
  }
  do.call(rbind, flagged)
}

# Lenient criterion: runs of more than 13 identical responses
SameResponse <- RespInARow(PilotData, 13)
lenient_ids <- unique(SameResponse$SubjectID)
lenient_ids
length(lenient_ids) # 8 people

# Stricter criterion (reported only, not used for exclusion): runs of more than 10
SameResponse_strict <- RespInARow(PilotData, 10)
strict_ids <- unique(SameResponse_strict$SubjectID)
strict_ids
length(strict_ids) #21 people

# Drop the participants flagged by the lenient criterion and save the cleaned data
PilotData <- filter(PilotData, !(PID %in% SameResponse$SubjectID))
write.csv(PilotData, file = "PilotData.csv") # 192 people

## 2. Exploration

In [None]:
## Plots based on ONLY MAGNITUDE information (e.g., magnitude on left/right, choice left/right)

base_par <- "/data"

# Build the magnitude data set: one table per subject, stacked after the loop.
# Each subject file is read from <working directory>/data<iPar>.csv.
# `nSubjects` must already be defined when this cell runs.
magnitude_list <- vector("list", nSubjects)

# loop over individual subject data
for (iPar in seq_len(nSubjects)) {

  # load data
  data_file <- paste0(getwd(), base_par, iPar, ".csv")
  data <- read.csv(data_file)

  # keep only the rows with Trial numbers > 0 corresponding to task trials
  # and select only the relevant columns
  data <- data %>%
    filter(TrialNo > 0) %>%
    select(c(TrialNo, Block_name, Magnitude_left, Magnitude_right, response))

  # make sure the magnitude information is numeric
  data$Magnitude_right <- as.numeric(data$Magnitude_right)
  data$Magnitude_left <- as.numeric(data$Magnitude_left)

  # Smaller: which side holds the smaller magnitude, coded so it can be compared
  # with `response` ("z" when left is smaller, "m" when right is smaller,
  # "both" when the magnitudes are equal).
  # NOTE(review): presumably "z"/"m" are the left/right response keys -- confirm.
  data$Smaller <- ifelse(data$Magnitude_left < data$Magnitude_right, "z",
                         ifelse(data$Magnitude_left > data$Magnitude_right, "m", "both"))

  # SmallerChosen: 1 when the response matches the smaller side or when the
  # magnitudes are equal, 0 otherwise. Computed on whole columns, so a missing
  # magnitude or response yields NA instead of stopping the loop.
  data$SmallerChosen <- as.numeric(data$Smaller == "both" | data$response == data$Smaller)

  # add the subject ID (rep() keeps this valid for a file without task trials)
  data$SubjectID <- rep(iPar, nrow(data))

  magnitude_list[[iPar]] <- data
}

# stack all subjects; the empty data frame keeps the result defined for zero subjects
data_magnitude <- do.call(rbind, c(list(data.frame()), magnitude_list))

# missed trials ("None") carry no choice information; indexing with which()
# also works when there is no missed trial at all
if (nrow(data_magnitude) > 0) {
  data_magnitude$SmallerChosen[which(data_magnitude$response == "None")] <- NA
}


In [None]:
# Magnitude-Choice Plots for each subject

# Adjust the display size of the plots
# (sets the repr.plot.width / repr.plot.height session options; presumably these
# are read by the notebook's plot display -- confirm for the kernel in use)
options(repr.plot.width=6, repr.plot.height=3) 

#' Build one choice-by-trial plot per subject
#'
#' For every subject 1..nSubjects the rows with that SubjectID are plotted:
#' the chosen column against TrialNo as points with a loess smooth, faceted by
#' SubjectID and Block_name. Relies on the global `nSubjects`.
#'
#' @param data A data frame with columns SubjectID, TrialNo, Block_name and the
#'   column named by `ChosenOption`.
#' @param ChosenOption Unquoted name of the column to put on the y axis.
#' @return A list of length `nSubjects` holding one ggplot object per subject.
AllSubjectsPlot <- function(data, ChosenOption){
    PlotList <- vector(mode = "list", length = nSubjects)
    for (iPar in seq_len(nSubjects)){
        SubjectData <- data[which(data$SubjectID == iPar), ]
        # {{ }} forwards the caller's column name into aes(); a bare
        # `ChosenOption` would be looked up as a variable instead of a column
        ggmag <- ggplot(data = SubjectData, aes(TrialNo, {{ ChosenOption }})) +
                 stat_smooth(method = "loess", formula = y ~ x, color = "maroon2", size = 1) +
                 geom_point(size = 1, alpha = 1) +
                 facet_wrap(~ SubjectID + Block_name) +
                 scale_x_continuous(breaks = seq(0, 60, 5)) +
                 theme_bw() + theme(strip.text.x = element_text(margin = margin(.02, 0, .02, 0, "cm")))
        PlotList[[iPar]] <- ggmag
    }

    return(PlotList)
}

# Build the per-subject plots of SmallerChosen against trial number,
# then show the resulting list of plots
PlotList <- AllSubjectsPlot(data_magnitude, SmallerChosen)
PlotList

In [None]:
## Plots based on ONLY PROBABILITY information (e.g., optimal choice based on the higher vs. lower "no loss" probability)

base_par <- "/data"

# Build the probability data set: one table per subject, stacked after the loop.
# Each subject file is read from <working directory>/data<iPar>.csv.
# `nSubjects` must already be defined when this cell runs.
probability_list <- vector("list", nSubjects)

# loop over individual subject data
for (iPar in seq_len(nSubjects)) {

  # load data
  data_file <- paste0(getwd(), base_par, iPar, ".csv")
  data <- read.csv(data_file)

  # keep only the rows with Trial numbers > 0 corresponding to task trials
  # and select only the relevant columns
  data <- data %>%
    filter(TrialNo > 0) %>%
    select(c(TrialNo, Block_name, optimal, response, correct_keyboard_response1))

  # add the subject ID (rep() keeps this valid for a file without task trials)
  data$SubjectID <- rep(iPar, nrow(data))

  probability_list[[iPar]] <- data
}

# stack all subjects; the empty data frame keeps the result defined for zero subjects
data_probability <- do.call(rbind, c(list(data.frame()), probability_list))

if (nrow(data_probability) > 0) {
  # The column is converted to numeric on the stacked table, then recoded
  # 1 -> 0 and 2 -> 1 (in that order, so recoded values are not recoded again).
  # NOTE(review): the 1/2 coding looks like factor level codes from read.csv;
  # confirm what as.numeric() yields for this column in the R version in use.
  correct <- as.numeric(data_probability$correct_keyboard_response1)

  # missed trials ("None") are set to NA; which() also copes with no missed trial
  correct[which(data_probability$response == "None")] <- NA
  correct[which(correct == 1)] <- 0
  correct[which(correct == 2)] <- 1

  data_probability$correct_keyboard_response1 <- correct
}

#print(data_probability)

In [None]:
# Probability-Choice Plots for each subject

# Adjust the display size of the plots
# (sets the repr.plot.width / repr.plot.height session options; presumably these
# are read by the notebook's plot display -- confirm for the kernel in use)
options(repr.plot.width=6, repr.plot.height=3) 

# Build the per-subject plots of correct_keyboard_response1 against trial number,
# then show the resulting list of plots
PlotList2 <- AllSubjectsPlot(data_probability, correct_keyboard_response1)
PlotList2

In [None]:
# The cumulative accuracy proportion

base_par <- "/data"

# Rebuild the probability data set with a cumulative accuracy column (acc_prob):
# within each block, the running mean of correct_keyboard_response1 up to and
# including the current row. One table per subject, stacked after the loop.
# `nSubjects` must already be defined when this cell runs.
probability_list <- vector("list", nSubjects)

# loop over individual subject data
for (iPar in seq_len(nSubjects)) {

  # load data
  data_file <- paste0(getwd(), base_par, iPar, ".csv")
  data <- read.csv(data_file)

  # keep only the rows with Trial numbers > 0 corresponding to task trials
  # and select only the relevant columns
  data <- data %>%
    filter(TrialNo > 0) %>%
    select(c(TrialNo, Block_name, optimal, response, correct_keyboard_response1))

  # correct the data type and recode 1 -> 0, then 2 -> 1
  # NOTE(review): unlike the per-subject plots, missed trials ("None") are not
  # set to NA here -- confirm this is intended.
  correct <- as.numeric(data$correct_keyboard_response1)
  correct[which(correct == 1)] <- 0
  correct[which(correct == 2)] <- 1
  data$correct_keyboard_response1 <- correct
  data$Block_name <- as.factor(data$Block_name)

  # Cumulative accuracy per block. ave() applies the running mean within each
  # block and writes the values back to the rows they came from, so the result
  # stays aligned for any row order and for blocks with a single trial.
  # An NA value makes the remaining values of that block NA.
  data$acc_prob <- ave(correct, data$Block_name,
                       FUN = function(v) cumsum(v) / seq_along(v))

  # add the subject ID (rep() keeps this valid for a file without task trials)
  data$SubjectID <- rep(iPar, nrow(data))

  probability_list[[iPar]] <- data
}

# stack all subjects; the empty data frame keeps the result defined for zero subjects
data_probability <- do.call(rbind, c(list(data.frame()), probability_list))


In [None]:
# Preparing for plotting the data separately for different trial sequences subjects were randomly assigned to
DataPlot <- data_probability

# Block names belonging to each of the four trial sequences
Sequence1Blocks <- c("Block_PS1","Block_PV1","Block_RS1","Block_RV1" )
Sequence2Blocks <- c("Block_PS2","Block_PV2","Block_RS2","Block_RV2" )
Sequence3Blocks <- c("Block_PS3","Block_PV3","Block_RS3","Block_RV3" )
Sequence4Blocks <- c("Block_PS4","Block_PV4","Block_RS4","Block_RV4" )

# One data set per sequence, keeping only the rows of that sequence's blocks
DataPlot1 <- filter(DataPlot, Block_name %in% Sequence1Blocks)
DataPlot2 <- filter(DataPlot, Block_name %in% Sequence2Blocks)
DataPlot3 <- filter(DataPlot, Block_name %in% Sequence3Blocks)
DataPlot4 <- filter(DataPlot, Block_name %in% Sequence4Blocks)

In [None]:
# Plotting the data separately for these sequences
# (first set the repr.plot.width / repr.plot.height session options used for plot display)
options(repr.plot.width=6, repr.plot.height=3) 


#' Plot cumulative accuracy over trials for one trial sequence
#'
#' Draws, per Block_name, the bootstrapped mean and confidence band of
#' `acc_prob` at each TrialNo ("mean_cl_boot", which needs the Hmisc package),
#' with vertical reference lines at trials 15, 30 and 45 spanning y = 0 to 1.
#' The plot is printed as a side effect.
#'
#' @param data A data frame with columns TrialNo, acc_prob and Block_name.
#' @return The ggplot object, invisibly.
PlotSequenceAcc <- function(data){
    # Keep the plot in a local variable: a ggplot built inside a function is
    # only drawn when it is printed explicitly
    SequencePlot <- ggplot(data = data, aes(x = TrialNo, y = acc_prob, color = Block_name, group = Block_name)) +
        stat_summary(fun.data = "mean_cl_boot", geom = "smooth", se = TRUE, alpha = 0.2) +
        scale_x_continuous(breaks = seq(0, 60, 5)) +
        theme_bw() +
        # Fixed reference lines are annotations: they are drawn once each and do
        # not inherit the colour/group mapping of the data
        annotate("segment", x = c(15, 30, 45), xend = c(15, 30, 45), y = 0, yend = 1)

    print(SequencePlot)
    invisible(SequencePlot)
}

# Draw the cumulative-accuracy plot for each of the four trial sequences in turn
for (SequenceData in list(DataPlot1, DataPlot2, DataPlot3, DataPlot4)) {
  PlotSequenceAcc(SequenceData)
}