# Data Analysis: Manipulating CS Variability

Januar - 2021, Kathrin Reichmann

Content: 
    (1) Data preparation
    (2) Demographics


## Read Tables

Load packages

In [6]:
setwd("\\\\sn00.zdv.uni-tuebingen.de/siskr01/Documents/Github/CSCond_online/data")

library(dplyr)
#library(tidyverse)

Read Tables

In [7]:
filenames <- dir()
dat <- data.frame()
for (i in filenames){
  dat <- rbind(dat, read.table(i, header = TRUE, sep = ",", encoding = "UTF-8"))
}

In [21]:
str(dat)

"number of columns of result is not a multiple of vector length (arg 2)"

Check comments

In [9]:
comments_one <- dat$responses[dat$trial_type == "survey-text" & dat$condition == "one_one"]
#comments_one

In [10]:
comments_many <- dat$responses[dat$trial_type == "survey-text" & dat$condition == "many_one"]
#comments_many

## Data Prep

Exclude participants?

In [11]:
# total before excluding participants
length(unique(dat$subject))

In [12]:
dat$subject <- factor(dat$subject)
dat$response <- as.numeric(dat$response)
dat$responses <- as.numeric(dat$responses)
dat$rt <- as.numeric(dat$rt)

In [13]:
#chinese speaking participants
#0 = ja
get_tables <- aggregate(response ~ subject + chinese, dat, mean)
table(get_tables$chinese)
dat <- dat[!dat$chinese == "0",]


  0   1   2 
 24 211   3 

In [14]:
#participants who did not pay attention
get_tables <- aggregate(response ~ subject + hinschauen, dat, mean)
table(get_tables$hinschauen) #1 = nicht hinschauen
dat <- dat[!dat$hinschauen == "1",]
#table(dat$hinschauen)


  0   1 
213   1 

In [15]:
#who were distracted
get_tables <- aggregate(response ~ subject + ablenkung, dat, mean)
table(get_tables$ablenkung) # 0 = abgelenkt
dat <- dat[!dat$ablenkung == "0",]
#table(dat$ablenkung)


  0   1 
 10 200 

In [16]:
#who did not follow instructions
get_tables <- aggregate(response ~ subject + gewissenhaft, dat, mean)
table(get_tables$gewissenhaft) # 1 = nicht gewissenhaft


  0 
200 

In [60]:
#always clicked the same answer in direct measure
subjects <- ggplot(dat, aes (x= subject, y = response, col = subject)) +
                geom_point(show.legend = FALSE)
#subjects

In [326]:
#answered too slowly in indirect measure (rt > 3 SDs)
exclude <- aggregate(rt ~ subject, dat, mean)
exclusion_sd <- sd(na.omit(dat$rt))*3
exclusion_sd
exclude[exclude$rt > exclusion_sd,]

subject,rt


In [17]:
#total after excluding participants
length(unique(dat$subject))

In [18]:
#condition
get_tables <- aggregate(response ~ subject + condition, dat, mean)
table(get_tables$condition)


many_one  one_one 
      99      101 

In [19]:
#measure
get_tables <- aggregate(response ~ subject + measure, dat, mean)
table(get_tables$measure)


indirect_first   direct_first 
            93            107 

delete columns that we don't need

In [20]:
#data.frame(colnames(dat))
dat[,1:5] <- NULL
dat$codeword <- NULL
dat$url <- NULL

convert to factor later on

In [21]:
as_factor <- c("subject", "val", "condition_code", "condition", "measure", "measure_code", "type", "type_specific", "category", "cs_selected")

Adapt names of stimuli

In [22]:
as.factor(gsub(".png$", "", dat$target))-> dat$target
as.factor(gsub("targets/", "", dat$target))-> dat$target
as.factor(gsub(".png$", "", dat$cs_selected))-> dat$cs_selected
as.factor(gsub("category", "", dat$cs_selected))-> dat$cs_selected

## Demographics 

In [23]:
demographics <- dat[c('subject', 'age', 'gender', 'education', 'chinese', 'handedness')]
demographics <- aggregate(. ~ subject, demographics, median);


In [24]:
table(demographics$education)
#education
#1	Studierende/r
#2	Berufstätig
#3	Arbeitssuchend
#4	keine Angabe


  1   2   4 
189  10   1 

In [25]:
##gender
#0	weiblich
#1	männlich
#2	divers
#3	keine Angabe
table(demographics$gender)
#149/201
#48/201


  0   1   2   3 
149  48   1   2 

In [26]:
summary(demographics$age)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  18.00   21.00   23.00   24.14   25.00   69.00 

## Direct Measure 

### Data Preparation & Extract relevant columns

In [27]:
#extract relevant columns
direct <- dat[dat$task == "direct" | dat$task == "all",] 
direct <- direct[c('response', 'subject', 'condition', 'condition_code', 'measure', 'measure_code', 'val', 'type', 'type_specific', 'category', 'cs_selected')]

for (factor in as_factor){
  direct[, factor] <- as.factor(direct[,factor])
}

direct$type_specific <- factor(direct$type_specific, levels = c('CS', 'GSold', 'GSnew', 'abstract', 'all'))
direct$response <- as.numeric(direct$response)
#str(direct)

In [None]:
#rename levels of type_specific

direct$type_specific <- factor(direct$type_specific, levels = c("CS", "GSold", "GSnew", "abstract", "all"), labels = c("CS", "GSsim", "GSdiff", "Feature", "All"))


In [100]:
#export files
setwd("\\\\sn00.zdv.uni-tuebingen.de/siskr01/Documents/Github/CSCond_analysis/CSCond_analysis/data")
#setwd("C:/Users/reich/Documents/GitHub/CSCond_analysis")
write.csv2(direct, file = 'direct.csv')

##  Indirect Measure

indirect: 0 = unangenehm, 1 = angenehm

### Data preparation & Extract relevant columns

In [49]:
#extract relevant columns
indirect <- dat[dat$task == "indirect",]
indirect <- indirect[c('indirect', 'rt', 'subject', 'condition', 'condition_code', 'measure', 'measure_code', 'val', 'type', 'type_specific', 'category', 'cs_selected', 'target', 'nr_pres')]

as_factor <- append(as_factor, "target")
for (factor in as_factor){
  indirect[, factor] <- as.factor(indirect[,factor])
}

In [50]:
indirect$response <- as.numeric(indirect$indirect)
indirect$indirect <- NULL
indirect$nr_pres <- as.numeric(indirect$nr_pres)
indirect$rt <- as.numeric(indirect$rt)
str(indirect)

'data.frame':	8921 obs. of  14 variables:
 $ rt            : num  68 76 18 5 10 25 13 110 44 53 ...
 $ subject       : Factor w/ 238 levels "02a80kdxm7","03rysffm8e",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ condition     : Factor w/ 2 levels "many_one","one_one": 1 1 1 1 1 1 1 1 1 1 ...
 $ condition_code: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ measure       : Factor w/ 2 levels "indirect_first",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ measure_code  : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ val           : Factor w/ 3 levels "","neg","pos": 3 3 3 3 3 2 2 3 3 2 ...
 $ type          : Factor w/ 4 levels "","All","CS",..: 3 3 3 3 4 3 3 3 3 4 ...
 $ type_specific : Factor w/ 6 levels "","abstract",..: 4 4 4 4 6 4 4 4 4 6 ...
 $ category      : Factor w/ 4 levels "1","2","3","4": 3 4 4 3 3 2 1 4 4 2 ...
 $ cs_selected   : Factor w/ 61 levels "","1/CS1","1/CS10",..: 35 47 51 41 34 17 8 56 53 24 ...
 $ target        : Factor w/ 49 levels "","1","10","11",..: 41 32 17 23 16 10 38 43 

In [51]:
indirect$type_specific <- factor(indirect$type_specific, levels = c('CS', 'GSold', 'GSnew', 'abstract'))

In [232]:
#export files
#setwd("\\\\sn00.zdv.uni-tuebingen.de/siskr01/Documents/Github/CSCond_analysis/CSCond_analysis/data")
#setwd("C:/Users/reich/Documents/GitHub/CSCond_analysis")
#write.csv2(indirect, file = 'indirect.csv')