In [5]:
library(dplyr)
library(data.table)
library(rjson)
library(skimr)
library(tableone)

setwd("~/Desktop/PhD/courses/2025_summer/TDBRAIN/TDBRAIN_participants_V2_data/")


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Attaching package: ‘data.table’


The following objects are masked from ‘package:dplyr’:

    between, first, last




In [6]:
participant <- fread("TDBRAIN_participants_V2.tsv")
participant <- participant %>%
    mutate(across(everything(), ~ stringr::str_replace_all(., "nan", "NA"))) %>%
    mutate(across(everything(), ~ na_if(., "NA")))

data_desc <- fromJSON(file = "TDBRAIN_participants_V2.json")

In [7]:
adhd <- participant %>% filter(indication == "ADHD" | formal_status == "ADHD" | indication == "HEALTHY")
adhd_treatment <- adhd %>% filter(!is.na(ADHD_pre_Hyp_leading))

dim(adhd)
dim(adhd_treatment)

In [8]:
data_desc_df <- data.frame(
  variable = names(data_desc),
  description = sapply(data_desc, function(x) x$description %||% NA),
  format = sapply(data_desc, function(x) x$format %||% NA),
  levels = sapply(data_desc, function(x) {
    if("levels" %in% names(x)) {
      if(is.list(x$levels)) {
        paste(names(x$levels), x$levels, sep = ": ", collapse = "; ")
      } else {
        as.character(x$levels)
      }
    } else {
      NA
    }
  }),
  stringsAsFactors = FALSE
)

In [9]:
vars <- data_desc_df %>% filter(!grepl("neoFFI", variable)) %>% filter(!grepl("YBOCS", variable))
vars$variable

In [10]:
vars

Unnamed: 0_level_0,variable,description,format,levels
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>
participant_id,participant_id,"8-digit code, unique for each subject",string,
indication,indication,Indication as described in manuscript,string,
formal Dx,formal Dx,Formal Diagnosis as described in manuscript,string,
Dataset,Dataset,Which publication the session belongs to,string,
Consent,Consent,Confirmation of consent,,YES
sessSeason,sessSeason,"Season of the sessions measurment meteorologically (Winter: December 1rst to March 1rst, Spring: March 1rst to June 1rst, Summer: June 1rst to September 1rst, Fall: September 1rst to December 1rst)",string,
sessTime,sessTime,measurement in morning <12pm or in the afternoon >12pm,string,
Responder,Responder,response to treatment (for ADHD_NF: either ADHD_post_Hyp or ADHD_post_Att improved with 25%; or for MDD-rTMS: BDI-post improved with more than 50% from BDI-pre);for OCD: post-YBOCS total improved with more than 35% from pre-YBOCS,,0: non response; 1: response
age,age,age of the participant,float,
gender,gender,sex of the participant,,0: Female; 1: Male


In [11]:
colnames(adhd)

In [12]:
table(adhd$Remitter)


 0  1 
23 43 

In [None]:
demographics <- adhd %>% 
    select(participants_ID, indication, formal_status, Dataset, sessSeason, sessTime,
           Responder, Remitter, age, gender, nrSessions,
           cigarette, coffee, beer, drugs, education, meal, sleep, 
           well, vision, hearing, "Weight (kg)", "Height (cm)",
           BDI_pre, BDI_post,
           ADHD_pre_Hyp_leading, ADHD_pre_Att_leading, 
           ADHD_post_Hyp_leading, ADHD_post_Att_leading, 
           `NF Protocol`, `rTMS PROTOCOL`,
           # Cognitive performance variables
           avg_rt_oddb_CP, avg_rt_oddb_FP, n_oddb_CP, n_oddb_FP, n_oddb_CN, n_oddb_FN,
           avg_rt_wm_CP, avg_rt_wm_FP, n_wm_CP, n_wm_FP, n_wm_CN, n_wm_FN) %>%
    mutate(
        # Convert numeric variables
        age = as.numeric(age),
        nrSessions = as.numeric(nrSessions),
        education = as.numeric(education),
        meal = as.numeric(meal),
        sleep = as.numeric(sleep),
        
        # Keep lifestyle "hours ago" variables as numeric but create categorical versions
        cigarette_raw = as.numeric(cigarette),
        coffee_raw = as.numeric(coffee),
        beer_raw = as.numeric(beer),
        drugs_raw = as.numeric(drugs),
        
        # Create numeric versions for analysis (99 → NA for "never")
        cigarette = ifelse(cigarette_raw == 99, NA, cigarette_raw),
        coffee = ifelse(coffee_raw == 99, NA, coffee_raw),
        beer = ifelse(beer_raw == 99, NA, beer_raw),
        drugs = ifelse(drugs_raw == 99, NA, drugs_raw),
        
        # Create categorical versions showing recent use vs never
        cigarette_cat = case_when(
            cigarette_raw == 99 ~ "Never",
            cigarette_raw <= 24 ~ "Within 24 hours",
            cigarette_raw > 24 ~ "More than 24 hours ago",
            TRUE ~ NA_character_
        ),
        coffee_cat = case_when(
            coffee_raw == 99 ~ "Never",
            coffee_raw <= 8 ~ "Within 8 hours",
            coffee_raw > 8 & coffee_raw < 99 ~ "More than 8 hours ago",
            TRUE ~ NA_character_
        ),
        beer_cat = case_when(
            beer_raw == 99 ~ "Never",
            beer_raw <= 24 ~ "Within 24 hours",
            beer_raw > 24 & beer_raw < 99 ~ "More than 24 hours ago",
            TRUE ~ NA_character_
        ),
        drugs_cat = case_when(
            drugs_raw == 99 ~ "Never",
            drugs_raw <= 24 ~ "Within 24 hours",
            drugs_raw > 24 & drugs_raw < 99 ~ "More than 24 hours ago",
            TRUE ~ NA_character_
        ),
        meal = as.numeric(meal),
        sleep = as.numeric(sleep),
        `Weight (kg)` = as.numeric(`Weight (kg)`),
        `Height (cm)` = as.numeric(`Height (cm)`),
        BDI_pre = as.numeric(BDI_pre),
        BDI_post = as.numeric(BDI_post),
        
        # ADHD scores
        ADHD_pre_Hyp_leading = as.numeric(ADHD_pre_Hyp_leading),
        ADHD_pre_Att_leading = as.numeric(ADHD_pre_Att_leading),
        ADHD_post_Hyp_leading = as.numeric(ADHD_post_Hyp_leading),
        ADHD_post_Att_leading = as.numeric(ADHD_post_Att_leading),
        
        # Cognitive performance variables
        avg_rt_oddb_CP = as.numeric(avg_rt_oddb_CP),
        avg_rt_oddb_FP = as.numeric(avg_rt_oddb_FP),
        n_oddb_CP = as.numeric(n_oddb_CP),
        n_oddb_FP = as.numeric(n_oddb_FP),
        n_oddb_CN = as.numeric(n_oddb_CN),
        n_oddb_FN = as.numeric(n_oddb_FN),
        avg_rt_wm_CP = as.numeric(avg_rt_wm_CP),
        avg_rt_wm_FP = as.numeric(avg_rt_wm_FP),
        n_wm_CP = as.numeric(n_wm_CP),
        n_wm_FP = as.numeric(n_wm_FP),
        n_wm_CN = as.numeric(n_wm_CN),
        n_wm_FN = as.numeric(n_wm_FN),
        
        # Create derived variables
        BMI = `Weight (kg)` / ((`Height (cm)` / 100)^2),
        BDI_change = BDI_post - BDI_pre,
        ADHD_Hyp_change = ADHD_post_Hyp_leading - ADHD_pre_Hyp_leading,
        ADHD_Att_change = ADHD_post_Att_leading - ADHD_pre_Att_leading,
        
        # Calculate percentage improvement for ADHD symptoms
        ADHD_Hyp_pct_improve = ifelse(ADHD_pre_Hyp_leading > 0, 
                                     ((ADHD_pre_Hyp_leading - ADHD_post_Hyp_leading) / ADHD_pre_Hyp_leading) * 100, 
                                     NA),
        ADHD_Att_pct_improve = ifelse(ADHD_pre_Att_leading > 0,
                                     ((ADHD_pre_Att_leading - ADHD_post_Att_leading) / ADHD_pre_Att_leading) * 100,
                                     NA),
        
        # Create clean responder definition (25% improvement in either domain)
        Responder_clean = case_when(
            # is.na(ADHD_pre_Hyp_leading) | is.na(ADHD_pre_Att_leading) ~ NA_character_,
            # is.na(ADHD_post_Hyp_leading) | is.na(ADHD_post_Att_leading) ~ NA_character_,
            Responder == "1" ~ "Responder",
            Responder == "0" ~ "Non-Responder",
            # Remitter == "1" ~ "Remitter",
            ADHD_Hyp_pct_improve >= 25 | ADHD_Att_pct_improve >= 25  ~ "Responder",
            ADHD_Hyp_pct_improve < 25 & ADHD_Att_pct_improve < 25 ~ "Non-Responder",
            TRUE ~ NA_character_
        ),
        
        # Create accuracy measures for cognitive tasks
        oddball_accuracy = n_oddb_CP / (n_oddb_CP + n_oddb_FP + n_oddb_CN + n_oddb_FN),
        wm_accuracy = n_wm_CP / (n_wm_CP + n_wm_FP + n_wm_CN + n_wm_FN),
        
        # Categorical variables
        well = factor(
            case_when(well == "-2" ~ "horrible",
                     well == "-1" ~ "less than normal", 
                     well == "1" ~ "normal",
                     well == "2" ~ "better than normal",
                     well == "3" ~ "awesome",
                     TRUE ~ as.character(well)),
            levels = c("horrible", "less than normal", "normal", "better than normal", "awesome"),
            ordered = TRUE
        ),
        vision = factor(vision),
        hearing = factor(hearing),
        
        # Create group variables
        treatment = ifelse(is.na(ADHD_pre_Hyp_leading), "No", "Yes"),
        group = case_when(
            indication == "HEALTHY" ~ "Healthy Control",
            indication == "ADHD" & is.na(ADHD_pre_Hyp_leading) ~ "ADHD (No Treatment)",
            indication == "ADHD" & !is.na(ADHD_pre_Hyp_leading) ~ "ADHD + Neurofeedback",
            TRUE ~ "Other"
        ),
        
        # Age groups
        age_group = case_when(
            age < 18 ~ "Children & Adolescents (<18 years)",
            age >= 18 & age < 60 ~ "Adults (18-59 years)",
            age >= 60 ~ "Older Adults (60+ years)",
            is.na(age) ~ NA_character_,
            TRUE ~ "Check age value"
        ),
        
        # Treatment response (clean definition based on 25% improvement)
        treatment_response = case_when(
            Responder_clean == "Responder" ~ "Responder",
            Responder_clean == "Non-Responder" ~ "Non-Responder", 
            TRUE ~ NA_character_
        )
    )

[1m[22m[36mℹ[39m In argument: `Weight (kg) = as.numeric(`Weight (kg)`)`.
[33m![39m NAs introduced by coercion


In [14]:
colnames(demographics)

In [15]:
fwrite(demographics, "TDBRAIN_participants_demographics_all.csv")

In [16]:
demographics %>%
    select(-c(cigarette_raw, coffee_raw, beer_raw, drugs_raw, BMI, Responder_clean)) %>%
    fwrite("TDBRAIN_participants_demographics_cleaned.csv")

## Demographics by Group

In [17]:
table1_vars <- c("age", "gender", "age_group", "BMI", "education", "sleep", 
                "well", "vision", "hearing", 
                "cigarette", "coffee", "beer", "drugs",  # numeric versions
                "cigarette_cat", "coffee_cat", "beer_cat", "drugs_cat",  # categorical versions
                "BDI_pre", "ADHD_pre_Hyp_leading", "ADHD_pre_Att_leading",
                "nrSessions", "NF Protocol")

table1_categorical <- c("gender", "age_group", "well", "vision", "hearing", "NF Protocol", 
                      "cigarette_cat", "coffee_cat", "beer_cat", "drugs_cat")

table1 <- CreateTableOne(
    vars = table1_vars,
    strata = "group",
    data = demographics,
    factorVars = table1_categorical,
    test = TRUE
)

“no non-missing arguments to min; returning Inf”
“no non-missing arguments to min; returning Inf”
“no non-missing arguments to min; returning Inf”
“no non-missing arguments to max; returning -Inf”
“no non-missing arguments to max; returning -Inf”
“no non-missing arguments to max; returning -Inf”
“no non-missing arguments to min; returning Inf”
“no non-missing arguments to max; returning -Inf”
“no non-missing arguments to min; returning Inf”
“no non-missing arguments to min; returning Inf”
“no non-missing arguments to min; returning Inf”
“no non-missing arguments to max; returning -Inf”
“no non-missing arguments to max; returning -Inf”
“no non-missing arguments to max; returning -Inf”
“no non-missing arguments to min; returning Inf”
“no non-missing arguments to min; returning Inf”
“no non-missing arguments to min; returning Inf”
“no non-missing arguments to max; returning -Inf”
“no non-missing arguments to max; returning -Inf”
“no non-missing arguments to max; returning -Inf”
“Variable 

In [18]:
table1

                                       Stratified by group
                                        ADHD (No Treatment)
  n                                       133              
  age (mean (SD))                       21.60 (14.47)      
  gender = 1 (%)                           93 ( 69.9)      
  age_group (%)                                            
     Adults (18-59 years)                  58 ( 43.9)      
     Children & Adolescents (<18 years)    74 ( 56.1)      
     Older Adults (60+ years)               0 (  0.0)      
  BMI (mean (SD))                       20.05 (4.77)       
  education (mean (SD))                  8.87 (6.02)       
  sleep (mean (SD))                      8.01 (2.26)       
  well (%)                                                 
     horrible                               1 (  0.8)      
     less than normal                       9 (  6.8)      
     normal                               109 ( 82.0)      
     better than normal                  

## Cognitive Performance + Treatment Response

In [19]:
cognitive_data <- demographics %>%
    filter(indication == "ADHD") %>%
    select(group, treatment_response, 
           # Oddball task
           avg_rt_oddb_CP, avg_rt_oddb_FP, oddball_accuracy,
           # Working memory task
           avg_rt_wm_CP, avg_rt_wm_FP, wm_accuracy,
           # Treatment outcomes
           ADHD_pre_Hyp_leading, ADHD_post_Hyp_leading, ADHD_Hyp_change,
           ADHD_pre_Att_leading, ADHD_post_Att_leading, ADHD_Att_change,
           BDI_pre, BDI_post, BDI_change,
           nrSessions, Responder, Remitter)

table3_vars <- c("avg_rt_oddb_CP", "avg_rt_oddb_FP", "oddball_accuracy",
                "avg_rt_wm_CP", "avg_rt_wm_FP", "wm_accuracy",
                "ADHD_pre_Hyp_leading", "ADHD_post_Hyp_leading", "ADHD_Hyp_change", "ADHD_Hyp_pct_improve",
                "ADHD_pre_Att_leading", "ADHD_post_Att_leading", "ADHD_Att_change", "ADHD_Att_pct_improve",
                "BDI_pre", "BDI_post", "BDI_change", "nrSessions")

table3 <- CreateTableOne(
    vars = table3_vars,
    data = cognitive_data,
    test = TRUE
)

“The data frame does not have: ADHD_Hyp_pct_improve ADHD_Att_pct_improve  Dropped”
“These variables only have NA/NaN: avg_rt_oddb_CP avg_rt_oddb_FP avg_rt_wm_CP avg_rt_wm_FP wm_accuracy  Dropped”


In [20]:
table3

                                   
                                    Overall       
  n                                    204        
  oddball_accuracy (mean (SD))        0.16 (0.04) 
  ADHD_pre_Hyp_leading (mean (SD))    5.56 (2.21) 
  ADHD_post_Hyp_leading (mean (SD))   1.76 (2.19) 
  ADHD_Hyp_change (mean (SD))        -3.80 (2.57) 
  ADHD_pre_Att_leading (mean (SD))    7.06 (1.61) 
  ADHD_post_Att_leading (mean (SD))   2.55 (2.67) 
  ADHD_Att_change (mean (SD))        -4.54 (2.78) 
  BDI_pre (mean (SD))                21.82 (10.86)
  BDI_post (mean (SD))                8.00 (7.74) 
  BDI_change (mean (SD))            -14.33 (7.72) 
  nrSessions (mean (SD))              1.08 (0.28) 

## Baseline Predictors of Treatment Success (Neurofeedback subset only)

In [21]:
treatment_data <- demographics %>%
    filter(group == "ADHD + Neurofeedback") %>%
    select(treatment_response, age, gender, BMI, education,
           # Baseline clinical measures
           ADHD_pre_Hyp_leading, ADHD_pre_Att_leading, BDI_pre,
           # Baseline cognitive performance
           oddball_accuracy,
           # Lifestyle factors
           sleep, well, cigarette, coffee, beer, drugs,
            cigarette_cat, coffee_cat, beer_cat, drugs_cat,
           # Treatment details
           nrSessions, `NF Protocol`)

table4_vars <- c("age", "gender", "BMI", "education",
                "ADHD_pre_Hyp_leading", "ADHD_pre_Att_leading", "BDI_pre",
                "oddball_accuracy",
                "sleep", "well", 
                # "cigarette", "coffee", "beer", "drugs",  # numeric versions
                "cigarette_cat", "coffee_cat", "beer_cat", "drugs_cat",  # categorical versions
                "nrSessions", "NF Protocol")

table4_categorical <- c("gender", "well", "NF Protocol", 
                        "cigarette_cat", "coffee_cat", "beer_cat", "drugs_cat")

table4 <- CreateTableOne(
    vars = table4_vars,
    strata = "treatment_response",
    data = treatment_data,
    factorVars = table4_categorical,
    test = TRUE
)

“These variables only have NA/NaN: BMI  Dropped”


In [22]:
table4

                                     Stratified by treatment_response
                                      Non-Responder Responder     p      test
  n                                      10            61                    
  age (mean (SD))                     14.74 (9.32)  27.90 (14.38)  0.007     
  gender = 1 (%)                          5 (50.0)     38 (62.3)   0.698     
  education (mean (SD))                6.30 (5.06)  12.23 (5.37)   0.002     
  ADHD_pre_Hyp_leading (mean (SD))     5.40 (2.46)   5.59 (2.19)   0.803     
  ADHD_pre_Att_leading (mean (SD))     6.80 (2.25)   7.10 (1.49)   0.588     
  BDI_pre (mean (SD))                 29.00 (10.00) 21.20 (10.84)  0.238     
  oddball_accuracy (mean (SD))         0.17 (0.01)   0.17 (0.02)   0.857     
  sleep (mean (SD))                    7.70 (3.27)   7.39 (1.89)   0.673     
  well (%)                                                        <0.001     
     less than normal                     1 (11.1)     11 (18.0)        

## Delete non-ADHD or healthy sample

In [23]:
keep_id <- participant %>% filter(indication == "ADHD" | formal_status == "ADHD" | indication == "HEALTHY") %>% pull(participants_ID)
length(keep_id)

In [24]:
all_id <- participant %>% pull(participants_ID)
delete_id <- setdiff(all_id, keep_id)

In [29]:
base_directory <- "/Users/tianyi/Library/Mobile Documents/com~apple~CloudDocs/TD_Brain_data/preprocessed"

for(id in keep_id) {
  dir_path <- file.path(base_directory, id)
  
  if(dir.exists(dir_path)) {
    flist = list.files(dir_path, pattern = "cleaned")
    # unlink(dir_path, recursive = TRUE)
    cat("✓ Deleted:", id, "\n")
  } else {
    cat("✗ Not found:", id, "\n")
  }
}

✗ Not found: sub-87974617 
✗ Not found: sub-87974621 
✗ Not found: sub-87974665 
✗ Not found: sub-87974709 
✗ Not found: sub-87974841 
✗ Not found: sub-87974973 
✗ Not found: sub-87976193 
✗ Not found: sub-87976369 
✗ Not found: sub-87976413 
✗ Not found: sub-87976457 
✗ Not found: sub-87976461 
✗ Not found: sub-87976505 
✗ Not found: sub-87976641 
✗ Not found: sub-87976773 
✗ Not found: sub-87976817 
✗ Not found: sub-87976953 
✗ Not found: sub-87977045 
✗ Not found: sub-87980197 
✗ Not found: sub-87980241 
✗ Not found: sub-87980329 
✗ Not found: sub-87980373 
✗ Not found: sub-87980417 
✗ Not found: sub-87980689 
✗ Not found: sub-87980869 
✗ Not found: sub-87980913 
✗ Not found: sub-87982225 
✗ Not found: sub-87982849 
✗ Not found: sub-88005441 
✗ Not found: sub-88005485 
✗ Not found: sub-88006297 
✗ Not found: sub-88007109 
✗ Not found: sub-88007241 
✗ Not found: sub-88008997 
✗ Not found: sub-88009309 
✗ Not found: sub-88013089 
✗ Not found: sub-88015117 
✗ Not found: sub-88015117 
✗

In [26]:
have_id <- list.files("/Users/tianyi/Library/Mobile Documents/com~apple~CloudDocs/TD_Brain_data/preprocessed/")

In [27]:
length(have_id)