In [1]:
library(tidyverse)
library(mediation)
library(brms)
library(dplyr)
library(lme4)
library(ggplot2)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.3     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   4.0.0     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.1.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors


1: package ‘tidyverse’ was built under R version 4.3.3 
2: package ‘lubridate’ was built under R version 4.3.2 


Loading required package: MASS

Attaching package: ‘MASS’

The following object is masked from ‘package:dplyr’:

    select

Loading required package: Matrix

Attaching package: ‘Matrix’

The following objects are masked from ‘package:tidyr’:

    expand, pack, unpack

Loading required package: mvtnorm
Loading required package: sandwich
mediation: Causal Mediation Analysis
Version: 4.5.1



1: package ‘mvtnorm’ was built under R version 4.3.3 
2: package ‘sandwich’ was built under R version 4.3.3 


Loading required package: Rcpp
Loading 'brms' package (version 2.22.0). Useful instructions
can be found by typing help('brms'). A more detailed introduction
to the package is available through vignette('brms_overview').

Attaching package: ‘brms’

The following object is masked from ‘package:stats’:

    ar



package ‘brms’ was built under R version 4.3.3 



Attaching package: ‘lme4’

The following object is masked from ‘package:brms’:

    ngrps



# Import

In [2]:
df_long <- read.csv("df_RCT.csv")

# Sleep

In [4]:

mediators <- c("smip_pre_max", "smip_post_max", "pif_pre_max", "pif_post_max", "sindex_pre_max", "sindex_post_max", "anxiety_sum", "depression_sum", "sfpa_sum")
outcomes  <- c("psqi_sum", "psqi_disturbances")

# ---- storage for CSV ----
results_list <- list()

# ---- PDF diagnostics ----
pdf("mediation_diagnostics.pdf", width = 8, height = 6)

row_index <- 1

for (med in mediators) {
  for (out in outcomes) {
    
    message("Running: ", med, " -> ", out)

    # Subset complete data
    vars_needed <- c(med, out, "time_point", "record_id")
    temp_data <- df_long[complete.cases(df_long[vars_needed]), ]
    
    if (nrow(temp_data) < 10) {
      message("Skipping (too few rows).")
      next
    }
    
    # Build formulas
    med_formula <- as.formula(paste0(med, " ~ time_point + (1 | record_id)"))
    out_formula <- as.formula(paste0(out, " ~ time_point + ", med, " + (1 | record_id)"))
    
    # Fit models with error handling
    med.fit <- try(lmer(med_formula, data = temp_data, REML = FALSE), silent = TRUE)
    if (inherits(med.fit, "try-error")) next
    
    out.fit <- try(lmer(out_formula, data = temp_data, REML = FALSE), silent = TRUE)
    if (inherits(out.fit, "try-error")) next
    
    # Mediation analysis
    med.out <- try(
      mediate(
        model.m = med.fit,
        model.y = out.fit,
        treat = "time_point",
        mediator = med,
        sims = 2000
      ), 
      silent = TRUE
    )
    
    if (inherits(med.out, "try-error")) next
    
    #### ---- Save results for CSV ---- ####
    sum_out <- summary(med.out)
    
    results_list[[row_index]] <- data.frame(
      mediator = med,
      outcome  = out,
      ACME_est = sum_out$d0,
      ACME_p   = sum_out$d0.p,
      ADE_est  = sum_out$z0,
      ADE_p    = sum_out$z0.p,
      Total_est = sum_out$tau.coef,
      Total_p   = sum_out$tau.p,
      PropMed  = sum_out$n0,
      stringsAsFactors = FALSE
    )
    row_index <- row_index + 1
    
    #### ---- Diagnostics: Density plot of simulation draws ---- ####
    plot_df <- data.frame(
      ACME = med.out$d0.sims,
      ADE  = med.out$z0.sims
    )
    
    print(
      ggplot(plot_df, aes(x = ACME)) +
        geom_density() +
        ggtitle(paste("ACME Simulation Distribution:", med, "→", out))
    )
    
    print(
      ggplot(plot_df, aes(x = ADE)) +
        geom_density() +
        ggtitle(paste("ADE Simulation Distribution:", med, "→", out))
    )
    
  }
}

dev.off()

# ---- Write CSV ----
results_df <- bind_rows(results_list)
write.csv(results_df, "mediation_results.csv", row.names = FALSE)

message("Done. CSV and PDF created.")


Running: smip_pre_max -> psqi_sum


: Error in `geom_density()`:
! Problem while computing aesthetics.
ℹ Error occurred in the 1st layer.
Caused by error:
! object 'ACME' not found

# Respiratory Indices

## 4 Models Seperate

### Correlations Seperated by Time

In [41]:
library(dplyr)
library(tidyr)

# Define your variables
outcomes <- c("fss_sum", "woods_sum", "dsq_sum", "vas_fatiguecurrent")
resp_vars <- c("mip_pre_max", "mip_pre_max_percentpredict_1", "mip_pre_max_percentpredict_2",
               "smip_pre_max", "fit_pre_max", "pif_pre_max", "sindex_pre_max", "volume_pre_max",
               "id_pre_max", "slopesmip_pre_max",
               "mip_post_max", "mip_post_max_percentpredict_1", "mip_post_max_percentpredict_2",
               "smip_post_max","fit_post_max", "pif_post_max", "sindex_post_max",
               "volume_post_max", "id_post_max", "slopesmip_post_max")

# Function to calculate both Pearson and Spearman correlations
calculate_correlations <- function(data, outcome, respiratory, data_source) {
  # Remove rows with missing values for these two variables
  clean_data <- data[complete.cases(data[c(outcome, respiratory)]), ]
  
  # Calculate Pearson correlation
  pearson_result <- cor.test(clean_data[[outcome]], clean_data[[respiratory]], 
                             method = "pearson")
  
  # Calculate Spearman correlation
  spearman_result <- cor.test(clean_data[[outcome]], clean_data[[respiratory]], 
                              method = "spearman")
  
  # Return as a data frame
  tibble(
    data_source = data_source,
    outcome = outcome,
    respiratory = respiratory,
    pearson_r = pearson_result$estimate,
    pearson_p = pearson_result$p.value,
    spearman_rho = spearman_result$estimate,
    spearman_p = spearman_result$p.value,
    n = nrow(clean_data)
  )
}

# Initialize results list
correlation_results <- list()

# Calculate correlations for all combinations for BOTH datasets
i <- 1
for (y in outcomes) {
  for (x in resp_vars) {
    # Combined Visit 1 + Visit 2 data
    correlation_results[[i]] <- calculate_correlations(df_long, y, x, "combined_visits")
    i <- i + 1
    
    # Visit 1 only data
    visit1_data <- df_long %>% filter(time_point == 1)
    correlation_results[[i]] <- calculate_correlations(visit1_data, y, x, "visit1_only")
    i <- i + 1
  }
}

# Combine all results
final_correlations <- bind_rows(correlation_results)

# Reorder columns to match your requested format
final_correlations <- final_correlations %>%
  dplyr::select(data_source, outcome, respiratory, pearson_r, pearson_p, spearman_rho, spearman_p, n)

# Print a sample to check
head(final_correlations)

# Save to CSV
write.csv(final_correlations, "bivariate_correlations_all_data.csv", row.names = FALSE)

# Create formatted summary tables sorted by strongest Spearman correlation
summary_correlations <- final_correlations %>%
  group_by(data_source, outcome) %>%
  arrange(data_source, outcome, desc(abs(spearman_rho))) %>%
  mutate(
    pearson_sig = case_when(
      pearson_p < 0.001 ~ "***",
      pearson_p < 0.01 ~ "**",
      pearson_p < 0.05 ~ "*",
      TRUE ~ ""
    ),
    spearman_sig = case_when(
      spearman_p < 0.001 ~ "***",
      spearman_p < 0.01 ~ "**",
      spearman_p < 0.05 ~ "*",
      TRUE ~ ""
    )
  )

# Print summary tables for easy viewing
cat("COMBINED VISITS (1+2) - Correlations with FSS (Fatigue):\n")
summary_correlations %>% 
  filter(data_source == "combined_visits" & outcome == "fss_sum") %>% 
  print(n = 20)

cat("\nCOMBINED VISITS (1+2) - Correlations with Woods MFI (Brain Fog):\n")
summary_correlations %>% 
  filter(data_source == "combined_visits" & outcome == "woods_sum") %>% 
  print(n = 20)

cat("\nVISIT 1 ONLY - Correlations with FSS (Fatigue):\n")
summary_correlations %>% 
  filter(data_source == "visit1_only" & outcome == "fss_sum") %>% 
  print(n = 20)

cat("\nVISIT 1 ONLY - Correlations with Woods MFI (Brain Fog):\n")
summary_correlations %>% 
  filter(data_source == "visit1_only" & outcome == "woods_sum") %>% 
  print(n = 20)

# Save the sorted summary
write.csv(summary_correlations, "sorted_correlations_summary_all_data.csv", row.names = FALSE)

# Optional: Create a wide format for easy comparison between datasets
wide_format <- final_correlations %>%
  dplyr::select(-pearson_p, -spearman_p) %>%
  pivot_wider(
    names_from = data_source,
    values_from = c(pearson_r, spearman_rho, n),
    names_glue = "{data_source}_{.value}"
  ) %>%
  dplyr::select(outcome, respiratory, 
         visit1_only_pearson_r, combined_visits_pearson_r,
         visit1_only_spearman_rho, combined_visits_spearman_rho,
         visit1_only_n, combined_visits_n)

# Save wide format for easy comparison
write.csv(wide_format, "correlations_wide_format_comparison.csv", row.names = FALSE)

cat("\nWide Format Comparison (first 10 rows):\n")
print(head(wide_format, 10))



COMBINED VISITS (1+2) - Correlations with FSS (Fatigue):
# A tibble: 20 × 10
# Groups:   data_source, outcome [1]
   data_source  outcome respiratory pearson_r pearson_p spearman_rho spearman_p
   <chr>        <chr>   <chr>           <dbl>     <dbl>        <dbl>      <dbl>
 1 combined_vi… fss_sum mip_post_m…   -0.615  0.0000237      -0.605   0.0000352
 2 combined_vi… fss_sum mip_post_m…   -0.493  0.00122        -0.582   0.0000806
 3 combined_vi… fss_sum mip_post_m…   -0.600  0.0000424      -0.573   0.000110 
 4 combined_vi… fss_sum mip_pre_ma…   -0.582  0.0000811      -0.543   0.000294 
 5 combined_vi… fss_sum mip_pre_ma…   -0.596  0.0000493      -0.531   0.000424 
 6 combined_vi… fss_sum mip_pre_max   -0.474  0.00202        -0.484   0.00154  
 7 combined_vi… fss_sum sindex_pos…   -0.426  0.00686        -0.479   0.00202  
 8 combined_vi… fss_sum pif_post_m…   -0.421  0.00759        -0.459   0.00328  
 9 combined_vi… fss_sum smip_post_…   -0.348  0.0277         -0.388   0.0133   
10 com

### Correlation Matrix

In [37]:
library(dplyr)
library(tidyr)

# Define your variables
outcomes <- c("fss_sum", "woods_sum")
resp_vars <- c("mip_pre_max", "mip_pre_max_percentpredict_1", "mip_pre_max_percentpredict_2",
               "smip_pre_max", "fit_pre_max", "pif_pre_max", "sindex_pre_max", "volume_pre_max",
               "id_pre_max", "slopesmip_pre_max",
               "mip_post_max", "mip_post_max_percentpredict_1", "mip_post_max_percentpredict_2",
               "smip_post_max","fit_post_max", "pif_post_max", "sindex_post_max",
               "volume_post_max", "id_post_max", "slopesmip_post_max")

# Function to calculate both Pearson and Spearman correlations
calculate_correlations <- function(data, outcome, respiratory) {
  # Remove rows with missing values for these two variables
  clean_data <- data[complete.cases(data[c(outcome, respiratory)]), ]
  
  # Calculate Pearson correlation
  pearson_result <- cor.test(clean_data[[outcome]], clean_data[[respiratory]], 
                             method = "pearson")
  
  # Calculate Spearman correlation
  spearman_result <- cor.test(clean_data[[outcome]], clean_data[[respiratory]], 
                              method = "spearman")
  
  # Return as a data frame
  tibble(
    outcome = outcome,
    respiratory = respiratory,
    pearson_r = pearson_result$estimate,
    pearson_p = pearson_result$p.value,
    spearman_rho = spearman_result$estimate,
    spearman_p = spearman_result$p.value,
    n = nrow(clean_data)
  )
}

# Initialize results list
correlation_results <- list()

# Calculate correlations for all combinations
i <- 1
for (y in outcomes) {
  for (x in resp_vars) {
    correlation_results[[i]] <- calculate_correlations(df_long, y, x)
    i <- i + 1
  }
}

# Combine all results
final_correlations <- bind_rows(correlation_results)

# Reorder columns to match your requested format
final_correlations <- final_correlations %>%
  dplyr::select(outcome, respiratory, pearson_r, pearson_p, spearman_rho, spearman_p, n)

# Print a sample to check
head(final_correlations)

# Save to CSV
write.csv(final_correlations, "bivariate_correlations_results.csv", row.names = FALSE)

# Optional: Create a formatted summary table sorted by strongest Spearman correlation
summary_correlations <- final_correlations %>%
  group_by(outcome) %>%
  arrange(desc(abs(spearman_rho))) %>%
  mutate(
    pearson_sig = ifelse(pearson_p < 0.05, "*", ""),
    spearman_sig = ifelse(spearman_p < 0.05, "*", "")
  )

# Print summary tables
cat("Correlations with FSS (Fatigue):\n")
summary_correlations %>% 
  filter(outcome == "fss_sum") %>% 
  print(n = 20)

cat("\nCorrelations with Woods MFI (Brain Fog):\n")
summary_correlations %>% 
  filter(outcome == "woods_sum") %>% 
  print(n = 20)

# Save the sorted summary
write.csv(summary_correlations, "sorted_correlations_summary.csv", row.names = FALSE)



Correlations with FSS (Fatigue):
# A tibble: 20 × 9
# Groups:   outcome [1]
   outcome respiratory        pearson_r pearson_p spearman_rho spearman_p     n
   <chr>   <chr>                  <dbl>     <dbl>        <dbl>      <dbl> <int>
 1 fss_sum mip_post_max_perc…   -0.615  0.0000237      -0.605   0.0000352    40
 2 fss_sum mip_post_max         -0.493  0.00122        -0.582   0.0000806    40
 3 fss_sum mip_post_max_perc…   -0.600  0.0000424      -0.573   0.000110     40
 4 fss_sum mip_pre_max_perce…   -0.582  0.0000811      -0.543   0.000294     40
 5 fss_sum mip_pre_max_perce…   -0.596  0.0000493      -0.531   0.000424     40
 6 fss_sum mip_pre_max          -0.474  0.00202        -0.484   0.00154      40
 7 fss_sum sindex_post_max      -0.426  0.00686        -0.479   0.00202      39
 8 fss_sum pif_post_max         -0.421  0.00759        -0.459   0.00328      39
 9 fss_sum smip_post_max        -0.348  0.0277         -0.388   0.0133       40
10 fss_sum pif_pre_max          -0.389  0.01

### 11/25 Standardized Betas

In [34]:
library(lme4)
library(broom.mixed)
library(dplyr)
library(tidyr)
library(performance)  # for r2()

outcomes <- c("fss_sum","woods_sum", "dsq_sum", "vas_fatiguecurrent")
resp_vars <- c("mip_pre_max", "mip_pre_max_percentpredict_1", "mip_pre_max_percentpredict_2",
               "smip_pre_max", "fit_pre_max", "pif_pre_max", "sindex_pre_max", "volume_pre_max",
               "id_pre_max", "slopesmip_pre_max",
               "mip_post_max", "mip_post_max_percentpredict_1", "mip_post_max_percentpredict_2",
               "smip_post_max","fit_post_max", "pif_post_max", "sindex_post_max",
               "volume_post_max", "id_post_max", "slopesmip_post_max")

results <- list()

add_result <- function(model_type, outcome, respiratory, fit, varname, is_lmer=TRUE) {
  coef_row <- tidy(fit) %>% filter(term == varname)
  
  # Calculate R²
  if(is_lmer){
    r2_vals <- performance::r2(fit)
    adj_r2 <- r2_vals$R2_marginal  # fixed effects only
    conditional_r2 <- r2_vals$R2_conditional  # fixed + random
  } else {
    adj_r2 <- summary(fit)$adj.r.squared
    conditional_r2 <- NA
  }
  
  # Calculate standardized beta
  if(is_lmer){
    # For mixed models, standardize using model frame
    data_used <- model.frame(fit)
    x_var <- data_used[[varname]]
    y_var <- data_used[[1]]  # first column is response
    std_beta <- coef_row$estimate * (sd(x_var, na.rm = TRUE) / sd(y_var, na.rm = TRUE))
  } else {
    # For lm models
    data_used <- model.frame(fit)
    x_var <- data_used[[varname]]
    y_var <- data_used[[1]]
    std_beta <- coef_row$estimate * (sd(x_var, na.rm = TRUE) / sd(y_var, na.rm = TRUE))
  }
  
  tibble(
    model_type = model_type,
    outcome = outcome,
    respiratory = respiratory,
    beta = std_beta,  # NOW REPORTING STANDARDIZED BETA
    std_error = coef_row$std.error,
    statistic = coef_row$statistic,
    p_value = coef_row$p.value,
    adj_r2 = adj_r2,
    conditional_r2 = conditional_r2
  )
}

i <- 1

for (y in outcomes) {
  for (x in resp_vars) {
    
    # MODEL 1: two time points, respiratory only
    data12 <- df_long %>% dplyr::select(record_id, !!sym(y), !!sym(x), time_point)
    fit1 <- lmer(as.formula(paste(y, "~", x, "+ (1|record_id)")), data = data12)
    results[[i]] <- add_result("Model1_two_timepoints_resp_only", y, x, fit1, x)
    i <- i + 1
    
    # MODEL 2: two time points, respiratory + covariates
    data12c <- df_long %>% dplyr::select(record_id, !!sym(y), !!sym(x), time_point, data_age, subject_female)
    fit2 <- lmer(as.formula(paste(y, "~", x, "+ data_age + subject_female + (1|record_id)")), data = data12c)
    results[[i]] <- add_result("Model2_two_timepoints_with_covariates", y, x, fit2, x)
    i <- i + 1
    
    # MODEL 3: visit 1 only, respiratory only
    v1 <- df_long %>% filter(time_point == 1) %>% dplyr::select(record_id, !!sym(y), !!sym(x))
    fit3 <- lm(as.formula(paste(y, "~", x)), data = v1)
    results[[i]] <- add_result("Model3_visit1_resp_only", y, x, fit3, x, is_lmer=FALSE)
    i <- i + 1
    
    # MODEL 4: visit 1 only, respiratory + covariates
    v1c <- df_long %>% filter(time_point == 1) %>% dplyr::select(record_id, !!sym(y), !!sym(x), data_age, subject_female)
    fit4 <- lm(as.formula(paste(y, "~", x, "+ data_age + subject_female")), data = v1c)
    results[[i]] <- add_result("Model4_visit1_with_covariates", y, x, fit4, x, is_lmer=FALSE)
    i <- i + 1
  }
}

# Combine results
final_results <- bind_rows(results)

# Create a summary per respiratory variable and outcome
summary_results <- final_results %>%
  group_by(outcome, respiratory) %>%
  summarise(
    mean_beta = mean(beta),  # Now this is mean standardized beta
    mean_adj_r2 = mean(adj_r2, na.rm = TRUE),
    mean_conditional_r2 = mean(conditional_r2, na.rm = TRUE),
    significant_models = sum(p_value < 0.05),
    model1_beta = beta[model_type=="Model1_two_timepoints_resp_only"],
    model1_p = p_value[model_type=="Model1_two_timepoints_resp_only"],
    model1_adjR2 = adj_r2[model_type=="Model1_two_timepoints_resp_only"],
    model1_condR2 = conditional_r2[model_type=="Model1_two_timepoints_resp_only"],
    model2_beta = beta[model_type=="Model2_two_timepoints_with_covariates"],
    model2_p = p_value[model_type=="Model2_two_timepoints_with_covariates"],
    model2_adjR2 = adj_r2[model_type=="Model2_two_timepoints_with_covariates"],
    model2_condR2 = conditional_r2[model_type=="Model2_two_timepoints_with_covariates"],
    model3_beta = beta[model_type=="Model3_visit1_resp_only"],
    model3_p = p_value[model_type=="Model3_visit1_resp_only"],
    model3_adjR2 = adj_r2[model_type=="Model3_visit1_resp_only"],
    model3_condR2 = conditional_r2[model_type=="Model3_visit1_resp_only"],
    model4_beta = beta[model_type=="Model4_visit1_with_covariates"],
    model4_p = p_value[model_type=="Model4_visit1_with_covariates"],
    model4_adjR2 = adj_r2[model_type=="Model4_visit1_with_covariates"],
    model4_condR2 = conditional_r2[model_type=="Model4_visit1_with_covariates"]
  )

# Save to CSV
write.csv(final_results, "respiratory_model_results_all_models.csv", row.names = FALSE)
write.csv(summary_results, "respiratory_model_results_summary.csv", row.names = FALSE)

`summarise()` has grouped output by 'outcome'. You can override using the
`.groups` argument.


### No MUMIN

In [33]:
library(lme4)
library(broom.mixed)
library(dplyr)
library(tidyr)
library(performance)  # for r2()

outcomes <- c("fss_sum","woods_sum")
resp_vars <- c("mip_pre_max", "mip_pre_max_percentpredict_1", "mip_pre_max_percentpredict_2",
               "smip_pre_max", "fit_pre_max", "pif_pre_max", "sindex_pre_max", "volume_pre_max",
               "id_pre_max", "slopesmip_pre_max",
               "mip_post_max", "mip_post_max_percentpredict_1", "mip_post_max_percentpredict_2",
               "smip_post_max","fit_post_max", "pif_post_max", "sindex_post_max",
               "volume_post_max", "id_post_max", "slopesmip_post_max")

results <- list()

add_result <- function(model_type, outcome, respiratory, fit, varname, is_lmer=TRUE) {
  coef_row <- tidy(fit) %>% filter(term == varname)
  
  # Calculate R²
  if(is_lmer){
    r2_vals <- performance::r2(fit)
    adj_r2 <- r2_vals$R2_marginal  # fixed effects only
    conditional_r2 <- r2_vals$R2_conditional  # fixed + random
  } else {
    adj_r2 <- summary(fit)$adj.r.squared
    conditional_r2 <- NA
  }
  
  tibble(
    model_type = model_type,
    outcome = outcome,
    respiratory = respiratory,
    beta = coef_row$estimate,
    std_error = coef_row$std.error,
    statistic = coef_row$statistic,
    p_value = coef_row$p.value,
    adj_r2 = adj_r2,
    conditional_r2 = conditional_r2
  )
}

i <- 1

for (y in outcomes) {
  for (x in resp_vars) {
    
    # MODEL 1: two time points, respiratory only
    data12 <- df_long %>% dplyr::select(record_id, !!sym(y), !!sym(x), time_point)
    fit1 <- lmer(as.formula(paste(y, "~", x, "+ (1|record_id)")), data = data12)
    results[[i]] <- add_result("Model1_two_timepoints_resp_only", y, x, fit1, x)
    i <- i + 1
    
    # MODEL 2: two time points, respiratory + covariates
    data12c <- df_long %>% dplyr::select(record_id, !!sym(y), !!sym(x), time_point, data_age, subject_female)
    fit2 <- lmer(as.formula(paste(y, "~", x, "+ data_age + subject_female + (1|record_id)")), data = data12c)
    results[[i]] <- add_result("Model2_two_timepoints_with_covariates", y, x, fit2, x)
    i <- i + 1
    
    # MODEL 3: visit 1 only, respiratory only
    v1 <- df_long %>% filter(time_point == 1) %>% dplyr::select(record_id, !!sym(y), !!sym(x))
    fit3 <- lm(as.formula(paste(y, "~", x)), data = v1)
    results[[i]] <- add_result("Model3_visit1_resp_only", y, x, fit3, x, is_lmer=FALSE)
    i <- i + 1
    
    # MODEL 4: visit 1 only, respiratory + covariates
    v1c <- df_long %>% filter(time_point == 1) %>% dplyr::select(record_id, !!sym(y), !!sym(x), data_age, subject_female)
    fit4 <- lm(as.formula(paste(y, "~", x, "+ data_age + subject_female")), data = v1c)
    results[[i]] <- add_result("Model4_visit1_with_covariates", y, x, fit4, x, is_lmer=FALSE)
    i <- i + 1
  }
}

# Combine results
final_results <- bind_rows(results)

# Create a summary per respiratory variable and outcome
summary_results <- final_results %>%
  group_by(outcome, respiratory) %>%
  summarise(
    mean_beta = mean(beta),
    mean_adj_r2 = mean(adj_r2, na.rm = TRUE),
    mean_conditional_r2 = mean(conditional_r2, na.rm = TRUE),
    significant_models = sum(p_value < 0.05),
    model1_beta = beta[model_type=="Model1_two_timepoints_resp_only"],
    model1_p = p_value[model_type=="Model1_two_timepoints_resp_only"],
    model1_adjR2 = adj_r2[model_type=="Model1_two_timepoints_resp_only"],
    model1_condR2 = conditional_r2[model_type=="Model1_two_timepoints_resp_only"],
    model2_beta = beta[model_type=="Model2_two_timepoints_with_covariates"],
    model2_p = p_value[model_type=="Model2_two_timepoints_with_covariates"],
    model2_adjR2 = adj_r2[model_type=="Model2_two_timepoints_with_covariates"],
    model2_condR2 = conditional_r2[model_type=="Model2_two_timepoints_with_covariates"],
    model3_beta = beta[model_type=="Model3_visit1_resp_only"],
    model3_p = p_value[model_type=="Model3_visit1_resp_only"],
    model3_adjR2 = adj_r2[model_type=="Model3_visit1_resp_only"],
    model3_condR2 = conditional_r2[model_type=="Model3_visit1_resp_only"],
    model4_beta = beta[model_type=="Model4_visit1_with_covariates"],
    model4_p = p_value[model_type=="Model4_visit1_with_covariates"],
    model4_adjR2 = adj_r2[model_type=="Model4_visit1_with_covariates"],
    model4_condR2 = conditional_r2[model_type=="Model4_visit1_with_covariates"]
  )

# Save to CSV
write.csv(final_results, "respiratory_model_results_all_models.csv", row.names = FALSE)
write.csv(summary_results, "respiratory_model_results_summary.csv", row.names = FALSE)


`summarise()` has grouped output by 'outcome'. You can override using the
`.groups` argument.


### MUMIN

In [31]:
library(lme4)
library(broom.mixed)
library(dplyr)
library(tidyr)
library(MuMIn)

outcomes <- c("fss_sum","woods_sum")
resp_vars <- c("mip_pre_max", "mip_pre_max_percentpredict_1", "mip_pre_max_percentpredict_2", "smip_pre_max", "fit_pre_max", "pif_pre_max", "sindex_pre_max", "volume_pre_max", "id_pre_max", "slopesmip_pre_max",
               "mip_post_max", "mip_post_max_percentpredict_1", "mip_post_max_percentpredict_2", "smip_post_max","fit_post_max", "pif_post_max", "sindex_post_max", "volume_post_max", "id_post_max", "slopesmip_post_max")


results <- list()

add_result <- function(model_type, outcome, respiratory, fit, varname, is_lmer=TRUE) {
  coef_row <- tidy(fit) %>% filter(term == varname)
  
  # Calculate adjusted R²
  if(is_lmer){
    r2 <- MuMIn::r.squaredGLMM(fit) # marginal and conditional
    adj_r2 <- r2[1] # marginal R² as approximation
  } else {
    adj_r2 <- summary(fit)$adj.r.squared
  }
  
  tibble(
    model_type = model_type,
    outcome = outcome,
    respiratory = respiratory,
    beta = coef_row$estimate,
    std_error = coef_row$std.error,
    statistic = coef_row$statistic,
    p_value = coef_row$p.value,
    adj_r2 = adj_r2
  )
}

i <- 1

for (y in outcomes) {
  for (x in resp_vars) {
    
    # MODEL 1: two time points, respiratory only
    data12 <- df_long %>% dplyr::select(record_id, !!sym(y), !!sym(x), time_point)
    fit1 <- lmer(as.formula(paste(y, "~", x, "+ (1|record_id)")), data = data12)
    results[[i]] <- add_result("Model1_two_timepoints_resp_only", y, x, fit1, x)
    i <- i + 1
    
    # MODEL 2: two time points, respiratory + covariates
    data12c <- df_long %>% dplyr::select(record_id, !!sym(y), !!sym(x), time_point, data_age, subject_female)
    fit2 <- lmer(as.formula(paste(y, "~", x, "+ data_age + subject_female + (1|record_id)")), data = data12c)
    results[[i]] <- add_result("Model2_two_timepoints_with_covariates", y, x, fit2, x)
    i <- i + 1
    
    # MODEL 3: visit 1 only, respiratory only
    v1 <- df_long %>% filter(time_point == 1) %>% dplyr::select(record_id, !!sym(y), !!sym(x))
    fit3 <- lm(as.formula(paste(y, "~", x)), data = v1)
    results[[i]] <- add_result("Model3_visit1_resp_only", y, x, fit3, x, is_lmer=FALSE)
    i <- i + 1
    
    # MODEL 4: visit 1 only, respiratory + covariates
    v1c <- df_long %>% filter(time_point == 1) %>% dplyr::select(record_id, !!sym(y), !!sym(x), data_age, subject_female)
    fit4 <- lm(as.formula(paste(y, "~", x, "+ data_age + subject_female")), data = v1c)
    results[[i]] <- add_result("Model4_visit1_with_covariates", y, x, fit4, x, is_lmer=FALSE)
    i <- i + 1
  }
}

# Combine results
final_results <- bind_rows(results)

# Optional: create a summary per respiratory variable and outcome
summary_results <- final_results %>%
  group_by(outcome, respiratory) %>%
  summarise(
    mean_beta = mean(beta),
    mean_adj_r2 = mean(adj_r2),
    significant_models = sum(p_value < 0.05),
    model1_beta = beta[model_type=="Model1_two_timepoints_resp_only"],
    model1_p = p_value[model_type=="Model1_two_timepoints_resp_only"],
    model1_adjR2 = adj_r2[model_type=="Model1_two_timepoints_resp_only"],
    model2_beta = beta[model_type=="Model2_two_timepoints_with_covariates"],
    model2_p = p_value[model_type=="Model2_two_timepoints_with_covariates"],
    model2_adjR2 = adj_r2[model_type=="Model2_two_timepoints_with_covariates"],
    model3_beta = beta[model_type=="Model3_visit1_resp_only"],
    model3_p = p_value[model_type=="Model3_visit1_resp_only"],
    model3_adjR2 = adj_r2[model_type=="Model3_visit1_resp_only"],
    model4_beta = beta[model_type=="Model4_visit1_with_covariates"],
    model4_p = p_value[model_type=="Model4_visit1_with_covariates"],
    model4_adjR2 = adj_r2[model_type=="Model4_visit1_with_covariates"]
  )

# Save to CSV
write.csv(final_results, "respiratory_model_results_all_models.csv", row.names = FALSE)
write.csv(summary_results, "respiratory_model_results_summary.csv", row.names = FALSE)


: Error in `library()`:
! there is no package called ‘MuMIn’

## Averaged 4 Models

In [None]:
library(lme4)
library(broom.mixed)
library(broom)
library(dplyr)
library(performance)
library(readr)

outcomes <- c("fss_sum","woods_sum")
resp_vars <- c("mip_pre_max", "mip_pre_max_percentpredict_1", "mip_pre_max_percentpredict_2", "smip_pre_max", "fit_pre_max", "pif_pre_max", "sindex_pre_max", "volume_pre_max", "id_pre_max", "slopesmip_pre_max",
               "mip_post_max", "mip_post_max_percentpredict_1", "mip_post_max_percentpredict_2", "smip_post_max","fit_post_max", "pif_post_max", "sindex_post_max", "volume_post_max", "id_post_max", "slopesmip_post_max")

results <- list()
i <- 1

standardize_df <- function(df, vars){
  df %>% mutate(across(all_of(vars), scale))
}

for (y in outcomes) {
  for (x in resp_vars) {

    ### ----------------- STANDARDIZED DATASETS -----------------
    data12 <- df_long %>% 
      dplyr::select(record_id, !!sym(y), !!sym(x), time_point) %>%
      standardize_df(c(y, x))

    data12c <- df_long %>% 
      dplyr::select(record_id, !!sym(y), !!sym(x), time_point, data_age, subject_female) %>%
      standardize_df(c(y, x, "data_age", "subject_female"))

    v1 <- df_long %>%
      filter(time_point == 1) %>%
      dplyr::select(record_id, !!sym(y), !!sym(x)) %>%
      standardize_df(c(y, x))

    v1c <- df_long %>%
      filter(time_point == 1) %>%
      dplyr::select(record_id, !!sym(y), !!sym(x), data_age, subject_female) %>%
      standardize_df(c(y, x, "data_age", "subject_female"))

    ### ----------------- MODEL 1 -----------------
    fit1 <- lmer(as.formula(paste(y, "~", x, "+ (1|record_id)")), data = data12)
    tid1 <- tidy(fit1) %>% filter(term == x)
    r2_1 <- performance::r2(fit1)

    results[[i]] <- tibble(
      outcome = y,
      respiratory = x,
      model = "Model1_two_timepoints_resp_only",
      beta = tid1$estimate,
      p = tid1$p.value,
      AIC = AIC(fit1),
      marginal_R2 = r2_1$R2_marginal,
      conditional_R2 = r2_1$R2_conditional,
      adj_R2 = NA
    )
    i <- i + 1

    ### ----------------- MODEL 2 -----------------
    fit2 <- lmer(as.formula(paste(y, "~", x, "+ data_age + subject_female + (1|record_id)")), data = data12c)
    tid2 <- tidy(fit2) %>% filter(term == x)
    r2_2 <- performance::r2(fit2)

    results[[i]] <- tibble(
      outcome = y,
      respiratory = x,
      model = "Model2_two_timepoints_with_covariates",
      beta = tid2$estimate,
      p = tid2$p.value,
      AIC = AIC(fit2),
      marginal_R2 = r2_2$R2_marginal,
      conditional_R2 = r2_2$R2_conditional,
      adj_R2 = NA
    )
    i <- i + 1

    ### ----------------- MODEL 3 -----------------
    fit3 <- lm(as.formula(paste(y, "~", x)), data = v1)
    tid3 <- tidy(fit3) %>% filter(term == x)

    results[[i]] <- tibble(
      outcome = y,
      respiratory = x,
      model = "Model3_visit1_resp_only",
      beta = tid3$estimate,
      p = tid3$p.value,
      AIC = NA,
      marginal_R2 = NA,
      conditional_R2 = NA,
      adj_R2 = summary(fit3)$adj.r.squared
    )
    i <- i + 1

    ### ----------------- MODEL 4 -----------------
    fit4 <- lm(as.formula(paste(y, "~", x, "+ data_age + subject_female")), data = v1c)
    tid4 <- tidy(fit4) %>% filter(term == x)

    results[[i]] <- tibble(
      outcome = y,
      respiratory = x,
      model = "Model4_visit1_with_covariates",
      beta = tid4$estimate,
      p = tid4$p.value,
      AIC = NA,
      marginal_R2 = NA,
      conditional_R2 = NA,
      adj_R2 = summary(fit4)$adj.r.squared
    )
    i <- i + 1
  }
}

final_results <- bind_rows(results)

### ----------------- BUILD RANKED SUMMARY -----------------

ranked_summary <- final_results %>%
  group_by(outcome, respiratory) %>%
  summarise(
    mean_beta = mean(abs(beta), na.rm = TRUE),
    significant_models = sum(p < 0.05, na.rm = TRUE),
    mean_adj_R2 = mean(adj_R2, na.rm = TRUE),
    mean_marginal_R2 = mean(marginal_R2, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    rank_score = rank(-mean_beta) +
                 rank(-significant_models) +
                 rank(-mean_adj_R2, na.last = TRUE) +
                 rank(-mean_marginal_R2, na.last = TRUE)
  ) %>%
  arrange(outcome, rank_score)

### ----------------- EXPORT CSV -----------------
write_csv(ranked_summary, "respiratory_predictor_rankings.csv")

print(ranked_summary)


# A tibble: 40 × 7
   outcome respiratory                 mean_beta significant_models mean_adj_R2
   <chr>   <chr>                           <dbl>              <int>       <dbl>
 1 fss_sum mip_post_max_percentpredic…     0.579                  4     0.260  
 2 fss_sum mip_post_max_percentpredic…     0.563                  4     0.256  
 3 fss_sum mip_post_max                    0.508                  4     0.200  
 4 fss_sum sindex_post_max                 0.501                  4     0.191  
 5 fss_sum pif_post_max                    0.483                  4     0.166  
 6 fss_sum mip_pre_max_percentpredict…     0.446                  2     0.0525 
 7 fss_sum mip_pre_max_percentpredict…     0.434                  2     0.0450 
 8 fss_sum mip_pre_max                     0.365                  2     0.00937
 9 fss_sum pif_pre_max                     0.362                  2     0.0330 
10 fss_sum sindex_pre_max                  0.359                  2     0.0383 
# ℹ 30 more rows
# ℹ 

In [14]:
library(lme4)
library(broom.mixed)
library(dplyr)

outcomes <- c("fss_sum","woods_sum")
resp_vars <- c("mip_pre_max","smip_pre_max","fit_pre_max",
               "mip_post_max","smip_post_max","fit_post_max")

results <- list()

add_result <- function(model_type, outcome, respiratory, fit, varname) {
  coef_row <- tidy(fit) %>% filter(term == varname)
  
  tibble(
    model_type = model_type,
    outcome = outcome,
    respiratory = respiratory,
    estimate = coef_row$estimate,
    std.error = coef_row$std.error,
    statistic = coef_row$statistic,
    p.value = coef_row$p.value
  )
}

i <- 1

for (y in outcomes) {
  for (x in resp_vars) {
    
    # ----------------------- MODEL 1 -----------------------
    data12 <- df_long %>%
      dplyr::select(record_id, !!sym(y), !!sym(x), time_point)
    
    fit1 <- lmer(as.formula(paste(y, "~", x, "+ (1|record_id)")), data = data12)
    results[[i]] <- add_result("Model1_two_timepoints_resp_only", y, x, fit1, x)
    i <- i + 1
    
    # ----------------------- MODEL 2 -----------------------
    data12c <- df_long %>%
      dplyr::select(record_id, !!sym(y), !!sym(x), time_point, data_age, subject_female)
    
    fit2 <- lmer(as.formula(paste(y, "~", x, "+ data_age + subject_female + (1|record_id)")), data = data12c)
    results[[i]] <- add_result("Model2_two_timepoints_with_covariates", y, x, fit2, x)
    i <- i + 1
    
    # ----------------------- MODEL 3 -----------------------
    v1 <- df_long %>%
      filter(time_point == 1) %>%
      dplyr::select(record_id, !!sym(y), !!sym(x))
    
    fit3 <- lm(as.formula(paste(y, "~", x)), data = v1)
    results[[i]] <- add_result("Model3_visit1_resp_only", y, x, fit3, x)
    i <- i + 1
    
    # ----------------------- MODEL 4 -----------------------
    v1c <- df_long %>%
      filter(time_point == 1) %>%
      dplyr::select(record_id, !!sym(y), !!sym(x), data_age, subject_female)
    
    fit4 <- lm(as.formula(paste(y, "~", x, "+ data_age + subject_female")), data = v1c)
    results[[i]] <- add_result("Model4_visit1_with_covariates", y, x, fit4, x)
    i <- i + 1
  }
}

final_results <- bind_rows(results)

print(final_results)


# A tibble: 48 × 7
   model_type          outcome respiratory estimate std.error statistic p.value
   <chr>               <chr>   <chr>          <dbl>     <dbl>     <dbl>   <dbl>
 1 Model1_two_timepoi… fss_sum mip_pre_max -0.180      0.0470    -3.83  5.83e-4
 2 Model2_two_timepoi… fss_sum mip_pre_max -0.191      0.0471    -4.06  3.30e-4
 3 Model3_visit1_resp… fss_sum mip_pre_max -0.149      0.150     -0.989 3.35e-1
 4 Model4_visit1_with… fss_sum mip_pre_max -0.164      0.150     -1.09  2.88e-1
 5 Model1_two_timepoi… fss_sum smip_pre_m… -0.0208     0.0112    -1.86  7.04e-2
 6 Model2_two_timepoi… fss_sum smip_pre_m… -0.0248     0.0117    -2.12  4.10e-2
 7 Model3_visit1_resp… fss_sum smip_pre_m… -0.00983    0.0122    -0.805 4.30e-1
 8 Model4_visit1_with… fss_sum smip_pre_m… -0.00962    0.0128    -0.752 4.62e-1
 9 Model1_two_timepoi… fss_sum fit_pre_max -0.0856     0.114     -0.752 4.57e-1
10 Model2_two_timepoi… fss_sum fit_pre_max -0.0760     0.113     -0.670 5.08e-1
# ℹ 38 more rows
# ℹ 

In [4]:
library(lme4)
library(lmerTest)
library(broom.mixed)
library(dplyr)
library(purrr)
library(stringr)

#-----------------------------------------------
# Define outcomes and respiratory predictors
#-----------------------------------------------
outcomes <- c("fss_sum", "woods_sum")
resp_vars <- c("mip_pre_max", "smip_pre_max", "fit_pre_max",
               "mip_post_max", "smip_post_max", "fit_post_max")

# Your full dataset is df_long
# Must contain:
#   record_id
#   time_point
#   data_age
#   subject_female
#   outcomes + respiratory vars


#------------------------------------------------
# Helper function to run one model and tidy it
#------------------------------------------------
run_model <- function(formula, data, outcome, resp_var, model_type){
  model <- lmer(formula, data = data)
  tidy_mod <- broom.mixed::tidy(model, effects = "fixed")
  
  tidy_mod %>%
    filter(term == resp_var) %>%
    mutate(
      outcome = outcome,
      respiratory = resp_var,
      model_type = model_type
    )
}


#------------------------------------------------
# Main loop
#------------------------------------------------
results <- list()

for(outcome in outcomes){
  for(resp in resp_vars){
    
    #-----------------------------
    # Model 1: Visit 1 + Visit 2
    # Only respiratory predictor
    #-----------------------------
    form1 <- as.formula(
      paste0(outcome, " ~ ", resp, " + (1|record_id)")
    )
    
    df1 <- df_long %>% filter(time_point %in% c(1, 2))
    
    results[[length(results)+1]] <- run_model(
      form1, df1, outcome, resp, "Model1_two_timepoints_resp_only"
    )
    
    
    #-----------------------------
    # Model 2: Visit 1 + Visit 2
    # respiratory + age + sex
    #-----------------------------
    form2 <- as.formula(
      paste0(outcome, " ~ ", resp, " + data_age + subject_female + (1|record_id)")
    )
    
    results[[length(results)+1]] <- run_model(
      form2, df1, outcome, resp, "Model2_two_timepoints_with_covariates"
    )
    
    
    #-----------------------------
    # Model 3: Visit 1 only
    # respiratory only
    #-----------------------------
    df3 <- df_long %>% filter(time_point == 1)
    
    # Ordinary regression because no repeated measures
    form3 <- as.formula(
      paste0(outcome, " ~ ", resp)
    )
    
    model3 <- lm(form3, data = df3)
    tidy3 <- broom::tidy(model3) %>%
      filter(term == resp) %>%
      mutate(outcome = outcome,
             respiratory = resp,
             model_type = "Model3_visit1_resp_only")
    
    results[[length(results)+1]] <- tidy3
    
    
    #-----------------------------
    # Model 4: Visit 1 only
    # respiratory + age + sex
    #-----------------------------
    form4 <- as.formula(
      paste0(outcome, " ~ ", resp, " + data_age + subject_female")
    )
    
    model4 <- lm(form4, data = df3)
    tidy4 <- broom::tidy(model4) %>%
      filter(term == resp) %>%
      mutate(outcome = outcome,
             respiratory = resp,
             model_type = "Model4_visit1_with_covariates")
    
    results[[length(results)+1]] <- tidy4
    
  }
}

#---------------------------------------------
# Final combined results table
#---------------------------------------------
final_results <- bind_rows(results) %>%
  select(model_type, outcome, respiratory, estimate, std.error, statistic, p.value)

print(final_results)


: Error in `select()`:
! unused arguments (model_type, outcome, respiratory, estimate, std.error, statistic, p.value)

In [7]:
results[[1]]
