In [20]:
library(tidyverse)
library(mediation)
library(brms)
library(dplyr)
library(tidyr)
library(lme4)
library(ggplot2)

# Import Dataset

In [40]:
df_long <- read.csv("df_RCT.csv")

In [33]:
df_prepost <- df_long %>%
  # ONLY use pre-intervention date
  filter(time_point == 0) %>%
  
  # condition already encoded as 0 = pre-treadmill, 1 = post-treadmill
  mutate(condition = ifelse(condition == 0, 0, 1)) %>%
  
  dplyr::select(
    record_id,
    condition,
    all_of(outcomes),
    matches(paste0("^(", paste(resp_types, collapse = "|"), ")_(pre|post)_max$")),
    all_of(covariates)
  ) %>%
  
  pivot_longer(
    cols = matches(paste0("^(", paste(resp_types, collapse = "|"), ")_(pre|post)_max$")),
    names_to      = c("resp_type", "time", ".value"),
    names_pattern = "(.+)_(pre|post)_(.+)"
  ) %>%
  
  rename(resp_value = max) %>%
  filter(!is.na(resp_value)) %>%
  distinct()


: Error in `mutate()`:
‚Ñπ In argument: `condition = ifelse(condition == 0, 0, 1)`.
Caused by error:
! object 'condition' not found

In [34]:
View(df_prepost)

## New

In [39]:
library(dplyr)
library(tidyr)

# --------------------------------------------------
# LOAD DATA
# --------------------------------------------------
df_long <- read.csv("df_RCT.csv")

# --------------------------------------------------
# VARIABLES
# --------------------------------------------------

outcomes <- c("fss_sum", "woods_sum", "dsq_sum")

resp_vars <- c(
  "mip_pre_max", "smip_pre_max", "fit_pre_max", "id_pre_max", "slopesmip_pre_max",
  "sindex_pre_max", "pif_pre_max", "volume_pre_max",
  "mip_post_max", "smip_post_max", "fit_post_max", "id_post_max", "slopesmip_post_max",
  "pif_post_max", "sindex_post_max", "volume_post_max",
  "cpet_vo2peak_absolute", "cpet_ve", "cpet_vt_peak", "cpet_o2pulse",
  "fmd_percent"
)

covariates <- c("data_age", "data_centimeters", "data_kilograms", "subject_female")

# --------------------------------------------------
# STEP 1 ‚Äî CENTER RESP VARIABLES & CREATE NEW COLUMNS
# --------------------------------------------------

for (v in resp_vars) {
  if (!v %in% names(df_long)) {
    warning("Variable ", v, " not found in df_long, skipping.")
  } else {
    mean_v <- mean(df_long[[v]], na.rm = TRUE)
    new_name <- paste0(v, "_c")
    df_long[[new_name]] <- df_long[[v]] - mean_v
  }
}

# --------------------------------------------------
# STEP 2 ‚Äî CREATE df_prepost (BEST STRUCTURE)
# 2 rows per participant: pre-treadmill (0) and post-treadmill (1)
# USING ONLY time_point = 0 VISIT
# --------------------------------------------------

# Helper function to rename pre/post columns to unified names
rename_resp <- function(df, prefix) {
  rename_at(
    df,
    vars(ends_with("_pre_max")),
    ~gsub("_pre_max", paste0("_", prefix), .)
  ) %>%
  rename_at(
    vars(ends_with("_post_max")),
    ~gsub("_post_max", paste0("_", prefix), .)
  )
}

# Keep ONLY time_point 0 (pre-intervention visit)
df_baseline <- df_long %>% filter(time_point == 1)

# PRE-treadmill dataset (condition = 0)
df_pre <- df_baseline %>%
  mutate(condition = 0) %>%
  dplyr::select(
    record_id, condition, all_of(outcomes), all_of(covariates),
    matches("_pre_max$"),  # original values
    matches("_pre_max_c$") # centered values
  ) %>%
  rename_with(~gsub("_pre_max", "", .x), matches("_pre_max$")) %>%
  rename_with(~gsub("_pre_max_c", "_c", .x), matches("_pre_max_c$"))

# POST-treadmill dataset (condition = 1)
df_post <- df_baseline %>%
  mutate(condition = 1) %>%
  dplyr::select(
    record_id, condition, all_of(outcomes), all_of(covariates),
    matches("_post_max$"),
    matches("_post_max_c$")
  ) %>%
  rename_with(~gsub("_post_max", "", .x), matches("_post_max$")) %>%
  rename_with(~gsub("_post_max_c", "_c", .x), matches("_post_max_c$"))

# Combine into final df_prepost
df_prepost <- bind_rows(df_pre, df_post) %>%
  arrange(record_id, condition)


# --------------------------------------------------
# Export
# --------------------------------------------------
write.csv(df_prepost, file = "df_prepost.csv", row.names = FALSE)


# Outcome Measures

In [None]:
outcomes <- c("fss_sum", "dsq_sum", "dsq_freq_sum", "dsq_severity_sum", "psqi_sum", "psqi_disturbances")
resp_vars <- c("mip_pre_max", "smip_pre_max", "fit_pre_max", "id_pre_max", "slopesmip_pre_max",
               "sindex_pre_max", "pif_pre_max", "volume_pre_max",
                "mip_post_max",  "smip_post_max","fit_post_max", "id_post_max", "slopesmip_post_max", 
                "pif_post_max", "sindex_post_max", "volume_post_max", 
                'cpet_vo2peak_absolute', "cpet_ve", "cpet_vt_peak", "cpet_o2pulse",  
                "fmd_percent")
resp_adjusted_vars <- c("mip_pre_max_percentpredict_1", "mip_pre_max_percentpredict_2", "mip_pre_max_percentpredict_3", "mip_pre_max_percentpredict_4", "mip_pre_max_percentpredict_5", "sindex_pre_max_percentpredict",
                        "mip_post_max_percentpredict_1", "mip_post_max_percentpredict_2", "mip_post_max_percentpredict_3", "mip_post_max_percentpredict_4", "mip_post_max_percentpredict_5", "sindex_post_max_percentpredict")

In [23]:
# Vector of respiratory and related variables to center
resp_vars <- c(
  "mip_pre_max", "smip_pre_max", "fit_pre_max", "id_pre_max", "slopesmip_pre_max",
  "sindex_pre_max", "pif_pre_max", "volume_pre_max",
  "mip_post_max", "smip_post_max", "fit_post_max", "id_post_max", "slopesmip_post_max",
  "pif_post_max", "sindex_post_max", "volume_post_max",
  "cpet_vo2peak_absolute", "cpet_ve", "cpet_vt_peak", "cpet_o2pulse",
  "fmd_percent"
)

# Center each variable in-place: x_centered = x - mean(x, na.rm = TRUE)
for (v in resp_vars) {
  if (!v %in% names(df_long)) {
    warning("Variable ", v, " not found in df_long, skipping.")
  } else {
    m <- mean(df_long[[v]], na.rm = TRUE)
    df_long[[v]] <- df_long[[v]] - m
  }
}

cat("‚úÖ Centering complete for variables:\n")
print(resp_vars)


‚úÖ Centering complete for variables:
 [1] "mip_pre_max"           "smip_pre_max"          "fit_pre_max"           "id_pre_max"           
 [5] "slopesmip_pre_max"     "sindex_pre_max"        "pif_pre_max"           "volume_pre_max"       
 [9] "mip_post_max"          "smip_post_max"         "fit_post_max"          "id_post_max"          
[13] "slopesmip_post_max"    "pif_post_max"          "sindex_post_max"       "volume_post_max"      
[17] "cpet_vo2peak_absolute" "cpet_ve"               "cpet_vt_peak"          "cpet_o2pulse"         
[21] "fmd_percent"          


# Descriptives

# Bivariate Correlations

In [46]:
# Function to calculate both Pearson and Spearman correlations
calculate_correlations <- function(data, outcome, respiratory, data_source) {
  # Remove rows with missing values for these two variables
  clean_data <- data[complete.cases(data[c(outcome, respiratory)]), ]
  
  # Calculate Pearson correlation
  pearson_result <- cor.test(clean_data[[outcome]], clean_data[[respiratory]], 
                             method = "pearson")
  
  # Calculate Spearman correlation
  spearman_result <- cor.test(clean_data[[outcome]], clean_data[[respiratory]], 
                              method = "spearman")
  
  # Return as a data frame
  tibble(
    data_source = data_source,
    outcome = outcome,
    respiratory = respiratory,
    pearson_r = pearson_result$estimate,
    pearson_p = pearson_result$p.value,
    spearman_rho = spearman_result$estimate,
    spearman_p = spearman_result$p.value,
    n = nrow(clean_data)
  )
}

# Initialize results list
correlation_results <- list()

# Calculate correlations for all combinations for BOTH datasets
i <- 1
for (y in outcomes) {
  for (x in resp_vars) {
    # Combined Visit 1 + Visit 2 data
    correlation_results[[i]] <- calculate_correlations(df_long, y, x, "combined_visits")
    i <- i + 1
    
    # Visit 1 only data
    visit1_data <- df_long %>% filter(time_point == 1)
    correlation_results[[i]] <- calculate_correlations(visit1_data, y, x, "visit1_only")
    i <- i + 1
  }
}

# Combine all results
final_correlations <- bind_rows(correlation_results)

# Reorder columns to match your requested format
final_correlations <- final_correlations %>%
  dplyr::select(data_source, outcome, respiratory, pearson_r, pearson_p, spearman_rho, spearman_p, n)

# Print a sample to check
head(final_correlations)

# Save to CSV
write.csv(final_correlations, "bivariate_correlations_all_data.csv", row.names = FALSE)

# Create formatted summary tables sorted by strongest Spearman correlation
summary_correlations <- final_correlations %>%
  group_by(data_source, outcome) %>%
  arrange(data_source, outcome, desc(abs(spearman_rho))) %>%
  mutate(
    pearson_sig = case_when(
      pearson_p < 0.001 ~ "***",
      pearson_p < 0.01 ~ "**",
      pearson_p < 0.05 ~ "*",
      TRUE ~ ""
    ),
    spearman_sig = case_when(
      spearman_p < 0.001 ~ "***",
      spearman_p < 0.01 ~ "**",
      spearman_p < 0.05 ~ "*",
      TRUE ~ ""
    )
  )

# Print summary tables for easy viewing
cat("COMBINED VISITS (1+2) - Correlations with FSS (Fatigue):\n")
summary_correlations %>% 
  filter(data_source == "combined_visits" & outcome == "fss_sum") %>% 
  print(n = 20)

cat("\nCOMBINED VISITS (1+2) - Correlations with Woods MFI (Brain Fog):\n")
summary_correlations %>% 
  filter(data_source == "combined_visits" & outcome == "woods_sum") %>% 
  print(n = 20)

cat("\nVISIT 1 ONLY - Correlations with FSS (Fatigue):\n")
summary_correlations %>% 
  filter(data_source == "visit1_only" & outcome == "fss_sum") %>% 
  print(n = 20)

cat("\nVISIT 1 ONLY - Correlations with Woods MFI (Brain Fog):\n")
summary_correlations %>% 
  filter(data_source == "visit1_only" & outcome == "woods_sum") %>% 
  print(n = 20)

# Save the sorted summary
write.csv(summary_correlations, "sorted_correlations_summary_all_data.csv", row.names = FALSE)

# Optional: Create a wide format for easy comparison between datasets
wide_format <- final_correlations %>%
  dplyr::select(-pearson_p, -spearman_p) %>%
  pivot_wider(
    names_from = data_source,
    values_from = c(pearson_r, spearman_rho, n),
    names_glue = "{data_source}_{.value}"
  ) %>%
  dplyr::select(outcome, respiratory, 
         visit1_only_pearson_r, combined_visits_pearson_r,
         visit1_only_spearman_rho, combined_visits_spearman_rho,
         visit1_only_n, combined_visits_n)

# Save wide format for easy comparison
write.csv(wide_format, "correlations_wide_format_comparison.csv", row.names = FALSE)

cat("\nWide Format Comparison (first 10 rows):\n")
print(head(wide_format, 10))



COMBINED VISITS (1+2) - Correlations with FSS (Fatigue):
# A tibble: 33 √ó 10
# Groups:   data_source, outcome [1]
   data_source     outcome respiratory    pearson_r pearson_p spearman_rho spearman_p     n pearson_sig
   <chr>           <chr>   <chr>              <dbl>     <dbl>        <dbl>      <dbl> <int> <chr>      
 1 combined_visits fss_sum mip_post_max_‚Ä¶    -0.615 0.0000237       -0.605  0.0000352    40 ***        
 2 combined_visits fss_sum mip_post_max_‚Ä¶    -0.562 0.000160        -0.601  0.0000415    40 ***        
 3 combined_visits fss_sum mip_post_max_‚Ä¶    -0.562 0.000160        -0.601  0.0000415    40 ***        
 4 combined_visits fss_sum sindex_post_m‚Ä¶    -0.579 0.000112        -0.594  0.0000662    39 ***        
 5 combined_visits fss_sum mip_post_max_‚Ä¶    -0.571 0.000119        -0.588  0.0000672    40 ***        
 6 combined_visits fss_sum mip_post_max      -0.493 0.00122         -0.582  0.0000806    40 **         
 7 combined_visits fss_sum mip_post_max_‚Ä¶

# OLS

## Covariate List

In [9]:
library(dplyr)
library(broom)
library(ggplot2)

# --------------------------------------------------
# CONFIGURATION - Easily modify these
# --------------------------------------------------
covariates <- c('data_age', 'data_kilograms', 'data_centimeters', 'subject_female')
# Alternative examples:
# covariates <- c('data_age')
# covariates <- c('data_age', 'subject_female', 'education_years')
# covariates <- character()  # for no covariates

# --------------------------------------------------
# Filter to Visit 1 only
# --------------------------------------------------
df_v1 <- df_long %>%
  filter(time_point == 1)

# --------------------------------------------------
# Storage object for regression results
# --------------------------------------------------
results <- list()
idx <- 1

# --------------------------------------------------
# Create diagnostics PDF
# --------------------------------------------------
pdf("correlations_diagnostic_check.pdf", width = 8, height = 10)

for (y in outcomes) {
  for (x in resp_vars) {
    
    # Build formula dynamically based on covariates
    if (length(covariates) > 0) {
      formula_str <- paste(y, "~", x, "+", paste(covariates, collapse = " + "))
    } else {
      formula_str <- paste(y, "~", x)
    }
    formula <- as.formula(formula_str)
    
    # Select variables dynamically
    model_vars <- c(y, x)
    if (length(covariates) > 0) {
      model_vars <- c(model_vars, covariates)
    }
    
    # Remove missing data
    df_model <- df_v1 %>%
      dplyr::select(all_of(model_vars)) %>%
      na.omit()
    
    # Skip if insufficient data
    if (nrow(df_model) < 10) next
    
    # Fit model
    fit <- lm(formula, data = df_model)
    
    # Extract regression summary
    tidy_fit <- tidy(fit)
    r2 <- summary(fit)$adj.r.squared
    
    # Store results
    results[[idx]] <- data.frame(
      outcome = y,
      predictor = x,
      beta = tidy_fit$estimate[2],
      std_error = tidy_fit$std.error[2],
      p_value = tidy_fit$p.value[2],
      adj_r2 = r2,
      N = nrow(df_model),
      covariates_used = paste(covariates, collapse = ", ")
    )
    idx <- idx + 1
    
    # --------------------------------------------------
    # Diagnostics plots for this model
    # --------------------------------------------------
    par(mfrow=c(3,1))
    
    plot(fit, which = 1, main = paste("Residuals vs Fitted:", y, "~", x))
    plot(fit, which = 2, main = paste("Normal Q-Q:", y, "~", x))
    plot(fit, which = 3, main = paste("Scale-Location:", y, "~", x))
    
  }
}

dev.off()

# --------------------------------------------------
# Combine and save regression table
# --------------------------------------------------
results_df <- bind_rows(results)
write.csv(results_df, "visit1_regression_results.csv", row.names = FALSE)

cat("\n‚úÖ DONE!\n")
cat("‚Ä¢ Visit 1 regression models completed\n")
cat("‚Ä¢ Covariates used:", paste(covariates, collapse = ", "), "\n")
cat("‚Ä¢ Diagnostics PDF saved as: correlations_diagnostic_check.pdf\n")
cat("‚Ä¢ Results table saved as: visit1_regression_results.csv\n")

1: In plot.new() :
  Cannot open temporary file 'C:\Users\ZCooper\AppData\Local\Temp\Rtmp0cXP4B\pdf988c1d0b2ac0' for compression (reason: No such file or directory); compression has been turned off for this device
2: In model.matrix.default(mt, mf, contrasts) :
  the response appeared on the right-hand side and was dropped
3: In model.matrix.default(mt, mf, contrasts) :
  problem with term 1 in model.matrix: no columns are assigned
4: In model.matrix.default(object, data = list(bdi_sum = c(7, 7, 4,  :
  the response appeared on the right-hand side and was dropped
5: In model.matrix.default(object, data = list(bdi_sum = c(7, 7, 4,  :
  problem with term 1 in model.matrix: no columns are assigned
6: In model.matrix.default(object, data = list(bdi_sum = c(7, 7, 4,  :
  the response appeared on the right-hand side and was dropped
7: In model.matrix.default(object, data = list(bdi_sum = c(7, 7, 4,  :
  problem with term 1 in model.matrix: no columns are assigned
8: In model.matrix.default(o


‚úÖ DONE!
‚Ä¢ Visit 1 regression models completed
‚Ä¢ Covariates used: data_age, data_kilograms, data_centimeters, subject_female 
‚Ä¢ Diagnostics PDF saved as: correlations_diagnostic_check.pdf
‚Ä¢ Results table saved as: visit1_regression_results.csv


## OG

In [7]:
library(dplyr)
library(broom)
library(ggplot2)

# --------------------------------------------------
# Filter to Visit 1 only
# --------------------------------------------------
df_v1 <- df_long %>%
  filter(time_point == 1)

# --------------------------------------------------
# Storage object for regression results
# --------------------------------------------------
results <- list()
idx <- 1

# --------------------------------------------------
# Create diagnostics PDF
# --------------------------------------------------
pdf("correlations_diagnostic_check.pdf", width = 8, height = 10)

for (y in outcomes) {
  for (x in resp_vars) {
    
    # Build formula: outcome ~ respiratory + covariates
    formula <- as.formula(
      paste(y, "~", x, "+ data_age + subject_female")
    )
    
    # Remove missing data
    df_model <- df_v1 %>%
      dplyr::select(all_of(c(y, x, "data_age", "subject_female"))) %>%
      na.omit()
    
    # Skip if insufficient data
    if (nrow(df_model) < 10) next
    
    # Fit model
    fit <- lm(formula, data = df_model)
    
    # Extract regression summary
    tidy_fit <- tidy(fit)
    r2 <- summary(fit)$adj.r.squared
    
    # Store results
    results[[idx]] <- data.frame(
      outcome = y,
      predictor = x,
      beta = tidy_fit$estimate[2],
      std_error = tidy_fit$std.error[2],
      p_value = tidy_fit$p.value[2],
      adj_r2 = r2,
      N = nrow(df_model)
    )
    idx <- idx + 1
    
    # --------------------------------------------------
    # Diagnostics plots for this model
    # --------------------------------------------------
    par(mfrow=c(3,1))
    
    plot(fit, which = 1, main = paste("Residuals vs Fitted:", y, "~", x))
    plot(fit, which = 2, main = paste("Normal Q-Q:", y, "~", x))
    plot(fit, which = 3, main = paste("Scale-Location:", y, "~", x))
    
  }
}

dev.off()

# --------------------------------------------------
# Combine and save regression table
# --------------------------------------------------
results_df <- bind_rows(results)
write.csv(results_df, "visit1_regression_results.csv", row.names = FALSE)

cat("\n‚úÖ DONE!\n")
cat("‚Ä¢ Visit 1 regression models completed\n")
cat("‚Ä¢ Diagnostics PDF saved as: correlations_diagnostic_check.pdf\n")
cat("‚Ä¢ Results table saved as: visit1_regression_results.csv\n")


In plot.new() :
  Cannot open temporary file 'C:\Users\ZCooper\AppData\Local\Temp\RtmpSkdRJm\pdf303c1a968e9' for compression (reason: No such file or directory); compression has been turned off for this device



‚úÖ DONE!
‚Ä¢ Visit 1 regression models completed
‚Ä¢ Diagnostics PDF saved as: correlations_diagnostic_check.pdf
‚Ä¢ Results table saved as: visit1_regression_results.csv


# Mean Variance Relationship

## 5:09pm

In [12]:
library(ggplot2)
library(dplyr)
library(gridExtra)

create_glm_diagnostic_plots <- function(data, outcome) {

  df <- data %>% dplyr::select(all_of(outcome)) %>% rename(value = all_of(outcome))
  df <- df %>% filter(!is.na(value))

  if (nrow(df) == 0) {
    p1 <- ggplot() + annotate("text", x=0.5, y=0.5, label="No data") +
      theme_void()
    p2 <- p1
    return(list(plots=list(p1, p2), rec=NULL))
  }

  # ---- Distribution Plot ----
  p1 <- ggplot(df, aes(value)) +
    geom_histogram(aes(y=after_stat(density)), bins=30, fill="steelblue", color="black", alpha=.7) +
    geom_density(fill="red", alpha=.4, color=NA) +
    geom_vline(xintercept=0, linetype="dashed", color="darkred", linewidth=1) +
    labs(title=paste("Distribution of", outcome), x="Value", y="Density") +
    theme_minimal()

  pct_zeros <- mean(df$value == 0) * 100
  mean_val <- mean(df$value)
  var_val <- var(df$value)
  var_mean_ratio <- ifelse(mean_val > 0, var_val / mean_val, Inf)

  stats_text <- sprintf("Zeros: %.1f%%\nMean: %.2f\nVar: %.2f\nVar/Mean: %.2f",
                        pct_zeros, mean_val, var_val, var_mean_ratio)

  p1 <- p1 + annotate("text", x = -Inf, y = Inf, label = stats_text, 
                      hjust = -0.1, vjust = 1.2, size = 3)

  # ---- Mean-Variance Plot ----
  if (n_distinct(df$value) > 10) {
    df_bins <- df %>% mutate(bin = ntile(value, 10)) %>%
      group_by(bin) %>% summarise(mean = mean(value), var = var(value)) %>%
      filter(var > 0)

    if (nrow(df_bins) > 1) {
      p2 <- ggplot(df_bins, aes(mean, var)) +
        geom_point(size=3, color="darkgreen") +
        geom_line(aes(y = mean), color="red", linetype="dashed") +
        geom_line(aes(y = mean^2), color="blue", linetype="dashed") +
        labs(title="Mean-Variance Relationship", x="Mean", y="Variance") +
        theme_minimal()
    } else {
      p2 <- ggplot() + 
        annotate("text", x=0.5, y=0.5, label="Insufficient variation") +
        theme_void()
    }

  } else {
    p2 <- ggplot() + 
      annotate("text", x=0.5, y=0.5, label="Too few unique values") +
      theme_void()
  }

  # --- Recommendation ---
  rec <- generate_glm_recommendations(df$value, pct_zeros, var_mean_ratio)

  list(plots=list(p1, p2), rec=rec)
}

generate_glm_recommendations <- function(x, pct_zeros, var_mean_ratio) {

  unique_vals <- n_distinct(x)
  is_count <- all(x >= 0 & x == floor(x))
  is_binary <- unique_vals <= 2

  out <- c()

  if (is_binary) {
    out <- c(out, "PRIMARY: Logistic Regression (binary outcome)")
  } else if (is_count & pct_zeros > 20) {
    if (var_mean_ratio > 2) {
      out <- c(out, "PRIMARY: Zero-Inflated Negative Binomial")
    } else {
      out <- c(out, "PRIMARY: Zero-Inflated Poisson")
    }
  } else if (is_count) {
    if (var_mean_ratio > 2) {
      out <- c(out, "PRIMARY: Negative Binomial")
    } else if (var_mean_ratio >= .8 & var_mean_ratio <= 1.5) {
      out <- c(out, "PRIMARY: Poisson Regression")
    } else {
      out <- c(out, "PRIMARY: Quasi-Poisson or Negative Binomial")
    }
  } else {
    if (pct_zeros > 30) {
      out <- c(out, "PRIMARY: Two-part or Hurdle model")
    } else {
      out <- c(out, "PRIMARY: Gaussian or Gamma GLM")
    }
  }

  if (var_mean_ratio > 5) out <- c(out, "SECONDARY: Strong overdispersion")
  if (pct_zeros > 50) out <- c(out, "SECONDARY: Zero inflation likely")

  out
}

generate_glm_selection_pdf <- function(outcomes, data, pdf_filename = "glm_selection.pdf") {

  pdf(pdf_filename, width=8.5, height=11)  # <-- fixes sizing and removes second PDF bug

  # Cover Page
  grid.newpage()
  grid.text("GLM Model Selection Report", gp=gpar(fontsize=24, fontface="bold"), y=.8)
  grid.text(paste("Generated:", Sys.Date()), gp=gpar(fontsize=14), y=.7)
  grid.text(paste("Variables analyzed:", length(outcomes)), gp=gpar(fontsize=14), y=.6)

  for (outcome in outcomes) {

    grid.newpage()
    result <- create_glm_diagnostic_plots(data, outcome)

    grid.arrange(result$plots[[1]], result$plots[[2]], ncol=1)

    # Recommendation Page
    grid.newpage()
    grid.text(paste("GLM Recommendations for:", outcome), 
              gp=gpar(fontsize=18, fontface="bold"), y=.9)

    if (length(result$rec) == 0) {
      grid.text("No recommendations generated", y=.7)
    } else {
      y <- .8
      for (r in result$rec) {
        grid.text(r, y=y, x=.1, just="left", gp=gpar(fontsize=12))
        y <- y - .05
      }
    }
  }

  dev.off()

  return(pdf_filename)
}

results <- generate_glm_selection_pdf(
  outcomes = outcomes,
  data = df_long
)

## OG

In [11]:
library(ggplot2)
library(dplyr)
library(tidyr)
library(purrr)
library(gridExtra)
library(grid)
library(scales)

#------------------------------------------
# Generate GLM Recommendation (R Version)
#------------------------------------------
generate_glm_recommendation <- function(data) {
  pct_zeros <- mean(data == 0) * 100
  mean_val <- mean(data)
  var_val <- var(data)
  var_mean_ratio <- ifelse(mean_val > 0, var_val / mean_val, Inf)
  unique_vals <- dplyr::n_distinct(data)
  
  is_count <- all(data >= 0 & data == floor(data))
  is_binary <- unique_vals <= 2
  
  recommendations <- c()
  
  if (is_binary) {
    recommendations <- c(recommendations, "PRIMARY: Logistic Regression (binary outcome)")
  } else if (is_count && pct_zeros > 20) {
    if (var_mean_ratio > 2) {
      recommendations <- c(recommendations,
                           "PRIMARY: Zero-Inflated Negative Binomial (excess zeros + overdispersion)")
    } else {
      recommendations <- c(recommendations,
                           "PRIMARY: Zero-Inflated Poisson (excess zeros)")
    }
  } else if (is_count) {
    if (var_mean_ratio > 2) {
      recommendations <- c(recommendations,
                           "PRIMARY: Negative Binomial (overdispersed count data)")
    } else if (var_mean_ratio >= 0.8 && var_mean_ratio <= 1.5) {
      recommendations <- c(recommendations,
                           "PRIMARY: Poisson Regression (variance ‚âà mean)")
    } else {
      recommendations <- c(recommendations,
                           "PRIMARY: Consider Quasi-Poisson or Negative Binomial")
    }
  } else {
    if (pct_zeros > 30) {
      recommendations <- c(recommendations,
                           "PRIMARY: Two-part or Hurdle model (continuous with excess zeros)")
    } else {
      recommendations <- c(recommendations,
                           "PRIMARY: Gaussian GLM (OLS) or Gamma GLM (if positive & right-skewed)")
    }
  }
  
  # Secondary recs
  if (var_mean_ratio > 5) recommendations <- c(recommendations, "SECONDARY: Strong overdispersion detected")
  if (var_mean_ratio < 0.5) recommendations <- c(recommendations, "SECONDARY: Possible underdispersion")
  if (pct_zeros > 50) recommendations <- c(recommendations, "SECONDARY: Consider zero-inflation models")
  
  return(recommendations)
}

#------------------------------------------
# Create KDE + Mean-Variance Diagnostic Plots
#------------------------------------------
create_diagnostic_plots <- function(df, outcome) {
  data_vec <- df[[outcome]] %>% na.omit()
  
  # Distribution + KDE
  p1 <- ggplot(df, aes_string(x = outcome)) +
    geom_histogram(aes(y = ..density..), bins = 30, fill = "steelblue", alpha = 0.6, color = "black") +
    geom_density(fill = "red", alpha = 0.3) +
    geom_vline(xintercept = 0, color = "darkred", linetype = "dashed") +
    labs(title = paste("Distribution of", outcome), y = "Density", x = "Value")
  
  # Mean-Variance Plot
  if (dplyr::n_distinct(data_vec) > 10) {
    temp <- df %>%
      mutate(bin = ntile(.data[[outcome]], 10)) %>%
      group_by(bin) %>%
      summarize(mean = mean(.data[[outcome]], na.rm = TRUE),
                var = var(.data[[outcome]], na.rm = TRUE)) %>%
      drop_na()
    
    p2 <- ggplot(temp, aes(x = mean, y = var)) +
      geom_point(size = 3, color = "darkgreen") +
      geom_line(aes(y = mean), color = "red", linetype = "dashed") +
      geom_line(aes(y = mean^2), color = "blue", linetype = "dashed") +
      scale_x_continuous(trans = "log10", labels = comma) +
      scale_y_continuous(trans = "log10", labels = comma) +
      labs(title = "Mean-Variance Relationship", x = "Mean", y = "Variance")
  } else {
    p2 <- ggplot() +
      annotate("text", x = 0.5, y = 0.5,
               label = "Insufficient unique values for\nmean‚Äìvariance plot", size = 6) +
      theme_void()
  }
  
  list(p1 = p1, p2 = p2)
}

#------------------------------------------
# Create Recommendation Page (text-only)
#------------------------------------------
recommendation_page <- function(outcome, stats, recommendations) {
  text <- paste(
    sprintf("GLM Recommendations for: %s\n\n", outcome),
    sprintf("Sample size: %d\n", stats$n),
    sprintf("Mean: %.3f\nVariance: %.3f\nVar/Mean ratio: %.3f\n", stats$mean, stats$var, stats$var_mean),
    sprintf("Percentage zeros: %.1f%%\n", stats$pct_zeros),
    sprintf("Unique values: %d\nRange: %.2f to %.2f\n", stats$unique_vals, stats$min, stats$max),
    sprintf("Data type: %s\n\n", stats$data_type),
    "Model Recommendations:\n",
    paste(sprintf("- %s", recommendations), collapse = "\n"),
    "\n\nInterpretation Guide:\n",
    "‚Ä¢ Var/Mean ‚âà 1 ‚Üí Poisson OK\n",
    "‚Ä¢ Var/Mean > 2 ‚Üí Overdispersed ‚Üí NegBin\n",
    "‚Ä¢ Var/Mean < 0.8 ‚Üí Underdispersed\n",
    "‚Ä¢ High zeros ‚Üí Hurdle / Zero-inflated\n",
    "‚Ä¢ 0/1 ‚Üí Logistic Regression\n",
    "‚Ä¢ Positive continuous ‚Üí Gamma GLM\n",
    sep = ""
  )
  
  grid.newpage()
  grid.text(text, x = 0.02, y = 0.98, just = c("left", "top"), gp = gpar(fontsize = 10))
}

#------------------------------------------
# Main PDF Generator
#------------------------------------------
generate_glm_selection_pdf <- function(outcomes, data, pdf_filename = NULL) {
  
  if (is.null(pdf_filename)) {
    pdf_filename <- paste0("glm_model_selection_", format(Sys.time(), "%Y%m%d_%H%M%S"), ".pdf")
  }
  
  pdf(pdf_filename, width = 8.5, height = 11)
  
  # Title Page
  grid.newpage()
  grid.text("GLM Model Selection Report", y = 0.8, gp = gpar(fontsize = 24, fontface = "bold"))
  grid.text(paste("Generated:", format(Sys.Date(), "%B %d, %Y")), y = 0.7, gp = gpar(fontsize = 14))
  grid.text(paste("Variables analyzed:", length(outcomes)), y = 0.6, gp = gpar(fontsize = 14))
  
  results <- list()
  
  for (outcome in outcomes) {
    if (!(outcome %in% names(data))) next
    
    vec <- na.omit(data[[outcome]])
    if (length(vec) == 0) next
    
    # Stats
    stats <- list(
      n = length(vec),
      mean = mean(vec),
      var = var(vec),
      var_mean = ifelse(mean(vec) > 0, var(vec)/mean(vec), Inf),
      pct_zeros = mean(vec == 0) * 100,
      unique_vals = n_distinct(vec),
      min = min(vec),
      max = max(vec),
      data_type =
        if (n_distinct(vec) <= 2) "Binary"
        else if (all(vec >= 0 & vec == floor(vec))) "Count"
        else "Continuous"
    )
    
    recs <- generate_glm_recommendation(vec)
    results[[outcome]] <- recs
    
    # Plots
    plots <- create_diagnostic_plots(data, outcome)
    grid.arrange(plots$p1, plots$p2, ncol = 2,
                 top = textGrob(paste("GLM Diagnostics:", outcome),
                                gp = gpar(fontsize = 16, fontface = "bold")))
    
    # Page: Recommendations
    recommendation_page(outcome, stats, recs)
  }
  
  dev.off()
  
  return(list(pdf = pdf_filename, recommendations = results))
}

results <- generate_glm_selection_pdf(
  outcomes = outcomes,
  data = df_long
)



# OLS Moderation Analysis

## 1036

In [17]:
library(dplyr)
library(tidyr)
library(lme4)   # for glmer.nb

# --------------------------------------------------
# CONFIGURATION
# --------------------------------------------------
covariates    <- c()
outcomes      <- c("fss_sum", "woods_sum", "dsq_sum")
resp_types    <- c("mip", "smip", "fit", "id", "slopesmip", "sindex", "pif", "volume")
condition_var <- "condition"   # 0 = pre, 1 = post

# --------------------------------------------------
# Create long format dataframe for pre/post analysis
# --------------------------------------------------
df_prepost <- df_long %>%
  filter(time_point %in% c(1, 2)) %>%
  mutate(condition = ifelse(time_point == 1, 0, 1)) %>%
  dplyr::select(
    record_id,
    condition,
    all_of(outcomes),
    matches(paste0("^(", paste(resp_types, collapse = "|"), ")_(pre|post)_max$")),
    all_of(covariates)
  ) %>%
  pivot_longer(
    cols = matches(paste0("^(", paste(resp_types, collapse = "|"), ")_(pre|post)_max$")),
    names_to      = c("resp_type", "time", ".value"),
    names_pattern = "(.+)_(pre|post)_(.+)"
  ) %>%
  rename(resp_value = max) %>%
  filter(!is.na(resp_value)) %>%
  distinct()

# Optional sanity check:
# df_prepost %>% count(record_id, resp_type, condition)

# --------------------------------------------------
# GLMM model comparison: random intercept vs slope vs intercept+slope
# --------------------------------------------------
model_comp_list <- list()  # for fatigue_glmm_model_comparison.csv
reg_results     <- list()  # for fatigue_glmm_regressionresults.csv
comp_idx        <- 1
reg_idx         <- 1

for (y in outcomes) {
  for (resp in resp_types) {
    
    # Subset to this respiratory measure
    df_model <- df_prepost %>%
      filter(resp_type == resp) %>%
      dplyr::select(record_id, condition, resp_value, all_of(y), all_of(covariates)) %>%
      na.omit()
    
    # Need reasonable data to fit a GLMM
    if (nrow(df_model) < 10) next
    if (length(unique(df_model[[condition_var]])) < 2) next
    if (length(unique(df_model$record_id)) < 5) next  # simple guardrail
    
    # --------------------------------------------------
    # Fixed part of the formula
    # outcome ~ resp_value * condition + covariates
    # --------------------------------------------------
    if (length(covariates) > 0) {
      fixed_rhs <- paste("resp_value * condition",
                         paste(covariates, collapse = " + "),
                         sep = " + ")
    } else {
      fixed_rhs <- "resp_value * condition"
    }
    
    # Random structures
    form_int   <- as.formula(paste(y, "~", fixed_rhs, "+ (1 | record_id)"))
    form_slope <- as.formula(paste(y, "~", fixed_rhs, "+ (0 + resp_value | record_id)"))
    form_both  <- as.formula(paste(y, "~", fixed_rhs, "+ (1 + resp_value | record_id)"))
    
    # --------------------------------------------------
    # Fit models with error-only tryCatch
    # (warnings will print but not crash)
    # --------------------------------------------------
    fit_int <- suppressWarnings(
      tryCatch(
        glmer.nb(
          form_int,
          data    = df_model,
          control = glmerControl(optimizer = "bobyqa",
                                 optCtrl   = list(maxfun = 2e5))
        ),
        error = function(e) {
          message("Error (random intercept) for ", y, " ~ ", resp, ": ", conditionMessage(e))
          NULL
        }
      )
    )
    
    fit_slope <- suppressWarnings(
      tryCatch(
        glmer.nb(
          form_slope,
          data    = df_model,
          control = glmerControl(optimizer = "bobyqa",
                                 optCtrl   = list(maxfun = 2e5))
        ),
        error = function(e) {
          message("Error (random slope) for ", y, " ~ ", resp, ": ", conditionMessage(e))
          NULL
        }
      )
    )
    
    fit_both <- suppressWarnings(
      tryCatch(
        glmer.nb(
          form_both,
          data    = df_model,
          control = glmerControl(optimizer = "bobyqa",
                                 optCtrl   = list(maxfun = 2e5))
        ),
        error = function(e) {
          message("Error (random intercept + slope) for ", y, " ~ ", resp, ": ", conditionMessage(e))
          NULL
        }
      )
    )
    
    # --------------------------------------------------
    # Collect AICs
    # --------------------------------------------------
    aic_int   <- if (!is.null(fit_int))   AIC(fit_int)   else NA_real_
    aic_slope <- if (!is.null(fit_slope)) AIC(fit_slope) else NA_real_
    aic_both  <- if (!is.null(fit_both))  AIC(fit_both)  else NA_real_
    
    aic_vec <- c(
      random_intercept  = aic_int,
      random_slope      = aic_slope,
      random_int_slope  = aic_both
    )
    
    # If all failed, skip
    if (all(is.na(aic_vec))) next
    
    # Choose best random structure
    best_name <- names(which.min(aic_vec))
    best_fit  <- switch(
      best_name,
      random_intercept = fit_int,
      random_slope     = fit_slope,
      random_int_slope = fit_both
    )
    
    # Just in case
    if (is.null(best_fit)) next
    
    # Save model comparison info
    model_comp_list[[comp_idx]] <- data.frame(
      outcome       = y,
      resp_type     = resp,
      N             = nrow(df_model),
      n_subjects    = length(unique(df_model$record_id)),
      covariates    = if (length(covariates) > 0) paste(covariates, collapse = ", ") else "none",
      AIC_random_intercept   = aic_int,
      AIC_random_slope       = aic_slope,
      AIC_random_int_slope   = aic_both,
      best_random_structure  = best_name,
      stringsAsFactors       = FALSE
    )
    comp_idx <- comp_idx + 1
    
    # --------------------------------------------------
    # Extract interaction term from best model
    # --------------------------------------------------
    coef_mat  <- summary(best_fit)$coefficients
    term_name <- "resp_value:condition"  # interaction with centered resp_value
    
    if (term_name %in% rownames(coef_mat)) {
      est <- coef_mat[term_name, "Estimate"]
      se  <- coef_mat[term_name, "Std. Error"]
      z   <- coef_mat[term_name, "z value"]
      p   <- coef_mat[term_name, "Pr(>|z|)"]
    } else {
      est <- se <- z <- p <- NA_real_
    }
    
    reg_results[[reg_idx]] <- data.frame(
      outcome          = y,
      resp_type        = resp,
      best_random_str  = best_name,
      N                = nrow(df_model),
      n_subjects       = length(unique(df_model$record_id)),
      beta_interaction = est,
      se_interaction   = se,
      z_interaction    = z,
      p_interaction    = p,
      covariates       = if (length(covariates) > 0) paste(covariates, collapse = ", ") else "none",
      stringsAsFactors = FALSE
    )
    reg_idx <- reg_idx + 1
  }
}

# --------------------------------------------------
# Save outputs as CSVs
# --------------------------------------------------
fatigue_glmm_model_comparison <- bind_rows(model_comp_list)
write.csv(
  fatigue_glmm_model_comparison,
  "fatigue_glmm_model_comparison.csv",
  row.names = FALSE
)

fatigue_glmm_regressionresults <- bind_rows(reg_results)
write.csv(
  fatigue_glmm_regressionresults,
  "fatigue_glmm_regressionresults.csv",
  row.names = FALSE
)

cat("\n‚úÖ DONE!\n")
cat("‚Ä¢ NB GLMMs fit with three random structures per outcome √ó resp_type\n")
cat("‚Ä¢ Model comparison saved to: fatigue_glmm_model_comparison.csv\n")
cat("‚Ä¢ Interaction term results saved to: fatigue_glmm_regressionresults.csv\n")






‚úÖ DONE!
‚Ä¢ NB GLMMs fit with three random structures per outcome √ó resp_type
‚Ä¢ Model comparison saved to: fatigue_glmm_model_comparison.csv
‚Ä¢ Interaction term results saved to: fatigue_glmm_regressionresults.csv


## OG

In [16]:
library(dplyr)
library(broom)
library(broom.mixed)
library(ggplot2)
library(lme4)
library(lmerTest)
library(car)
library(purrr)

# --------------------------------------------------
# CONFIGURATION
# --------------------------------------------------
covariates <- c('data_age', 'subject_female')
outcomes <- c("fss_sum", "woods_sum", "dsq_sum")
resp_vars_base <- c("mip", "smip", "fit", "id", "slopesmip", "sindex", "pif", "volume")

# --------------------------------------------------
# Create long format dataframe for pre/post analysis
# --------------------------------------------------
df_prepost <- df_long %>%
  # Filter to include both pre and post time points
  filter(time_point %in% c(1, 2)) %>%
  # Create condition variable: 0 = pre, 1 = post
  mutate(condition = ifelse(time_point == 1, 0, 1)) %>%
  # Select relevant columns
  dplyr::select(record_id, condition, all_of(outcomes), 
                matches(paste0("^(", paste(resp_vars_base, collapse = "|"), ")_(pre|post)_max$")),
                all_of(covariates)) %>%
  # Reshape to long format for respiratory variables
  pivot_longer(
    cols = matches(paste0("^(", paste(resp_vars_base, collapse = "|"), ")_(pre|post)_max$")),
    names_to = c("resp_type", "time", ".value"),
    names_pattern = "(.+)_(pre|post)_(.+)"
  ) %>%
  rename(resp_value = max) %>%
  filter(!is.na(resp_value)) %>%
  # Remove duplicate rows if any
  distinct()

# --------------------------------------------------
# Storage for moderation results
# --------------------------------------------------
moderation_results <- list()
plot_data <- list()  # Store data for significant interactions
idx <- 1

# --------------------------------------------------
# Create output PDFs
# --------------------------------------------------
pdf("moderation_scatterplots.pdf", width = 10, height = 8)
pdf("moderation_diagnostics.pdf", width = 10, height = 8)

# --------------------------------------------------
# Moderation analysis loop
# --------------------------------------------------
for (y in outcomes) {
  for (resp in resp_vars_base) {
    
    # Check if we have data for this respiratory variable
    if (!resp %in% unique(df_prepost$resp_type)) next
    
    # Prepare data for this specific analysis
    df_analysis <- df_prepost %>%
      filter(resp_type == resp) %>%
      dplyr::select(record_id, condition, all_of(y), resp_value, all_of(covariates)) %>%
      rename(outcome = !!y) %>%
      na.omit()
    
    # Skip if insufficient data
    if (nrow(df_analysis) < 20 || length(unique(df_analysis$record_id)) < 10) {
      cat("Skipping", y, "~", resp, "- insufficient data\n")
      next
    }
    
    # Build formula for mixed effects model with moderation
    if (length(covariates) > 0) {
      formula <- as.formula(
        paste("outcome ~ resp_value * condition +", 
              paste(covariates, collapse = " + "), 
              "+ (1 | record_id)")
      )
    } else {
      formula <- as.formula("outcome ~ resp_value * condition + (1 | record_id)")
    }
    
    # Fit mixed effects model
    fit <- lmer(formula, data = df_analysis)
    
    # Extract results
    tidy_fit <- tidy(fit)
    
    # Calculate R-squared manually if MuMIn not available
    tryCatch({
      if (!require(MuMIn, quietly = TRUE)) {
        # Manual R-squared calculation
        var_fixed <- var(predict(fit, re.form = NA))
        var_random <- var(predict(fit) - predict(fit, re.form = NA))
        var_resid <- var(residuals(fit))
        var_total <- var_fixed + var_random + var_resid
        marginal_r2 <- var_fixed / var_total
        conditional_r2 <- (var_fixed + var_random) / var_total
        r2 <- c(marginal_r2, conditional_r2)
      } else {
        r2 <- MuMIn::r.squaredGLMM(fit)
      }
    }, error = function(e) {
      r2 <- c(NA, NA)  # If R-squared calculation fails
    })
    
    # Store moderation results
    interaction_term <- "resp_value:condition"
    interaction_row <- tidy_fit[tidy_fit$term == interaction_term, ]
    
    moderation_results[[idx]] <- data.frame(
      outcome = y,
      predictor = resp,
      interaction_term = interaction_term,
      interaction_beta = ifelse(nrow(interaction_row) > 0, interaction_row$estimate, NA),
      interaction_se = ifelse(nrow(interaction_row) > 0, interaction_row$std.error, NA),
      interaction_t = ifelse(nrow(interaction_row) > 0, interaction_row$statistic, NA),
      interaction_p = ifelse(nrow(interaction_row) > 0, interaction_row$p.value, NA),
      marginal_r2 = ifelse(exists("r2"), r2[1], NA),
      conditional_r2 = ifelse(exists("r2"), r2[2], NA),
      N_observations = nrow(df_analysis),
      N_subjects = length(unique(df_analysis$record_id)),
      covariates_used = paste(covariates, collapse = ", ")
    )
    
    # --------------------------------------------------
    # Create scatterplot for significant interactions (p < 0.05)
    # --------------------------------------------------
    if (nrow(interaction_row) > 0 && !is.na(interaction_row$p.value) && interaction_row$p.value < 0.05) {
      
      # Create scatterplot with regression lines by condition
      p <- ggplot(df_analysis, aes(x = resp_value, y = outcome, color = factor(condition))) +
        geom_point(alpha = 0.6) +
        geom_smooth(method = "lm", se = TRUE) +
        scale_color_manual(values = c("0" = "blue", "1" = "red"),
                         labels = c("0" = "Pre", "1" = "Post"),
                         name = "Condition") +
        labs(title = paste("Moderation:", y, "~", resp, "√ó Condition"),
             subtitle = paste("Interaction p =", round(interaction_row$p.value, 4)),
             x = resp,
             y = y) +
        theme_minimal()
      
      print(p)
      
      # Also create a simple effects plot (pre vs post separately)
      p_simple <- ggplot(df_analysis, aes(x = resp_value, y = outcome)) +
        geom_point(alpha = 0.6) +
        geom_smooth(method = "lm", se = TRUE) +
        facet_wrap(~ condition, labeller = as_labeller(c("0" = "Pre", "1" = "Post"))) +
        labs(title = paste("Simple Effects:", y, "~", resp),
             subtitle = paste("Interaction p =", round(interaction_row$p.value, 4)),
             x = resp,
             y = y) +
        theme_minimal()
      
      print(p_simple)
    }
    
    # --------------------------------------------------
    # Diagnostic plots
    # --------------------------------------------------
    par(mfrow = c(2, 2))
    
    # 1. Q-Q plot for residuals
    qqnorm(residuals(fit), main = paste("Q-Q Plot:", y, "~", resp))
    qqline(residuals(fit))
    
    # 2. Residuals vs Fitted
    plot(fitted(fit), residuals(fit), 
         main = paste("Residuals vs Fitted:", y, "~", resp),
         xlab = "Fitted values", ylab = "Residuals")
    abline(h = 0, col = "red")
    
    # 3. Scale-Location plot
    plot(fitted(fit), sqrt(abs(residuals(fit))),
         main = paste("Scale-Location:", y, "~", resp),
         xlab = "Fitted values", ylab = "‚àö|Standardized residuals|")
    
    # 4. Check for multicollinearity (VIF for fixed effects)
    tryCatch({
      vif_values <- vif(fit)
      barplot(vif_values, main = paste("VIF Values:", y, "~", resp),
              ylab = "VIF", las = 2, cex.names = 0.7)
      abline(h = 5, col = "red", lty = 2)
      abline(h = 10, col = "red", lty = 2)
    }, error = function(e) {
      plot(1, type = "n", axes = FALSE, xlab = "", ylab = "",
           main = paste("VIF not available:", y, "~", resp))
      text(1, 1, "VIF calculation failed", cex = 0.8)
    })
    
    idx <- idx + 1
  }
}

dev.off()  # Close PDFs

# --------------------------------------------------
# Save moderation results
# --------------------------------------------------
moderation_df <- bind_rows(moderation_results)
write.csv(moderation_df, "moderation_results.csv", row.names = FALSE)

# --------------------------------------------------
# Create summary of significant interactions
# --------------------------------------------------
significant_interactions <- moderation_df %>%
  filter(interaction_p < 0.05) %>%
  arrange(interaction_p)

if (nrow(significant_interactions) > 0) {
  write.csv(significant_interactions, "significant_moderation_effects.csv", row.names = FALSE)
  cat("\nüéØ SIGNIFICANT MODERATION EFFECTS FOUND!\n")
  print(significant_interactions[, c("outcome", "predictor", "interaction_p", "interaction_beta")])
} else {
  cat("\n‚ùå No significant moderation effects found (p < 0.05)\n")
}

# --------------------------------------------------
# Completion message
# --------------------------------------------------
cat("\n‚úÖ MODERATION ANALYSIS COMPLETE!\n")
cat("‚Ä¢ Pre/post moderation models:", nrow(moderation_df), "\n")
cat("‚Ä¢ Significant interactions:", nrow(significant_interactions), "\n")
cat("‚Ä¢ Results saved as: moderation_results.csv\n")
cat("‚Ä¢ Scatterplots saved as: moderation_scatterplots.pdf\n")
cat("‚Ä¢ Diagnostics saved as: moderation_diagnostics.pdf\n")
cat("‚Ä¢ Covariates used:", paste(covariates, collapse = ", "), "\n")

package ‚Äòbroom.mixed‚Äô was built under R version 4.3.3 


`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'


In plot.new() :
  Cannot open temporary file 'C:\Users\ZCooper\AppData\Local\Temp\Rtmp0cXP4B\pdf988c521b946' for compression (reason: No such file or directory); compression has been turned off for this device



üéØ SIGNIFICANT MODERATION EFFECTS FOUND!
    outcome predictor interaction_p interaction_beta
1   fss_sum slopesmip   0.000459908      -0.23851623
2   fss_sum        id   0.001594162       0.99439955
3   dsq_sum      smip   0.007698588      -0.02006350
4   dsq_sum       mip   0.013574379      -0.11790156
5 woods_sum      smip   0.016345720      -0.01608678

‚úÖ MODERATION ANALYSIS COMPLETE!
‚Ä¢ Pre/post moderation models: 24 
‚Ä¢ Significant interactions: 5 
‚Ä¢ Results saved as: moderation_results.csv
‚Ä¢ Scatterplots saved as: moderation_scatterplots.pdf
‚Ä¢ Diagnostics saved as: moderation_diagnostics.pdf
‚Ä¢ Covariates used: data_age, subject_female 


# GLM Moderation Analysis

## Fixed effects model

In [30]:
library(dplyr)
library(tidyr)
library(ggplot2)
library(broom)

# --------------------------------------------------
# CONFIGURATION
# --------------------------------------------------
covariates    <- c()
outcomes      <- c("fss_sum", "woods_sum", "dsq_sum")
resp_types    <- c("mip", "smip", "fit", "id", "slopesmip", "sindex", "pif", "volume")
condition_var <- "condition"   # 0 = pre, 1 = post

# --------------------------------------------------
# Create long format dataframe for pre/post analysis
# (assumes df_long already exists and, if you wanted, 
#  respiratory variables have been centered in df_long)
# --------------------------------------------------
df_prepost <- df_long %>%
  # ONLY use pre-intervention date
  filter(time_point == 0) %>%
  
  # condition already encoded as 0 = pre-treadmill, 1 = post-treadmill
  mutate(condition = ifelse(condition == 0, 0, 1)) %>%
  
  dplyr::select(
    record_id,
    condition,
    all_of(outcomes),
    matches(paste0("^(", paste(resp_types, collapse = "|"), ")_(pre|post)_max$")),
    all_of(covariates)
  ) %>%
  
  pivot_longer(
    cols = matches(paste0("^(", paste(resp_types, collapse = "|"), ")_(pre|post)_max$")),
    names_to      = c("resp_type", "time", ".value"),
    names_pattern = "(.+)_(pre|post)_(.+)"
  ) %>%
  
  rename(resp_value = max) %>%
  filter(!is.na(resp_value)) %>%
  distinct()


# --------------------------------------------------
# Storage for regression results
# --------------------------------------------------
reg_results <- list()
reg_idx     <- 1

# --------------------------------------------------
# PDF for diagnostics (one set of plots per model)
# --------------------------------------------------
pdf("fatigue_lm_diagnosticcheck.pdf", width = 8, height = 10)

for (y in outcomes) {
  for (resp in resp_types) {
    
    # Subset to this respiratory measure
    df_model <- df_prepost %>%
      filter(resp_type == resp) %>%
      dplyr::select(record_id, condition, resp_value, all_of(y), all_of(covariates)) %>%
      na.omit()
    
    # Basic guards
    if (nrow(df_model) < 10) next
    if (length(unique(df_model[[condition_var]])) < 2) next
    if (length(unique(df_model$record_id)) < 5) next  # still a small guard, but more lenient than GLMM
    
    # --------------------------------------------------
    # Build formula: outcome ~ resp_value * condition + covariates
    # --------------------------------------------------
    if (length(covariates) > 0) {
      fixed_rhs <- paste("resp_value * condition",
                         paste(covariates, collapse = " + "),
                         sep = " + ")
    } else {
      fixed_rhs <- "resp_value * condition"
    }
    
    formula_str <- paste(y, "~", fixed_rhs)
    form_lm     <- as.formula(formula_str)
    
    # --------------------------------------------------
    # Fit linear model
    # --------------------------------------------------
    fit <- lm(form_lm, data = df_model)
    
    # --------------------------------------------------
    # Diagnostics plots for this model
    # --------------------------------------------------
    par(mfrow = c(3, 1))
    
    # 1) Residuals vs Fitted
    plot(
      fit,
      which = 1,
      main = paste("Residuals vs Fitted:", y, "~ resp_value (", resp, ")")
    )
    
    # 2) Normal Q-Q
    plot(
      fit,
      which = 2,
      main = paste("Normal Q-Q:", y, "~ resp_value (", resp, ")")
    )
    
    # 3) Scale-Location
    plot(
      fit,
      which = 3,
      main = paste("Scale-Location:", y, "~ resp_value (", resp, ")")
    )
    
    # --------------------------------------------------
    # Extract interaction term + model info
    # --------------------------------------------------
    tidy_fit <- broom::tidy(fit)
    glance   <- broom::glance(fit)
    
    term_name <- "resp_value:condition"
    row_int   <- tidy_fit[tidy_fit$term == term_name, ]
    
    if (nrow(row_int) == 1) {
      est <- row_int$estimate
      se  <- row_int$std.error
      t   <- row_int$statistic
      p   <- row_int$p.value
    } else {
      est <- se <- t <- p <- NA_real_
    }
    
    reg_results[[reg_idx]] <- data.frame(
      outcome          = y,
      resp_type        = resp,
      N                = nrow(df_model),
      n_subjects       = length(unique(df_model$record_id)),
      beta_interaction = est,
      se_interaction   = se,
      t_interaction    = t,
      p_interaction    = p,
      r_squared        = glance$r.squared,
      adj_r_squared    = glance$adj.r.squared,
      covariates       = if (length(covariates) > 0) paste(covariates, collapse = ", ") else "none",
      stringsAsFactors = FALSE
    )
    reg_idx <- reg_idx + 1
  }
}

dev.off()  # close fatigue_lm_diagnosticcheck.pdf

# --------------------------------------------------
# Combine and save regression table
# --------------------------------------------------
fatigue_lm_regressionresults <- bind_rows(reg_results)

write.csv(
  fatigue_lm_regressionresults,
  "fatigue_lm_regressionresults.csv",
  row.names = FALSE
)

cat("\n‚úÖ DONE (LM models)!\n")
cat("‚Ä¢ Interaction results saved to: fatigue_lm_regressionresults.csv\n")
cat("‚Ä¢ Diagnostics PDF saved as: fatigue_lm_diagnosticcheck.pdf\n")

# --------------------------------------------------
# Scatterplots with regression lines for significant interactions
# --------------------------------------------------
sig_threshold <- 0.05

sig_rows <- fatigue_lm_regressionresults %>%
  filter(!is.na(p_interaction) & p_interaction < sig_threshold)

pdf("fatigue_lm_scatterplot.pdf", width = 8, height = 6)

if (nrow(sig_rows) > 0) {
  for (i in seq_len(nrow(sig_rows))) {
    y    <- sig_rows$outcome[i]
    resp <- sig_rows$resp_type[i]
    
    df_plot <- df_prepost %>%
      filter(resp_type == resp) %>%
      dplyr::select(record_id, condition, resp_value, all_of(y)) %>%
      na.omit()
    
    p_scatter <- ggplot(
      df_plot,
      aes(x = resp_value, y = .data[[y]], color = factor(condition))
    ) +
      geom_point() +
      geom_smooth(method = "lm", se = FALSE) +
      labs(
        title = paste0(
          "Scatterplot with Regression Lines by Condition\n",
          y, " vs ", resp, " (significant interaction)"
        ),
        x     = paste0(resp, " (resp_value)"),
        y     = y,
        color = "Condition\n(0 = pre, 1 = post)"
      )
    
    print(p_scatter)
  }
} else {
  # If no significant interactions, create a placeholder page
  plot.new()
  title("No significant interactions (p < 0.05)\nNo scatterplots generated.")
}

dev.off()  # close fatigue_lm_scatterplot.pdf

cat("‚Ä¢ Scatterplots PDF saved as: fatigue_lm_scatterplot.pdf\n")


: Error in `mutate()`:
‚Ñπ In argument: `condition = ifelse(condition == 0, 0, 1)`.
Caused by error:
! object 'condition' not found

## Mixed effects model

In [27]:
library(dplyr)
library(tidyr)
library(lme4)   # for glmer.nb

# --------------------------------------------------
# CONFIGURATION
# --------------------------------------------------
covariates    <- c("data_age", "subject_female")
outcomes      <- c("fss_sum", "woods_sum", "dsq_sum")
resp_types    <- c("mip", "smip", "fit", "id", "slopesmip", "sindex", "pif", "volume")
condition_var <- "condition"   # 0 = pre, 1 = post

# We assume df_long has columns like mip_pre_max, mip_post_max, etc.
# --------------------------------------------------
# Create long format dataframe for pre/post analysis
# --------------------------------------------------
df_prepost <- df_long %>%
  filter(time_point %in% c(1, 2)) %>%
  mutate(condition = ifelse(time_point == 1, 0, 1)) %>%
  dplyr::select(
    record_id,
    condition,
    all_of(outcomes),
    matches(paste0("^(", paste(resp_types, collapse = "|"), ")_(pre|post)_max$")),
    all_of(covariates)
  ) %>%
  pivot_longer(
    cols = matches(paste0("^(", paste(resp_types, collapse = "|"), ")_(pre|post)_max$")),
    names_to      = c("resp_type", "time", ".value"),
    names_pattern = "(.+)_(pre|post)_(.+)"
  ) %>%
  rename(resp_value = max) %>%
  filter(!is.na(resp_value)) %>%
  distinct()

# Quick sanity check: should be ~ 2 rows per subject per resp_type
# df_prepost %>% count(record_id, resp_type, condition)


library(dplyr)
library(lme4)

# --------------------------------------------------
# Storage objects
# --------------------------------------------------
model_comp_list <- list()  # for fatigue_glmm_model_comparison.csv
reg_results     <- list()  # for fatigue_glmm_regressionresults.csv
comp_idx        <- 1
reg_idx         <- 1

for (y in outcomes) {
  for (resp in resp_types) {
    
    # Subset to this respiratory measure
    df_model <- df_prepost %>%
      filter(resp_type == resp) %>%
      dplyr::select(record_id, condition, resp_value, all_of(y), all_of(covariates)) %>%
      na.omit()
    
    # Need at least 2 rows per subject and both conditions present
    if (nrow(df_model) < 10) next
    if (length(unique(df_model[[condition_var]])) < 2) next
    if (length(unique(df_model$record_id)) < 5) next  # arbitrary small sample guard
    
    # --------------------------------------------------
    # Fixed part of the formula
    # outcome ~ resp_value * condition + covariates
    # --------------------------------------------------
    if (length(covariates) > 0) {
      fixed_rhs <- paste("resp_value * condition", paste(covariates, collapse = " + "), sep = " + ")
    } else {
      fixed_rhs <- "resp_value * condition"
    }
    
    # Random structures
    form_int   <- as.formula(paste(y, "~", fixed_rhs, "+ (1 | record_id)"))
    form_slope <- as.formula(paste(y, "~", fixed_rhs, "+ (0 + resp_value | record_id)"))
    form_both  <- as.formula(paste(y, "~", fixed_rhs, "+ (1 + resp_value | record_id)"))
    
    # --------------------------------------------------
    # Fit models with tryCatch to avoid crashes
    # --------------------------------------------------
    fit_int <- tryCatch(
      suppressWarnings(
        glmer.nb(form_int, data = df_model,
                control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5)))
      ),
      error = function(e) NULL
    )

    fit_slope <- tryCatch(
      suppressWarnings(
        glmer.nb(form_slope, data = df_model,
                control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5)))
      ),
      error = function(e) NULL
    )

    fit_both <- tryCatch(
      suppressWarnings(
        glmer.nb(form_both, data = df_model,
                control = glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5)))
      ),
      error = function(e) NULL
    )
    
    # --------------------------------------------------
    # Collect AICs
    # --------------------------------------------------
    aic_int   <- if (!is.null(fit_int))   AIC(fit_int)   else NA_real_
    aic_slope <- if (!is.null(fit_slope)) AIC(fit_slope) else NA_real_
    aic_both  <- if (!is.null(fit_both))  AIC(fit_both)  else NA_real_
    
    aic_vec <- c(random_intercept = aic_int,
                 random_slope     = aic_slope,
                 random_int_slope = aic_both)
    
    # If all failed, skip
    if (all(is.na(aic_vec))) next
    
    # Choose best random structure
    best_name <- names(which.min(aic_vec))
    best_fit  <- switch(
      best_name,
      random_intercept = fit_int,
      random_slope     = fit_slope,
      random_int_slope = fit_both
    )
    
    # Just in case
    if (is.null(best_fit)) next
    
    # Save model comparison info
    model_comp_list[[comp_idx]] <- data.frame(
      outcome       = y,
      resp_type     = resp,
      N             = nrow(df_model),
      n_subjects    = length(unique(df_model$record_id)),
      covariates    = if (length(covariates) > 0) paste(covariates, collapse = ", ") else "none",
      AIC_random_intercept   = aic_int,
      AIC_random_slope       = aic_slope,
      AIC_random_int_slope   = aic_both,
      best_random_structure  = best_name,
      stringsAsFactors       = FALSE
    )
    comp_idx <- comp_idx + 1
    
    # --------------------------------------------------
    # Extract interaction term from best model
    # --------------------------------------------------
    coef_mat <- summary(best_fit)$coefficients
    # For numeric 0/1 condition, interaction is resp_value:condition
    term_name <- "resp_value:condition"
    
    if (term_name %in% rownames(coef_mat)) {
      est <- coef_mat[term_name, "Estimate"]
      se  <- coef_mat[term_name, "Std. Error"]
      z   <- coef_mat[term_name, "z value"]
      p   <- coef_mat[term_name, "Pr(>|z|)"]
    } else {
      est <- se <- z <- p <- NA_real_
    }
    
    reg_results[[reg_idx]] <- data.frame(
      outcome          = y,
      resp_type        = resp,
      best_random_str  = best_name,
      N                = nrow(df_model),
      n_subjects       = length(unique(df_model$record_id)),
      beta_interaction = est,
      se_interaction   = se,
      z_interaction    = z,
      p_interaction    = p,
      covariates       = if (length(covariates) > 0) paste(covariates, collapse = ", ") else "none",
      stringsAsFactors = FALSE
    )
    reg_idx <- reg_idx + 1
  }
}

# --------------------------------------------------
# Save outputs as CSVs
# --------------------------------------------------
fatigue_glmm_model_comparison <- bind_rows(model_comp_list)
write.csv(
  fatigue_glmm_model_comparison,
  "fatigue_glmm_model_comparison.csv",
  row.names = FALSE
)

fatigue_glmm_regressionresults <- bind_rows(reg_results)
write.csv(
  fatigue_glmm_regressionresults,
  "fatigue_glmm_regressionresults.csv",
  row.names = FALSE
)

cat("\n‚úÖ DONE!\n")
cat("‚Ä¢ NB GLMMs fit with three random structures per outcome √ó resp_type\n")
cat("‚Ä¢ Model comparison saved to: fatigue_glmm_model_comparison.csv\n")
cat("‚Ä¢ Interaction term results saved to: fatigue_glmm_regressionresults.csv\n")


boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')

boundary (singular) fit: see help('isSingular')


‚úÖ DONE!
‚Ä¢ NB GLMMs fit with three random structures per outcome √ó resp_type
‚Ä¢ Model comparison saved to: fatigue_glmm_model_comparison.csv
‚Ä¢ Interaction term results saved to: fatigue_glmm_regressionresults.csv


# Group Comparison

In [5]:
library(dplyr)
library(broom)

# --------------------------------------------------
# Define respiratory variables
# --------------------------------------------------
resp_vars <- c("mip_pre_max", "mip_pre_max_percentpredict_1", "mip_pre_max_percentpredict_2", "mip_pre_max_percentpredict_3",
               "smip_pre_max", "fit_pre_max", "pif_pre_max", "sindex_pre_max", "volume_pre_max",
               "id_pre_max", "slopesmip_pre_max",
               "mip_post_max", "mip_post_max_percentpredict_1", "mip_post_max_percentpredict_2", "mip_post_max_percentpredict_3",
               "smip_post_max","fit_post_max", "pif_post_max", "sindex_post_max",
               "volume_post_max", "id_post_max", "slopesmip_post_max")

# --------------------------------------------------
# Create pre-post pairs
# --------------------------------------------------

df_v1 <- df_long %>%
  filter(time_point == 1)


# Extract base variable names (without pre/post)
base_vars <- unique(gsub("_(pre|post)_max.*", "", resp_vars))
base_vars <- base_vars[!base_vars %in% c("", "slopesmip")]  # Remove empty strings

# Create pairing list
pre_post_pairs <- list()

for (base_var in base_vars) {
  # Find all pre and post variables for this base variable
  pre_vars <- resp_vars[grepl(paste0("^", base_var, "_pre_max"), resp_vars)]
  post_vars <- resp_vars[grepl(paste0("^", base_var, "_post_max"), resp_vars)]
  
  # Pair them based on the suffix
  for (pre_var in pre_vars) {
    suffix <- gsub(paste0(base_var, "_pre_max"), "", pre_var)
    post_var <- paste0(base_var, "_post_max", suffix)
    
    if (post_var %in% post_vars) {
      pre_post_pairs[[length(pre_post_pairs) + 1]] <- c(pre_var, post_var)
    }
  }
}

# Special handling for slopesmip
if ("slopesmip_pre_max" %in% resp_vars & "slopesmip_post_max" %in% resp_vars) {
  pre_post_pairs[[length(pre_post_pairs) + 1]] <- c("slopesmip_pre_max", "slopesmip_post_max")
}

# --------------------------------------------------
# Function to run paired tests
# --------------------------------------------------
run_paired_tests <- function(pre_var, post_var, data) {
  # Filter to participants with both pre and post values
  paired_data <- data %>%
    dplyr::select(record_id, all_of(c(pre_var, post_var))) %>%
    na.omit()
  
  n_pairs <- nrow(paired_data)
  
  if (n_pairs < 10) {
    return(data.frame(
      pre_var = pre_var,
      post_var = post_var,
      n_pairs = n_pairs,
      pre_mean = NA,
      post_mean = NA,
      mean_diff = NA,
      t_statistic = NA,
      t_p_value = NA,
      w_statistic = NA,
      w_p_value = NA,
      stringsAsFactors = FALSE
    ))
  }
  
  # Calculate means
  pre_mean <- mean(paired_data[[pre_var]], na.rm = TRUE)
  post_mean <- mean(paired_data[[post_var]], na.rm = TRUE)
  mean_diff <- post_mean - pre_mean
  
  # Paired t-test
  t_test <- tryCatch({
    t.test(paired_data[[pre_var]], paired_data[[post_var]], paired = TRUE)
  }, error = function(e) {
    list(statistic = NA, p.value = NA)
  })
  
  # Wilcoxon signed-rank test
  w_test <- tryCatch({
    wilcox.test(paired_data[[pre_var]], paired_data[[post_var]], paired = TRUE, exact = FALSE)
  }, error = function(e) {
    list(statistic = NA, p.value = NA)
  })
  
  return(data.frame(
    pre_var = pre_var,
    post_var = post_var,
    n_pairs = n_pairs,
    pre_mean = pre_mean,
    post_mean = post_mean,
    mean_diff = mean_diff,
    t_statistic = ifelse(is.null(t_test$statistic), NA, t_test$statistic),
    t_p_value = ifelse(is.null(t_test$p.value), NA, t_test$p.value),
    w_statistic = ifelse(is.null(w_test$statistic), NA, w_test$statistic),
    w_p_value = ifelse(is.null(w_test$p.value), NA, w_test$p.value),
    stringsAsFactors = FALSE
  ))
}

# --------------------------------------------------
# Run all paired comparisons
# --------------------------------------------------
paired_results <- list()

for (i in seq_along(pre_post_pairs)) {
  pair <- pre_post_pairs[[i]]
  cat("Running comparison:", pair[1], "vs", pair[2], "\n")
  
  result <- run_paired_tests(pair[1], pair[2], df_v1)
  paired_results[[i]] <- result
}

paired_df <- bind_rows(paired_results)

# --------------------------------------------------
# Format and clean up results
# --------------------------------------------------
formatted_paired <- paired_df %>%
  mutate(
    pre_mean = round(pre_mean, 2),
    post_mean = round(post_mean, 2),
    mean_diff = round(mean_diff, 2),
    t_statistic = round(t_statistic, 3),
    t_p_value = round(t_p_value, 4),
    w_statistic = round(w_statistic, 1),
    w_p_value = round(w_p_value, 4),
    t_sig = case_when(
      t_p_value < 0.001 ~ "***",
      t_p_value < 0.01 ~ "**",
      t_p_value < 0.05 ~ "*",
      TRUE ~ ""
    ),
    w_sig = case_when(
      w_p_value < 0.001 ~ "***",
      w_p_value < 0.01 ~ "**",
      w_p_value < 0.05 ~ "*",
      TRUE ~ ""
    ),
    # Add interpretation
    direction = ifelse(mean_diff > 0, "Increase", "Decrease"),
    consistent_significance = ifelse(t_p_value < 0.05 & w_p_value < 0.05, "Yes", "No")
  ) %>%
  dplyr::select(
    pre_var, post_var, n_pairs,
    pre_mean, post_mean, mean_diff, direction,
    t_statistic, t_p_value, t_sig,
    w_statistic, w_p_value, w_sig,
    consistent_significance
  )

# --------------------------------------------------
# Print results
# --------------------------------------------------
cat("\n==============================================\n")
cat(" PRE vs POST ACTIVITY PAIRED COMPARISONS\n")
cat("==============================================\n")

for (i in 1:nrow(formatted_paired)) {
  row <- formatted_paired[i, ]
  cat(sprintf("\nComparison %d: %s vs %s\n", i, row$pre_var, row$post_var))
  cat(sprintf("N pairs: %d | Pre: %.2f | Post: %.2f | Difference: %.2f (%s)\n", 
              row$n_pairs, row$pre_mean, row$post_mean, row$mean_diff, row$direction))
  cat(sprintf("Paired t-test: t = %.3f, p = %.4f %s\n", 
              row$t_statistic, row$t_p_value, row$t_sig))
  cat(sprintf("Wilcoxon test: W = %.1f, p = %.4f %s\n", 
              row$w_statistic, row$w_p_value, row$w_sig))
  cat(sprintf("Consistent significance: %s\n", row$consistent_significance))
  cat("----------------------------------------------\n")
}

# --------------------------------------------------
# Summary of significant results
# --------------------------------------------------
sig_results <- formatted_paired %>%
  filter(consistent_significance == "Yes")

cat("\n==============================================\n")
cat(" SUMMARY: SIGNIFICANT PRE-POST DIFFERENCES\n")
cat("==============================================\n")
if (nrow(sig_results) > 0) {
  for (i in 1:nrow(sig_results)) {
    row <- sig_results[i, ]
    cat(sprintf("%d. %s: %.2f ‚Üí %.2f (Œî%.2f, %s), p = %.4f\n", 
                i, gsub("_pre_max.*", "", row$pre_var),
                row$pre_mean, row$post_mean, row$mean_diff, row$direction,
                max(row$t_p_value, row$w_p_value, na.rm = TRUE)))
  }
} else {
  cat("No consistently significant pre-post differences found.\n")
}

# --------------------------------------------------
# Save results
# --------------------------------------------------
write.csv(formatted_paired, "pre_post_paired_comparisons.csv", row.names = FALSE)
cat("\nDetailed results saved to 'pre_post_paired_comparisons.csv'\n")

Running comparison: mip_pre_max vs mip_post_max 
Running comparison: mip_pre_max_percentpredict_1 vs mip_post_max_percentpredict_1 
Running comparison: mip_pre_max_percentpredict_2 vs mip_post_max_percentpredict_2 
Running comparison: mip_pre_max_percentpredict_3 vs mip_post_max_percentpredict_3 
Running comparison: smip_pre_max vs smip_post_max 
Running comparison: fit_pre_max vs fit_post_max 
Running comparison: pif_pre_max vs pif_post_max 
Running comparison: sindex_pre_max vs sindex_post_max 
Running comparison: volume_pre_max vs volume_post_max 
Running comparison: id_pre_max vs id_post_max 
Running comparison: slopesmip_pre_max vs slopesmip_post_max 

 PRE vs POST ACTIVITY PAIRED COMPARISONS

Comparison 1: mip_pre_max vs mip_post_max
N pairs: 22 | Pre: 47.68 | Post: 46.14 | Difference: -1.55 (Decrease)
Paired t-test: t = 0.467, p = 0.6453 
Wilcoxon test: W = 150.0, p = 0.4550 
Consistent significance: No
----------------------------------------------

Comparison 2: mip_pre_max_pe

# End