In [1]:
# Pairwise-Wilcoxon version.
# For every column named in `ratio_columns`, draw a boxplot of that column
# grouped by the `labels_refined` subtype and annotate only those subtype
# pairs whose two-sample Wilcoxon test gives p < 0.05. One PNG per column is
# written to `output_dir`.
library(readr)
library(dplyr)
library(ggplot2)
library(ggpubr)

clinical <- readr::read_csv("/nfs/dcmb-lgarmire/boweil/EFIGA/DE_label/EFIGA_clinical.csv")

csv_file <- "/nfs/dcmb-lgarmire/boweil/EFIGA/DE_label/subLabel/All_QN_l1_QN.csv"

output_dir <- "/nfs/turbo/umms-lgarmire/home/yhdu/Bowei_NAS/EFIGA/Review/New_smallsubtype_validation_booxplots"
if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE)
}

ratio_columns <- c("P_AB40", "P_AB42", "P_NfL", "P_PT181V2", "P_PT217")

csv_data <- try(read_csv(csv_file), silent = TRUE)
if (inherits(csv_data, "try-error")) {
  message(paste("Error reading", csv_file, "- stopping."))
} else if (!all(c("ID", "labels_refined") %in% colnames(csv_data))) {
  message(paste("CSV file", csv_file, "does not have required columns - stopping."))
} else {
  # Attach the refined label to each clinical row; the clinical table's
  # unnamed first column (read in as `...1`) is matched against `ID`.
  filtered_csv <- csv_data %>% filter(ID %in% clinical$`...1`)
  clinical_subset <- clinical %>%
    left_join(
      filtered_csv %>% select(ID, labels_refined),
      by = c("...1" = "ID")
    ) %>%
    filter(!is.na(labels_refined))

  if (nrow(clinical_subset) == 0) {
    message(paste("No matching labels for", csv_file, "- stopping."))
  } else {
    subtype_levels <- c("EMCI1", "EMCI2", "LMCI1", "LMCI2")
    subtype_pairs <- combn(subtype_levels, 2, simplify = FALSE)
    file_stem <- tools::file_path_sans_ext(basename(csv_file))

    for (ratio in ratio_columns) {
      if (!(ratio %in% colnames(clinical_subset))) {
        message(paste("Column", ratio, "not found - skipping."))
        next
      }

      # Axis title: two columns get a friendlier name, the rest keep theirs.
      y_label <- switch(ratio,
        P_PT181V2 = "P_PT181",
        P_PT181V2AB42ratio = "PT181_vs_AB42_Ratio",
        ratio
      )

      # Two-column frame (label, value): drop rows with NA, then drop the
      # per-subtype boxplot outliers, then fix the subtype order on the axis.
      plot_df <- clinical_subset %>%
        select(Pred_label = labels_refined, Ratio = all_of(ratio)) %>%
        na.omit() %>%
        group_by(Pred_label) %>%
        filter(!Ratio %in% boxplot.stats(Ratio)$out) %>%
        ungroup() %>%
        mutate(Pred_label = factor(Pred_label, levels = subtype_levels))

      # TRUE when both subtypes of the pair have data and their Wilcoxon
      # p-value is defined and below 0.05.
      pair_is_significant <- function(pair) {
        pair_df <- plot_df[plot_df$Pred_label %in% pair, ]
        if (length(unique(pair_df$Pred_label)) != 2) {
          return(FALSE)
        }
        pv <- wilcox.test(Ratio ~ Pred_label, data = pair_df)$p.value
        !is.na(pv) && pv < 0.05
      }
      sig_pairs <- Filter(pair_is_significant, subtype_pairs)

      box_plot <- ggplot(
        plot_df,
        aes(x = Pred_label, y = Ratio, color = Pred_label)
      ) +
        geom_boxplot(outlier.shape = NA, fill = NA, size = 0.3) +
        geom_jitter(size = 1, width = 0.2, alpha = 0.7) +
        labs(x = NULL, y = y_label) +
        theme_bw(base_size = 22) +
        theme(
          panel.border = element_rect(color = "black", fill = NA, size = 1),
          legend.position = "none",
          axis.text = element_text(size = 23),
          axis.title = element_text(size = 25)
        )

      # Brackets are added only for the pairs that passed the filter above.
      if (length(sig_pairs) > 0) {
        box_plot <- box_plot +
          stat_compare_means(
            comparisons = sig_pairs,
            method = "wilcox.test",
            hide.ns = TRUE
          )
      }

      png_filename <- file.path(
        output_dir,
        paste0(file_stem, "_try", ratio, ".png")
      )

      ggsave(
        filename = png_filename, plot = box_plot,
        width = 7, height = 6, dpi = 600
      )
      message(paste("Saved plot for", ratio, "to", png_filename))
    }
  }
}



Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


[1m[22mNew names:
[36m•[39m `` -> `...1`
[1mRows: [22m[34m166[39m [1mColumns: [22m[34m71[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m  (1): labels_matching_UC
[32mdbl[39m (70): ...1, HMDB0000062, HMDB0000651, HMDB0006469, HMDB0002250, HMDB1333...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.
[1mRows: [22m[34m717[39m [1mColumns: [22m[34m58[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m  (2): labels_matching_UC_big, labels_refined
[3

# KW version

In [1]:
# Preview cell: join the refined subtype labels onto the clinical table and
# display the result so the available columns can be inspected.
library(readr)
library(dplyr)
library(ggplot2)
library(ggpubr)

clinical <- readr::read_csv("/nfs/dcmb-lgarmire/boweil/EFIGA/DE_label/EFIGA_clinical.csv")

csv_file <- "/nfs/dcmb-lgarmire/boweil/EFIGA/DE_label/subLabel/All_QN_l1_QN.csv"

output_dir <- "/nfs/turbo/umms-lgarmire/home/yhdu/Bowei_NAS/EFIGA/Review/New_smallsubtype_validation_booxplots"
if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE)
}

# Defined once (the original cell assigned the same vector twice).
ratio_columns <- c("P_AB40", "P_AB42", "P_NfL", "P_PT181V2", "P_PT217")

csv_data <- try(read_csv(csv_file), silent = TRUE)
if (inherits(csv_data, "try-error")) {
  message(paste("Error reading", csv_file, "- stopping."))
} else if (!all(c("ID", "labels_refined") %in% colnames(csv_data))) {
  message(paste("CSV file", csv_file, "does not have required columns - stopping."))
} else {
  # The clinical table's unnamed first column (read in as `...1`) is matched
  # against `ID` in the label file; rows without a label are dropped.
  filtered_csv <- csv_data %>% filter(ID %in% clinical$`...1`)
  clinical_subset <- clinical %>%
    left_join(
      filtered_csv %>% select(ID, labels_refined),
      by = c("...1" = "ID")
    ) %>%
    filter(!is.na(labels_refined))

  # Shown from inside this branch so that a failed read or a missing column
  # cannot end in "object not found" or in a stale `clinical_subset` left
  # over from an earlier cell being displayed.
  clinical_subset
}


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


[1m[22mNew names:
[36m•[39m `` -> `...1`
[1mRows: [22m[34m166[39m [1mColumns: [22m[34m71[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m  (1): labels_matching_UC
[32mdbl[39m (70): ...1, HMDB0000062, HMDB0000651, HMDB0006469, HMDB0002250, HMDB1333...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.
[1mRows: [22m[34m717[39m [1mColumns: [22m[34m58[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m  (2): labels_matching_UC_big, labels_refined
[3

...1,HMDB0000062,HMDB0000651,HMDB0006469,HMDB0002250,HMDB13330,HMDB13331,HMDB13332,HMDB0000222,HMDB00848,⋯,P_PT217,P_PT231,P_AB42AB40ratio,P_TAUAB42ratio,P_PT181V2AB42ratio,P_PT217AB42ratio,P_PT231AB42ratio,ptau181V2BIN,pt181V2AB42ratBIN,labels_refined
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>
9601,-2.5549536,-0.52114879,-1.29204487,-0.29913978,-0.26223063,-0.55846270,-1.31173230,-0.40237180,-0.62075249,⋯,1.1250,4.6650,0.03123457,0.39328063,0.23438735,0.08893281,0.36877470,1,0,LMCI1
9113,-2.5549536,0.26058300,-1.16523583,0.28765494,0.64440375,-0.25483207,0.12571578,1.31581953,0.92005101,⋯,0.4125,3.0800,0.04833625,0.18043478,0.23731884,0.02989130,0.22318841,1,0,LMCI1
90087,-2.2849924,-0.34861684,0.27625265,0.12871467,0.63259596,0.20503741,0.36815518,0.49955703,0.67199352,⋯,0.2505,2.9900,0.04397706,0.11565217,0.16826087,0.02178261,0.26000000,0,0,LMCI2
11175,-2.1951656,0.09422777,0.35706046,0.46627813,0.28765494,-0.47203989,-0.26223063,0.52956650,0.71275555,⋯,0.7810,4.6250,0.05018657,0.18364312,0.22936803,0.05806691,0.34386617,1,0,LMCI2
90049,-1.9821585,0.91130690,0.84525192,1.08857832,0.81594139,1.20533756,1.14453362,1.10107868,0.85536826,⋯,0.4210,2.8200,0.02167188,0.58543619,0.25306417,0.06070656,0.40663302,0,0,EMCI2
90131,-1.8884351,0.16657385,-0.30294994,0.44464835,0.91130690,0.43548163,0.20781593,0.52206336,1.10107868,⋯,0.3040,3.5800,0.05351724,0.12293814,0.11211340,0.01567010,0.18453608,0,0,EMCI2
9637,-1.8486863,1.02025380,-1.22028228,0.32696812,-0.01669960,-0.30931302,-0.30294994,-0.53822808,0.88764140,⋯,0.5215,2.6100,0.04664372,0.12472325,0.16752767,0.03848708,0.19261993,0,0,LMCI1
11151,-1.7911291,-0.58851915,-0.98515466,-0.84291807,-0.78580388,-0.28166076,-0.12204985,-1.15817407,-0.08815675,⋯,0.4175,2.5800,0.03720238,0.46960000,0.51760000,0.06680000,0.41280000,1,1,EMCI1
9238,-1.5686389,-1.23374693,-0.52865752,-1.18938312,-1.72102659,-1.19891224,-1.42468671,-1.32197569,-1.83220450,⋯,0.3830,1.3250,0.05278277,0.17925170,0.10340136,0.02605442,0.09013605,0,0,EMCI1
90013,-1.4951551,-1.63588322,0.22560819,-1.81586803,-0.83806757,-2.04336752,-0.12545496,-0.96064892,0.18983478,⋯,0.3920,3.4800,0.07894737,0.16698413,0.08539682,0.02488889,0.22095238,0,0,EMCI1


In [2]:
# Kruskal-Wallis version.
# For every column named in `ratio_columns`, draw a boxplot of that column
# grouped by the `labels_refined` subtype, annotate it with the
# Kruskal-Wallis p-value (computed by stat_compare_means), and write one PNG
# per column to `output_dir`.
library(readr)
library(dplyr)
library(ggplot2)
library(ggpubr)

clinical <- readr::read_csv("/nfs/dcmb-lgarmire/boweil/EFIGA/DE_label/EFIGA_clinical.csv")

csv_file <- "/nfs/dcmb-lgarmire/boweil/EFIGA/DE_label/subLabel/All_QN_l1_QN.csv"

output_dir <- "/nfs/turbo/umms-lgarmire/home/yhdu/Bowei_NAS/EFIGA/Review/New_smallsubtype_validation_booxplots"
if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE)
}

ratio_columns <- c(
  "P_AB40", "P_AB42", "P_NfL", "P_PT181V2", "P_PT217", "P_AB42AB40ratio"
)

csv_data <- try(read_csv(csv_file), silent = TRUE)
if (inherits(csv_data, "try-error")) {
  message(paste("Error reading", csv_file, "- stopping."))
} else if (!all(c("ID", "labels_refined") %in% colnames(csv_data))) {
  message(paste("CSV file", csv_file, "does not have required columns - stopping."))
} else {
  # The clinical table's unnamed first column (read in as `...1`) is matched
  # against `ID` in the label file; rows without a label are dropped.
  filtered_csv <- csv_data %>% filter(ID %in% clinical$`...1`)
  clinical_subset <- clinical %>%
    left_join(
      filtered_csv %>% select(ID, labels_refined),
      by = c("...1" = "ID")
    ) %>%
    filter(!is.na(labels_refined))

  if (nrow(clinical_subset) == 0) {
    message(paste("No matching labels for", csv_file, "- stopping."))
  } else {
    for (ratio in ratio_columns) {
      if (!(ratio %in% colnames(clinical_subset))) {
        message(paste("Column", ratio, "not found - skipping."))
        next
      }

      # Axis title: two columns get a friendlier name, the rest keep theirs.
      y_label <- if (ratio == "P_PT181V2") {
        "P_PT181"
      } else if (ratio == "P_PT181V2AB42ratio") {
        "PT181_vs_AB42_Ratio"
      } else {
        ratio
      }

      # Two-column frame (label, value) with NA rows removed.
      df <- clinical_subset %>%
        select(Pred_label = labels_refined, Ratio = all_of(ratio)) %>%
        na.omit()

      # Remove boxplot outliers separately within each subtype.
      df <- df %>%
        group_by(Pred_label) %>%
        filter(!Ratio %in% boxplot.stats(Ratio)$out) %>%
        ungroup()

      df$Pred_label <- factor(
        df$Pred_label,
        levels = c("EMCI1", "EMCI2", "LMCI1", "LMCI2")
      )

      # A Kruskal-Wallis test needs at least two groups with data. Skip this
      # column instead of letting one bad column abort the remaining ones.
      # (The original computed an unused `kw_p` here, which errored in
      # exactly that situation; the annotation below does the test itself.)
      if (nlevels(droplevels(df$Pred_label)) < 2) {
        message(paste(
          "Fewer than two subtypes with data for", ratio, "- skipping."
        ))
        next
      }

      p <- ggplot(df, aes(x = Pred_label, y = Ratio, color = Pred_label)) +
        geom_boxplot(outlier.shape = NA, fill = NA, size = 0.3) +
        geom_jitter(size = 1, width = 0.2, alpha = 0.7) +
        # label.y = Inf with vjust pins the p-value text to the panel top.
        stat_compare_means(
          method = "kruskal.test",
          label.y = Inf, vjust = 1.5, size = 4
        ) +
        labs(x = NULL, y = y_label) +
        theme_bw(base_size = 22) +
        theme(
          panel.border = element_rect(color = "black", fill = NA, size = 1),
          legend.position = "none",
          axis.text = element_text(size = 23),
          axis.title = element_text(size = 25)
        )

      png_filename <- file.path(
        output_dir,
        paste0(tools::file_path_sans_ext(basename(csv_file)), "_KW_", ratio, ".png")
      )

      ggsave(filename = png_filename, plot = p, width = 7, height = 6, dpi = 600)
      message(paste("Saved plot for", ratio, "to", png_filename))
    }
  }
}


[1m[22mNew names:
[36m•[39m `` -> `...1`
[1mRows: [22m[34m166[39m [1mColumns: [22m[34m71[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m  (1): labels_matching_UC
[32mdbl[39m (70): ...1, HMDB0000062, HMDB0000651, HMDB0006469, HMDB0002250, HMDB1333...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.
[1mRows: [22m[34m717[39m [1mColumns: [22m[34m58[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m  (2): labels_matching_UC_big, labels_refined
[32mdbl[39m (56): ...1, HMDB0000062, HMDB0000651, HMDB0006469, HMDB0002250, HMDB1333...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column 

In [None]:
# Kruskal-Wallis version, reading the inputs from the turbo volume.
# For every column named in `ratio_columns`, draw a boxplot of that column
# grouped by the `labels_refined` subtype, annotate it with the
# Kruskal-Wallis p-value (computed by stat_compare_means), and write one PNG
# per column to `output_dir`.
library(readr)
library(dplyr)
library(ggplot2)
library(ggpubr)

clinical <- readr::read_csv("/nfs/turbo/umms-lgarmire/home/yhdu/Bowei_NAS/EFIGA/DE_label/EFIGA_clinical.csv")

csv_file <- "/nfs/turbo/umms-lgarmire/home/yhdu/Bowei_NAS/EFIGA/DE_label/subLabel/All_QN_l1_QN.csv"

output_dir <- "/nfs/turbo/umms-lgarmire/home/yhdu/Bowei_NAS/EFIGA/Review/New_smallsubtype_validation_booxplots"
if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE)
}

ratio_columns <- c(
  "P_AB40", "P_AB42", "P_NfL", "P_PT181V2", "P_PT217", "P_AB42AB40ratio"
)

csv_data <- try(read_csv(csv_file), silent = TRUE)
if (inherits(csv_data, "try-error")) {
  message(paste("Error reading", csv_file, "- stopping."))
} else if (!all(c("ID", "labels_refined") %in% colnames(csv_data))) {
  message(paste("CSV file", csv_file, "does not have required columns - stopping."))
} else {
  # The clinical table's unnamed first column (read in as `...1`) is matched
  # against `ID` in the label file; rows without a label are dropped.
  filtered_csv <- csv_data %>% filter(ID %in% clinical$`...1`)
  clinical_subset <- clinical %>%
    left_join(
      filtered_csv %>% select(ID, labels_refined),
      by = c("...1" = "ID")
    ) %>%
    filter(!is.na(labels_refined))

  if (nrow(clinical_subset) == 0) {
    message(paste("No matching labels for", csv_file, "- stopping."))
  } else {
    subtype_levels <- c("EMCI1", "EMCI2", "LMCI1", "LMCI2")

    for (ratio in ratio_columns) {
      if (!(ratio %in% colnames(clinical_subset))) {
        message(paste("Column", ratio, "not found - skipping."))
        next
      }

      # Axis title: two columns get a friendlier name, the rest keep theirs.
      y_label <- if (ratio == "P_PT181V2") {
        "P_PT181"
      } else if (ratio == "P_PT181V2AB42ratio") {
        "PT181_vs_AB42_Ratio"
      } else {
        ratio
      }

      # Two-column frame (label, value): drop NA rows, then drop boxplot
      # outliers separately within each subtype.
      df <- clinical_subset %>%
        select(Pred_label = labels_refined, Ratio = all_of(ratio)) %>%
        na.omit() %>%
        group_by(Pred_label) %>%
        filter(!Ratio %in% boxplot.stats(Ratio)$out) %>%
        ungroup()

      df$Pred_label <- factor(df$Pred_label, levels = subtype_levels)

      # A Kruskal-Wallis test needs at least two groups with data. Skip this
      # column instead of letting one bad column abort the remaining ones.
      # (The original computed an unused `kw_p` here, which errored in
      # exactly that situation; the annotation below does the test itself.)
      if (nlevels(droplevels(df$Pred_label)) < 2) {
        message(paste(
          "Fewer than two subtypes with data for", ratio, "- skipping."
        ))
        next
      }

      p <- ggplot(df, aes(x = Pred_label, y = Ratio, color = Pred_label)) +
        geom_boxplot(outlier.shape = NA, fill = NA, size = 0.3) +
        geom_jitter(size = 1, width = 0.2, alpha = 0.7) +
        # label.y = Inf with vjust pins the p-value text to the panel top.
        stat_compare_means(
          method = "kruskal.test",
          label.y = Inf, vjust = 1.5, size = 4
        ) +
        labs(x = NULL, y = y_label) +
        theme_bw(base_size = 22) +
        theme(
          panel.border = element_rect(color = "black", fill = NA, size = 1),
          legend.position = "none",
          axis.text = element_text(size = 23),
          axis.title = element_text(size = 25)
        )

      png_filename <- file.path(
        output_dir,
        paste0(tools::file_path_sans_ext(basename(csv_file)), "_KW_", ratio, ".png")
      )

      ggsave(filename = png_filename, plot = p, width = 7, height = 6, dpi = 600)
      message(paste("Saved plot for", ratio, "to", png_filename))
    }
  }
}


In [2]:
# Pairwise-Wilcoxon version with manually placed p-value brackets.
# For every column named in `ratio_columns`, draw a boxplot of that column
# grouped by the `labels_refined` subtype, run the pairwise Wilcoxon tests,
# draw one bracket per comparison listed in `comparisons` (labelled "ns" or
# with the p-value), and write one PNG per column to `output_dir`.
library(readr)
library(dplyr)
library(ggplot2)
library(ggpubr)

clinical <- readr::read_csv("/nfs/turbo/umms-lgarmire/home/yhdu/Bowei_NAS/EFIGA/DE_label/EFIGA_clinical.csv")
csv_file   <- "/nfs/turbo/umms-lgarmire/home/yhdu/Bowei_NAS/EFIGA/DE_label/subLabel/All_QN_l1_QN.csv"
output_dir <- "/nfs/turbo/umms-lgarmire/home/yhdu/Bowei_NAS/EFIGA/Review/New_smallsubtype_validation_booxplots2"

if (!dir.exists(output_dir)) dir.create(output_dir, recursive = TRUE)

ratio_columns <- c(
  "P_AB40", "P_AB42", "P_NfL", "P_PT181V2", "P_PT217", "P_AB42AB40ratio"
)

# Axis titles, keyed by column name.
nice_labels <- c(
  P_AB42AB40ratio = "Aβ42/Aβ40 Ratio",
  P_PT181V2       = "P-tau181",
  P_PT217         = "P-tau217",
  P_AB40          = "Aβ40",
  P_AB42          = "Aβ42",
  P_NfL           = "NfL"
)

# pairwise comparisons to annotate
comparisons <- list(
  c("EMCI1", "EMCI2"),
  c("EMCI1", "LMCI1"),
  c("EMCI1", "LMCI2"),
  c("EMCI2", "LMCI1"),
  c("EMCI2", "LMCI2"),
  c("LMCI1", "LMCI2")
)

# compare_means() has no `comparisons` parameter (an argument of that name is
# only forwarded through `...` and has no effect), so the list is applied by
# filtering its result. Keys are built in both orders so the match does not
# depend on which group ends up in `group1`.
wanted_pairs <- unlist(lapply(comparisons, function(pair) {
  c(paste(pair[1], pair[2]), paste(pair[2], pair[1]))
}))

csv_data <- try(read_csv(csv_file), silent = TRUE)
if (inherits(csv_data, "try-error")) {
  message(paste("Error reading", csv_file, "- stopping."))
} else if (!all(c("ID", "labels_refined") %in% colnames(csv_data))) {
  message(paste("CSV file", csv_file, "does not have required columns - stopping."))
} else {

  # The clinical table's unnamed first column (read in as `...1`) is matched
  # against `ID` in the label file; rows without a label are dropped.
  filtered_csv <- csv_data %>% filter(ID %in% clinical$`...1`)

  clinical_subset <- clinical %>%
    left_join(
      filtered_csv %>% select(ID, labels_refined),
      by = c("...1" = "ID")
    ) %>%
    filter(!is.na(labels_refined))

  if (nrow(clinical_subset) == 0) {
    message(paste("No matching labels for", csv_file, "- stopping."))
  } else {

    for (ratio in ratio_columns) {

      if (!(ratio %in% colnames(clinical_subset))) {
        message(paste("Column", ratio, "not found - skipping."))
        next
      }

      # Use the nice label when one is defined; otherwise fall back to the
      # column name instead of failing on a missing entry.
      y_label <- if (ratio %in% names(nice_labels)) {
        nice_labels[[ratio]]
      } else {
        ratio
      }

      # Two-column frame (label, value) with NA rows removed.
      df <- clinical_subset %>%
        select(Pred_label = labels_refined, Ratio = all_of(ratio)) %>%
        na.omit()

      # remove univariate outliers within each subtype
      df <- df %>%
        group_by(Pred_label) %>%
        filter(!Ratio %in% boxplot.stats(Ratio)$out) %>%
        ungroup()

      df$Pred_label <- factor(
        df$Pred_label,
        levels = c("EMCI1", "EMCI2", "LMCI1", "LMCI2")
      )

      # Pairwise tests need at least two subtypes with data; skip this
      # column rather than abort the remaining ones.
      if (nlevels(droplevels(df$Pred_label)) < 2) {
        message(paste(
          "Fewer than two subtypes with data for", ratio, "- skipping."
        ))
        next
      }

      # pairwise Wilcoxon tests, restricted to the requested comparisons
      stat_df <- compare_means(
        Ratio ~ Pred_label,
        data   = df,
        method = "wilcox.test"
      ) %>%
        filter(paste(group1, group2) %in% wanted_pairs)

      # build labels + staggered y positions
      y_max <- max(df$Ratio, na.rm = TRUE)
      y_min <- min(df$Ratio, na.rm = TRUE)
      y_rng <- if (y_max > y_min) y_max - y_min else 1   # avoid 0 range

      stat_df <- stat_df %>%
        mutate(
          # `p` is the unadjusted p-value column of compare_means().
          # Very small values are shown as "p<0.001" rather than being
          # rounded to a misleading "p=0".
          label = ifelse(
            p > 0.05,
            "ns",
            ifelse(p < 0.001, "p<0.001", paste0("p=", round(p, 3)))
          ),
          # base a bit above data, then step up per comparison
          y.position = y_max + 0.05 * y_rng + (row_number() - 1) * 0.07 * y_rng
        )

      plt <- ggplot(df, aes(x = Pred_label, y = Ratio, color = Pred_label)) +
        geom_boxplot(outlier.shape = NA, fill = NA, size = 0.3) +
        geom_jitter(size = 1, width = 0.2, alpha = 0.7) +
        theme_bw(base_size = 15) +
        theme(
          panel.border = element_rect(color = "black", fill = NA, size = 1),
          axis.text    = element_text(size = 15),
          axis.title   = element_text(size = 15)
        ) +
        labs(x = NULL, y = y_label)

      # Brackets are only added when at least one comparison is available.
      if (nrow(stat_df) > 0) {
        plt <- plt +
          stat_pvalue_manual(
            stat_df,
            label      = "label",
            xmin       = "group1",
            xmax       = "group2",
            y.position = "y.position",
            tip.length = 0.01,
            size       = 3
          )
      }

      png_filename <- file.path(
        output_dir,
        paste0(tools::file_path_sans_ext(basename(csv_file)),
               "_pairwiseWilcox_", ratio, ".png")
      )

      ggsave(png_filename, plt, width = 6.5, height = 5, dpi = 600)
      message(paste("Saved plot for", ratio, "to", png_filename))
    }
  }
}


[1m[22mNew names:
[36m•[39m `` -> `...1`
[1mRows: [22m[34m166[39m [1mColumns: [22m[34m71[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m  (1): labels_matching_UC
[32mdbl[39m (70): ...1, HMDB0000062, HMDB0000651, HMDB0006469, HMDB0002250, HMDB1333...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.
[1mRows: [22m[34m717[39m [1mColumns: [22m[34m58[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m  (2): labels_matching_UC_big, labels_refined
[32mdbl[39m (56): ...1, HMDB0000062, HMDB0000651, HMDB0006469, HMDB0002250, HMDB1333...

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column 