In [None]:
library(readr)
library(dplyr)
library(clusterProfiler)
library(org.Dm.eg.db)

# ---- Input locations and data loading ----
# Folder holding the *_combined.csv inputs and df_gene.csv for this run.
data_dir <- "~/Dropbox @RU Dropbox/Pyonghwa Kim/snRNA-seq (w: Cao lab)/2025:07:15 (PSEA again)"

# Gene-set lookup table (used further down to annotate PSEA result sets).
m_t2g <- readRDS("~/Dropbox @RU Dropbox/Pyonghwa Kim/snRNA-seq (w: Cao lab)/2024:07:08/m_t2g_ver2.rds")

# The working directory is switched to `data_dir` because the relative reads
# below, the file_<cell type>.txt files written later, and the later
# list.files() call all resolve against it.
setwd(data_dir)

# Per-condition tables kept available under their own names.
WTDD_combined <- read_csv(file.path(data_dir, "WTDD_combined.csv"))
MTLD_combined <- read_csv(file.path(data_dir, "MTLD_combined.csv"))
MTDD_combined <- read_csv(file.path(data_dir, "MTDD_combined.csv"))

# Condition processed by this run: one of "WTLD", "WTDD", "MTLD", "MTDD".
condition <- "MTDD"
stopifnot(condition %in% c("WTLD", "WTDD", "MTLD", "MTDD"))

# Load data
# NOTE(review): the rest of the script operates on the object named
# `WTLD_combined`, but the table loaded into it is the one selected by
# `condition` (MTDD here, exactly as the script did before). The name is kept
# so downstream code keeps working; change `condition` to process another
# table, and confirm that MTDD is really the intended input.
WTLD_combined <- read_csv(paste0(condition, "_combined.csv"))
df_gene <- read_csv("df_gene.csv")

# Attach FlyBase IDs
# Two-column lookup (gene symbol, FlyBase ID); the ID column is already named
# `FB` here, so no separate rename step is needed after the join.
gene_to_fb <- dplyr::select(df_gene, gene_short_name, FB = FB_number)

WTLD_combined <- left_join(
  WTLD_combined,
  gene_to_fb,
  by = c("CycID" = "gene_short_name")
)
# Keep only rows that received a FlyBase ID
WTLD_combined <- dplyr::filter(WTLD_combined, !is.na(FB))

# Map FB -> Entrez
flybase_ids <- unique(WTLD_combined$FB)
fb_to_entrez <- bitr(
  flybase_ids,
  fromType = "FLYBASE",
  toType = "ENTREZID",
  OrgDb = org.Dm.eg.db
)

# Attach the Entrez IDs and keep only rows that received one
WTLD_combined <- left_join(
  WTLD_combined,
  fb_to_entrez,
  by = c("FB" = "FLYBASE")
)
WTLD_combined <- dplyr::filter(WTLD_combined, !is.na(ENTREZID))

# Keep only the columns needed for the per-cell-type files and group by
# cell type
grouped <- dplyr::group_by(
  dplyr::select(WTLD_combined, cell_type, ENTREZID, avg_phase),
  cell_type
)

# One tibble per cell type, plus the matching cell-type labels; both are
# derived from the same grouped table, so they are in the same order.
split_list <- dplyr::group_split(grouped)
cell_type_names <- dplyr::pull(dplyr::group_keys(grouped), cell_type)

# Write one cell type's table as a headerless, tab-separated
# (ENTREZID, avg_phase) file named file_<cell type>.txt in the working
# directory. Only the first row per ENTREZID is kept.
write_phase_file <- function(rows, cell_type_label) {
  unique_rows <- dplyr::distinct(rows, ENTREZID, .keep_all = TRUE)
  write.table(
    unique_rows[, c("ENTREZID", "avg_phase")],
    file = paste0("file_", cell_type_label, ".txt"),
    sep = "\t", row.names = FALSE, col.names = FALSE, quote = FALSE
  )
}

# Write files
for (k in seq_along(split_list)) {
  write_phase_file(split_list[[k]], cell_type_names[k])
}

In [None]:
# PSEA is run outside R, from a shell (commands kept here for reference):
###cd "/Users/pyonghwakim/Dropbox @RU Dropbox/Pyonghwa Kim/snRNA-seq (w: Cao lab)/Circadian analysis/PSEA-master"
###java -jar PSEA1.1_VectorGraphics.jar

# Settings noted for the run: JTK ARS LS median 1.0 fold change threshold 1

# GMT file path noted for the run:
##/Users/pyonghwakim/Dropbox @RU Dropbox/Pyonghwa Kim/snRNA-seq (w: Cao lab)/Circadian analysis/PSEA-master/c5.all.works.gmt

In [None]:
# Load necessary libraries
library(dplyr)
library(ggplot2)
library(tidyr)
library(gridExtra)

# Load m_t2g beforehand
# m_t2g <- read.csv("m_t2g.csv")  # uncomment and adjust path if needed

# ---- Load and annotate per-cell-type result tables ----
# For every results_<cell type>.txt file in the working directory:
#   1. read it (tab-separated) and label its columns,
#   2. coerce the measurement columns to numeric,
#   3. keep rows with Kuiper p-value (vs. background) < 0.05,
#   4. annotate each set with GO / parentTerm / parentTerm_broad from m_t2g.
# Files that cannot be read, are empty, have an unexpected column count, or
# contain no significant rows are skipped with a message.
# Result: `combined_results_by_cell_type`, a list of data frames named by
# cell type.

# List all results files matching the pattern
results_files <- list.files(pattern = "results_.*\\.txt$", full.names = TRUE)

# Fail early, with a clear message, if the annotation table is not loaded
if (!exists("m_t2g")) {
  stop("`m_t2g` must be loaded before annotating the results.", call. = FALSE)
}
stopifnot(
  all(c("gs_name", "gs_exact_source", "parentTerm", "parentTerm_broad") %in%
        names(m_t2g))
)

# Column labels applied to each results table, in file order
psea_columns <- c("Set ID", "Set N",
                  "Kuiper p-value (vs. background)",
                  "Kuiper q-value (vs. background)",
                  "Kuiper p-value (vs. uniform)",
                  "Kuiper q-value (vs. uniform)",
                  "Vector-average magnitude", "Vector-average value")

# Columns coerced to numeric. read.csv2() defaults to dec = ",", so
# dot-decimal values arrive as character while all-integer columns arrive as
# integer; coercing every file the same way keeps column types consistent
# across cell types when the tables are combined later.
numeric_columns <- c("Kuiper p-value (vs. background)",
                     "Kuiper q-value (vs. background)",
                     "Kuiper p-value (vs. uniform)",
                     "Kuiper q-value (vs. uniform)",
                     "Vector-average magnitude", "Vector-average value")

# Initialize output list
combined_results_by_cell_type <- list()

# Loop through all result files
for (file in results_files) {
  # Extract cell type from filename
  cell_type <- gsub("results_(.*)\\.txt$", "\\1", basename(file))

  # Read file; a read error is reported once (with its reason) and yields NULL
  results <- tryCatch(
    read.csv2(file, sep = "\t", stringsAsFactors = FALSE),
    error = function(e) {
      message("Skipping unreadable file: ", file,
              " (", conditionMessage(e), ")")
      NULL
    }
  )
  if (is.null(results)) {
    next
  }

  # Skip if empty
  if (nrow(results) == 0) {
    message("Skipping empty file: ", file)
    next
  }

  # Skip tables whose layout does not match the expected columns, instead of
  # letting the column relabelling abort the whole loop
  if (ncol(results) != length(psea_columns)) {
    message("Skipping file with unexpected column count (", ncol(results),
            " instead of ", length(psea_columns), "): ", file)
    next
  }

  # Add Cluster as the first column and label all columns
  results <- cbind(Cluster = NA, results)
  colnames(results) <- c("Cluster", psea_columns)

  # Coerce measurement columns to numeric
  for (column in numeric_columns) {
    results[[column]] <- as.numeric(as.character(results[[column]]))
  }

  # Keep significant rows. The NA test runs AFTER the numeric coercion so that
  # values which could not be parsed are dropped rather than surviving the
  # `<` comparison as all-NA rows.
  p_background <- results$`Kuiper p-value (vs. background)`
  results <- results[!is.na(p_background) & p_background < 0.05, , drop = FALSE]

  # Skip if no significant results
  if (nrow(results) == 0) {
    message("Skipping non-significant file: ", file)
    next
  }

  # Annotate GO terms: Set ID -> gs_name -> gs_exact_source
  results$GO <- NA
  matching <- match(results$`Set ID`, m_t2g$gs_name)
  valid <- !is.na(matching)
  results$GO[valid] <- m_t2g$gs_exact_source[matching[valid]]

  # Annotate clusters: GO -> gs_exact_source -> parentTerm / parentTerm_broad.
  # Rows without a GO id are excluded explicitly, because match() would
  # otherwise pair an NA GO with an NA gs_exact_source entry.
  results$specificCluster <- NA
  matching2 <- match(results$GO, m_t2g$gs_exact_source)
  valid2 <- !is.na(results$GO) & !is.na(matching2)
  results$specificCluster[valid2] <- m_t2g$parentTerm[matching2[valid2]]
  results$Cluster[valid2] <- m_t2g$parentTerm_broad[matching2[valid2]]

  # Store per-cell type
  combined_results_by_cell_type[[cell_type]] <- results
}

# --------------------------
# Stack the per-cell-type tables into a single data frame for plotting
# --------------------------

# Names of the cell types that produced a result table
cell_types <- names(combined_results_by_cell_type)

# Make sure `Vector-average value` is numeric in every stored table
combined_results_by_cell_type <- lapply(
  combined_results_by_cell_type,
  function(tbl) {
    tbl$`Vector-average value` <-
      as.numeric(as.character(tbl$`Vector-average value`))
    tbl
  }
)

# Tag each table with its cell type (as a trailing CellType column), then
# stack all tables row-wise
plot_data <- bind_rows(
  Map(
    function(tbl, label) {
      tbl$CellType <- label
      tbl
    },
    combined_results_by_cell_type,
    cell_types
  )
)

# write.csv(plot_data, "WTLD.csv", row.names = FALSE)