In [256]:
library(limma)
library(dplyr)
library(hipathia)
library(ggvenn)
library(reshape2)
library(fmsb)

In [257]:
# Load the hipathia pathway object for species code "hsa"; it is used later
# to translate circuit ids into circuit names.
pathways <- load_pathways(species = "hsa")

Loaded 146 pathways



In [258]:
# Tumour-vs-control differential circuit activity, per sex, for one TCGA project.
#
# Reads the GTEx (control) and TCGA (tumour) circuit-activity matrices and the
# sample annotation from the project results tree, keeps the rows matched by
# the patterns in column V2 of physiological_paths.tsv, and fits one limma
# model (~ sample type) separately for male and for female samples.
#
# Args:
#   project_tcga: TCGA project id; one of the `tcga` values of the lookup
#     table below (e.g. "tcga_brca").
#
# Returns:
#   data.frame with the limma topTable columns for the circuits whose
#   FDR-adjusted p-value is < 0.05, plus `circ_hip` (row name of the circuit),
#   `circ` (name returned by get_path_names), `sex` and `project`.
#   Male rows come first, then female rows.
#
# Relies on the global `pathways` object (loaded with load_pathways).
funct_limma <- function(project_tcga){
    project_tcga <- as.character(project_tcga)

    # TCGA project -> GTEx tissue used as its control.
    proj_tumorvsctrl <- data.frame(tcga=c("tcga_brca", "tcga_coad","tcga_esca","tcga_lgg","tcga_luad","tcga_lusc","tcga_paad","tcga_skcm","tcga_stad"),
                          gtex=c("Breast","Colon","Esophagus","Brain","Lung","Lung","Pancreas","Skin","Stomach"))
    rownames(proj_tumorvsctrl) <- proj_tumorvsctrl$tcga
    if (!project_tcga %in% proj_tumorvsctrl$tcga) {
        stop("Unknown TCGA project: ", project_tcga, call. = FALSE)
    }

    # GTEx donors of the matching tissue.
    donors <- readRDS("/mnt/lustre/scratch/CBRA/research/projects/tcga_mm/results/02_selection_samples/02_2_ext_information_patients_gtex/donors_sex_age_tissue.rds")
    donors <- donors[which(donors$tissue == proj_tumorvsctrl[project_tcga,"gtex"]),]

    # TCGA patients, primary tumour samples only.
    patients <- readRDS(paste0("/mnt/lustre/scratch/CBRA/research/projects/tcga_mm/results/02_selection_samples/02_1_ext_information_patients_tcga/",project_tcga,"/patients_geneexp_clind_drug_samptype_",project_tcga,".rds"))
    patients <- patients[which(patients$sample_type == "Primary Solid Tumor"),]

    pathvals_gtex <- readRDS("/mnt/lustre/scratch/CBRA/research/projects/tcga_mm/results/01_mechanistics_models/01_2_mechanistics_models_gtex/path_vals_gtex.rds")
    pathvals_tcga <- readRDS(paste0("/mnt/lustre/scratch/CBRA/research/projects/tcga_mm/results/01_mechanistics_models/01_1_mechanistics_models_tcga/",project_tcga,"/pathvals_",project_tcga,".rds"))

    physiological_paths <- read.table("/mnt/lustre/scratch/CBRA/projects/heterogeneity/data/physiological_paths.tsv", sep="\t")

    # Row indices whose row name matches any V2 pattern, in V2 order.
    select_circuits <- function(pathvals) {
        unlist(lapply(physiological_paths$V2, FUN=function(x) grep(x, rownames(pathvals))))
    }
    rows_gtex <- select_circuits(pathvals_gtex)
    rows_tcga <- select_circuits(pathvals_tcga)

    # cbind() below is positional: it does not align rows by name.
    if (!identical(rownames(pathvals_gtex)[rows_gtex], rownames(pathvals_tcga)[rows_tcga])) {
        warning("Selected GTEx and TCGA circuits differ in name or order for ",
                project_tcga, "; rows are combined by position.", call. = FALSE)
    }

    # Fit tumour vs control for one sex and return the significant circuits.
    run_sex_contrast <- function(sex_label, gtex_sex, tcga_gender) {
        # as.character() so that factor codes are never used as column indices.
        ctrl_ids <- as.character(donors$patients[which(donors$sex == gtex_sex)])
        tum_ids <- as.character(patients$patient[which(patients$gender == tcga_gender)])

        pathvals <- cbind(pathvals_gtex[rows_gtex, ctrl_ids, drop = FALSE],
                          pathvals_tcga[rows_tcga, tum_ids, drop = FALSE])

        # Group labels follow the column blocks: controls first, tumours second.
        tipo <- factor(rep(c("ctrl", "tum"), times = c(length(ctrl_ids), length(tum_ids))),
                       levels = c("ctrl", "tum"))

        design <- model.matrix(~tipo)
        fit <- eBayes(lmFit(pathvals, design))
        top_table <- topTable(fit, number = Inf, p.value = 1, coef = "tipotum", adjust.method = "fdr")

        top_table_sign <- top_table[top_table$adj.P.Val < 0.05,]
        top_table_sign$circ_hip <- rownames(top_table_sign)
        top_table_sign$circ <- get_path_names(pathways, rownames(top_table_sign))
        top_table_sign$sex <- rep(sex_label, nrow(top_table_sign))
        top_table_sign$project <- rep(project_tcga, nrow(top_table_sign))
        top_table_sign
    }

    # GTEx annotates sex as "male"/"female"; TCGA gender as "MALE"/"FEMALE".
    top_table_male_sign <- run_sex_contrast("male", "male", "MALE")
    top_table_female_sign <- run_sex_contrast("female", "female", "FEMALE")

    rbind(top_table_male_sign, top_table_female_sign)
}

In [259]:
# Lookup table: TCGA project id -> GTEx tissue used as its control.
# Rows are also addressable by TCGA project id.
proj_tumorvsctrl <- data.frame(
    tcga = c(
        "tcga_brca", "tcga_coad", "tcga_esca", "tcga_lgg", "tcga_luad",
        "tcga_lusc", "tcga_paad", "tcga_skcm", "tcga_stad"
    ),
    gtex = c(
        "Breast", "Colon", "Esophagus", "Brain", "Lung",
        "Lung", "Pancreas", "Skin", "Stomach"
    )
)
row.names(proj_tumorvsctrl) <- proj_tumorvsctrl[["tcga"]]

In [260]:
# Run the per-sex limma analysis for every TCGA project, stack the results
# into one table and store it on disk.
limma_per_project <- lapply(proj_tumorvsctrl$tcga, funct_limma)
circs_sig_dif <- do.call(rbind, limma_per_project)
saveRDS(circs_sig_dif, file = "/mnt/lustre/scratch/CBRA/research/projects/tcga_mm/results/03_limma_pathvals/limma_pathvals.rds")

In [261]:
# Count, for one TCGA project, how the significant circuits split by sex.
#
# Args:
#   x: TCGA project id as stored in the `project` column of the global
#      `circs_sig_dif` table.
#
# Returns:
#   One-row data.frame with the number of circuits significant only in males,
#   only in females, in both sexes, and in both sexes but with opposite logFC
#   sign.
func_counts_dif_circs_sex <- function(x){
    sig_project <- circs_sig_dif %>% filter(project == x)
    circs_sign_male <- sig_project$circ[which(sig_project$sex == "male")]
    circs_sign_female <- sig_project$circ[which(sig_project$sex == "female")]

    circs_both_sex <- intersect(circs_sign_female, circs_sign_male)
    circs_only_female <- setdiff(circs_sign_female, circs_sign_male)
    circs_only_male <- setdiff(circs_sign_male, circs_sign_female)

    # A shared circuit with the same sign in both sexes yields one
    # (circ, sign) group of size 2; any other group size marks a circuit
    # whose sign is not shared.
    sign_groups <- sig_project %>%
        filter(circ %in% circs_both_sex) %>%
        mutate(sign_logFC = case_when(
            logFC > 0 ~ "+",
            logFC < 0 ~ "-")) %>%
        group_by(circ, sign_logFC) %>%
        summarise(n = n(), .groups = "drop") %>%
        filter(n != 2)
    circs_both_sex_difsigffc <- unique(sign_groups$circ)

    data.frame("project" = x,
               "n_circs_only_male" = length(circs_only_male),
               "n_circs_only_female" = length(circs_only_female),
               "n_circs_both_sex" = length(circs_both_sex),
               "n_circs_both_sex_dif_sign_logFC" = length(circs_both_sex_difsigffc))
}

In [262]:
### write summary table

In [263]:
# One summary row per TCGA project, stacked into a single table.
counts_per_project <- lapply(proj_tumorvsctrl$tcga, func_counts_dif_circs_sex)
do.call(rbind, counts_per_project)

[1m[22m`summarise()` has grouped output by 'circ'. You can override using the
`.groups` argument.
[1m[22m`summarise()` has grouped output by 'circ'. You can override using the
`.groups` argument.
[1m[22m`summarise()` has grouped output by 'circ'. You can override using the
`.groups` argument.
[1m[22m`summarise()` has grouped output by 'circ'. You can override using the
`.groups` argument.
[1m[22m`summarise()` has grouped output by 'circ'. You can override using the
`.groups` argument.
[1m[22m`summarise()` has grouped output by 'circ'. You can override using the
`.groups` argument.
[1m[22m`summarise()` has grouped output by 'circ'. You can override using the
`.groups` argument.
[1m[22m`summarise()` has grouped output by 'circ'. You can override using the
`.groups` argument.
[1m[22m`summarise()` has grouped output by 'circ'. You can override using the
`.groups` argument.


project,n_circs_only_male,n_circs_only_female,n_circs_both_sex,n_circs_both_sex_dif_sign_logFC
<chr>,<int>,<int>,<int>,<int>
tcga_brca,103,96,726,6
tcga_coad,62,25,815,0
tcga_esca,179,17,731,11
tcga_lgg,26,8,932,0
tcga_luad,70,26,795,1
tcga_lusc,76,19,838,0
tcga_paad,40,26,815,0
tcga_skcm,111,37,753,1
tcga_stad,104,19,740,1


In [264]:
# GTEx circuit activities (only their row names are used below) and the
# table of physiological pathways (V1 / V2 columns, no header).
pathvals_gtex <- readRDS(
    file = "/mnt/lustre/scratch/CBRA/research/projects/tcga_mm/results/01_mechanistics_models/01_2_mechanistics_models_gtex/path_vals_gtex.rds"
)
physiological_paths <- read.table(
    file = "/mnt/lustre/scratch/CBRA/projects/heterogeneity/data/physiological_paths.tsv",
    sep = "\t"
)

In [265]:
# Percentage of each physiological pathway's circuits that are significantly
# different (tumour vs control) in females and in males, for one TCGA project.
#
# Args:
#   tcga: TCGA project id as stored in the `project` column of the global
#         `circs_sig_dif` table.
#
# Returns:
#   data.frame with one row per pathway in physiological_paths$V1 and the
#   columns project, path_phys, porc_female, porc_male.
#
# Uses the globals `physiological_paths`, `pathvals_gtex` and `circs_sig_dif`.
funct_porc_paths_affect_sex <- function(tcga) {
    path_names <- as.character(physiological_paths$V1)

    # Denominator: circuits per pathway, counted on the GTEx row names via the
    # V2 pattern of each pathway.
    n_circs <- vapply(
        physiological_paths$V2,
        function(x) length(grep(x, rownames(pathvals_gtex))),
        integer(1), USE.NAMES = FALSE)

    sig_project <- circs_sig_dif[which(circs_sig_dif$project == tcga), ]

    # Circuit names have the form "<pathway>: <rest>" (the text before ":" is
    # used as the pathway name elsewhere in this analysis), so a circuit is
    # attributed to a pathway by a literal prefix match. A regex/substring
    # match would miscount names with metacharacters or names contained in
    # other circuit names.
    perc_sig_circs <- function(sex_label) {
        circs <- sig_project$circ[which(sig_project$sex == sex_label)]
        n_sig <- vapply(
            path_names,
            function(path) sum(startsWith(circs, paste0(path, ":")), na.rm = TRUE),
            integer(1), USE.NAMES = FALSE)
        (n_sig / n_circs) * 100
    }

    data.frame("project" = rep(tcga, length(path_names)),
               "path_phys" = physiological_paths$V1,
               "porc_female" = perc_sig_circs("female"),
               "porc_male" = perc_sig_circs("male"))
}

In [266]:
# Per-pathway percentages for every TCGA project, stacked into one table.
porc_per_project <- lapply(proj_tumorvsctrl$tcga, funct_porc_paths_affect_sex)
porc_paths_affect_sex <- do.call(rbind, porc_per_project)

In [267]:
# Export the per-pathway percentages with readable cancer-type names.
porc_paths_affect_sex_csv <- as.data.frame(
    porc_paths_affect_sex %>%
        mutate(project_name = case_when(
            project == "tcga_brca" ~ "Breast invasive carcinoma",
            project == "tcga_coad" ~ "Colon adenocarcinoma",
            project == "tcga_esca" ~ "Esophageal carcinoma",
            project == "tcga_lgg" ~ "Brain Lower Grade Glioma",
            project == "tcga_luad" ~ "Lung adenocarcinoma",
            project == "tcga_lusc" ~ "Lung squamous cell carcinoma",
            project == "tcga_paad" ~ "Pancreatic Adenocarcinoma",
            project == "tcga_skcm" ~ "Skin Cutaneous Melanoma",
            project == "tcga_stad" ~ "Stomach adenocarcinoma"))
)[, c("project_name", "path_phys", "porc_female", "porc_male")]
colnames(porc_paths_affect_sex_csv) <- c("cancer_type", "physiological pathways", "porc_sdcirc_female", "porc_sdcirc_male")
# TRUE/FALSE instead of T/F: the short forms are ordinary variables that can
# be reassigned.
write.csv(porc_paths_affect_sex_csv, file="/mnt/lustre/scratch/CBRA/research/projects/tcga_mm/results/03_limma_pathvals/perc_circs_sign_diff_in_paths.csv", row.names = FALSE)

In [272]:
# Heatmap of the pathways whose percentage of significant circuits differs
# between sexes by more than 10 percentage points, one facet per cancer type.
jpeg("/mnt/lustre/scratch/CBRA/research/projects/tcga_mm/results/03_limma_pathvals/heatmap_porc_circ_sign_sex_limma_pathways.jpeg", quality = 100, width = 550, height = 800)

heatmap_porc_sex <- porc_paths_affect_sex %>%
    # The difference is taken in absolute value so that pathways more affected
    # in males are kept as well as those more affected in females.
    mutate(same_porc_aprox = case_when(
        abs(porc_female - porc_male) <= 10 ~ "YES",
        abs(porc_female - porc_male) > 10 ~ "NO")) %>%
    filter(same_porc_aprox == "NO") %>%
    melt(id.vars = c("project", "path_phys", "same_porc_aprox")) %>%
    mutate(project_name = case_when(project == "tcga_brca" ~ "Breast invasive carcinoma",
                                    project == "tcga_coad" ~ "Colon adenocarcinoma",
                                    project == "tcga_esca" ~ "Esophageal carcinoma",
                                    project == "tcga_lgg" ~ "Brain Lower Grade Glioma",
                                    project == "tcga_luad" ~ "Lung adenocarcinoma",
                                    project == "tcga_lusc" ~ "Lung squamous cell carcinoma",
                                    project == "tcga_paad" ~ "Pancreatic Adenocarcinoma",
                                    project == "tcga_skcm" ~ "Skin Cutaneous Melanoma",
                                    project == "tcga_stad" ~ "Stomach adenocarcinoma")) %>%
    ggplot(aes(x = variable, y = path_phys, fill = value)) +
    geom_tile(color = "white") +
    facet_grid(project_name ~ ., scales = "free_y", space = "free_y") +
    labs(title = "", x = "Sex", y = "Physiological pathways", fill = "Percentage", color = "Legend Title\n") +
    theme_minimal() +
    scale_x_discrete(labels = c("porc_female" = "Female", "porc_male" = "Male")) +
    scale_fill_gradient(low = "#F39B7F", high = "#A73030") +
    theme(
        strip.text.y = element_text(size = 10, angle = 360),
        strip.background = element_rect(colour = "grey", fill = "#66B9B9"),
        legend.position = "bottom",
        legend.direction = "horizontal",
        legend.text = element_text(size = 10),
        axis.text.x = element_text(size = 10, vjust = 0.5, angle = 90),
        axis.text.y = element_text(size = 10, vjust = 0.5),
        plot.title = element_text(hjust = 0, size = 14),
        panel.grid = element_blank(),        # no grid lines
        panel.background = element_blank(),  # keep the strip colours visible
        panel.border = element_blank()       # no panel border
    )

# Explicit print so the figure is also drawn when the code is not run as a
# top-level interactive expression (e.g. via source()).
print(heatmap_porc_sex)

dev.off()


Using project, path_phys, same_porc_aprox as id variables



In [214]:
# Number of circuits per physiological pathway: rows of pathvals_gtex whose
# name matches the pathway's V2 pattern. Row names are the V1 pathway names.
circuits_per_pattern <- vapply(
    physiological_paths$V2,
    function(pattern) length(grep(pattern, rownames(pathvals_gtex))),
    integer(1),
    USE.NAMES = FALSE
)
n_circs_path <- data.frame(
    "paths" = physiological_paths$V1,
    "n_circs" = circuits_per_pattern
)
rownames(n_circs_path) <- n_circs_path$paths
dim(n_circs_path)

In [215]:
# Number of significant circuits per pathway for every sex/project pair.
# The pathway of a circuit is the text before the first ":" of its name.
n_circs_sig_dif_sex_project <- as.data.frame(
    circs_sig_dif %>%
        mutate(paths = vapply(strsplit(circ, ":"), function(parts) parts[1], character(1))) %>%
        mutate(sex_project = paste(sex, project, sep = "_")) %>%
        group_by(sex_project, paths) %>%
        summarise(n = n(), .groups = "drop"))

# Percentage of the pathway's circuits that are significant (vectorised
# lookup of the pathway size by row name).
n_circs_sig_dif_sex_project$perc_sdcirc_paths <-
    as.numeric(n_circs_sig_dif_sex_project$n) /
    n_circs_path[n_circs_sig_dif_sex_project$paths, "n_circs"] * 100

# Wide table: one row per pathway, one column per sex/project pair.
write.csv(dcast(n_circs_sig_dif_sex_project, paths ~ sex_project, value.var = "perc_sdcirc_paths"), file="/mnt/lustre/scratch/CBRA/research/projects/tcga_mm/results/03_limma_pathvals/perc_circs_sign_diff_in_paths_cancer_type.csv", row.names = FALSE)

[1m[22m`summarise()` has grouped output by 'sex_project'. You can override using the
`.groups` argument.
Using perc_sdcirc_paths as value column: use value.var to override.



In [216]:
# Circuits that are significant in 9 projects (i.e. appear 9 times) for a
# given sex, then classified as shared by both sexes, female-only or male-only
# and counted per pathway (text before the first ":" of the circuit name).

# Frequency table (columns Var1, Freq) of the pathways of a set of circuits.
count_circs_per_path <- function(circs) {
    data.frame(table(vapply(strsplit(circs, ":"), function(parts) parts[1], character(1))))
}

# One row per (sex, circuit) with 9 occurrences.
circs_all_ct <- circs_sig_dif %>%
    group_by(sex, circ) %>%
    summarise(n = n(), .groups = "drop") %>%
    filter(n == 9)

# For each of those circuits, in how many sexes it reaches 9 occurrences.
circs_all_ct_n_sex <- circs_all_ct %>%
    group_by(circ) %>%
    summarise(n = n(), .groups = "drop")
circs_all_ct_both <- circs_all_ct_n_sex$circ[circs_all_ct_n_sex$n == 2]
circs_all_ct_one_sex <- circs_all_ct_n_sex$circ[circs_all_ct_n_sex$n == 1]

paths_n_circ_all_ct_bs <- count_circs_per_path(circs_all_ct_both)

paths_n_circ_all_ct_fem <- count_circs_per_path(
    circs_all_ct$circ[circs_all_ct$sex == "female" & circs_all_ct$circ %in% circs_all_ct_one_sex])

paths_n_circ_all_ct_mal <- count_circs_per_path(
    circs_all_ct$circ[circs_all_ct$sex == "male" & circs_all_ct$circ %in% circs_all_ct_one_sex])

[1m[22m`summarise()` has grouped output by 'sex'. You can override using the `.groups`
argument.
[1m[22m`summarise()` has grouped output by 'sex'. You can override using the `.groups`
argument.
[1m[22m`summarise()` has grouped output by 'sex'. You can override using the `.groups`
argument.
[1m[22m`summarise()` has grouped output by 'sex'. You can override using the `.groups`
argument.
[1m[22m`summarise()` has grouped output by 'sex'. You can override using the `.groups`
argument.


In [217]:
# Turn the per-pathway counts into percentages of the pathway's circuits;
# the pathway size is looked up in n_circs_path by row name.
paths_n_circ_all_ct_bs$perc_circ_bs <-
    paths_n_circ_all_ct_bs$Freq / n_circs_path[as.character(paths_n_circ_all_ct_bs$Var1), "n_circs"] * 100
paths_n_circ_all_ct_fem$perc_circ_fem <-
    paths_n_circ_all_ct_fem$Freq / n_circs_path[as.character(paths_n_circ_all_ct_fem$Var1), "n_circs"] * 100
paths_n_circ_all_ct_mal$perc_circ_mal <-
    paths_n_circ_all_ct_mal$Freq / n_circs_path[as.character(paths_n_circ_all_ct_mal$Var1), "n_circs"] * 100

In [218]:
# Join the three percentage tables by pathway (Var1).
# NOTE(review): both joins are left joins from the "both sexes" table, so a
# pathway that only has sex-specific circuits is dropped here - confirm this
# is intended.
perc_path_aff_all_ct <- merge(
    merge(paths_n_circ_all_ct_bs, paths_n_circ_all_ct_fem, by = "Var1", all.x = TRUE),
    paths_n_circ_all_ct_mal, by = "Var1", all.x = TRUE
)[, c("Var1", "perc_circ_bs", "perc_circ_fem", "perc_circ_mal")]

In [278]:
# Heatmap of the pathways with sex-specific circuits among those significant
# in every cancer type: percentage of circuits shared by both sexes,
# female-only and male-only.
jpeg("/mnt/lustre/scratch/CBRA/research/projects/tcga_mm/results/03_limma_pathvals/heatmap_porc_circ_sign_sex_all_cancer_type_limma_pathways.jpeg", quality = 100, width = 450, height = 700)

heatmap_all_cancer_types <- perc_path_aff_all_ct %>%
    filter(perc_circ_fem > 0 | perc_circ_mal > 0) %>%
    melt(id.vars = "Var1") %>%
    ggplot(aes(x = variable, y = Var1, fill = value)) +
    geom_tile(color = "white") +
    scale_fill_gradient(low = "#E3C9EB", high = "#844288", na.value = "white") +
    labs(title = "", x = "Sex", y = "Physiological pathways", fill = "Percentage", color = "Legend Title\n") +
    theme_minimal() +
    # Labels are matched by column name: positional labels would attach
    # "Male" to perc_circ_fem and "Female" to perc_circ_mal, because the
    # melted columns come in the order bs, fem, mal.
    scale_x_discrete(labels = c("perc_circ_bs" = "Both sex",
                                "perc_circ_fem" = "Female",
                                "perc_circ_mal" = "Male")) +
    theme(strip.text.y = element_text(size = 10, angle = 360),
          legend.position = "right", legend.direction = "vertical",
          legend.text = element_text(size = 10),
          axis.text.x = element_text(size = 10, vjust = 0.5, angle = 90),
          axis.text.y = element_text(size = 10, vjust = 0.5),
          plot.title = element_text(hjust = 0, size = 14))

# Explicit print so the figure is also drawn when the code is not run as a
# top-level interactive expression (e.g. via source()).
print(heatmap_all_cancer_types)
dev.off()

Using Var1 as id variables



In [220]:
# Hallmark annotation table: the second column provides the row names, the
# columns after the second one are kept as the hallmark columns.
paths_hallmarks <- read.table("/mnt/lustre/scratch/CBRA/research/projects/tcga_mm/data/paths_hallmarks.tsv", sep = "\t", header = TRUE)
hallmarks <- paths_hallmarks[, -c(1, 2)]
rownames(hallmarks) <- paths_hallmarks[, 2]

In [221]:
# Table of physiological pathways (tab-separated, no header: columns V1, V2).
physiological_paths <- read.table(
    file = "/mnt/lustre/scratch/CBRA/projects/heterogeneity/data/physiological_paths.tsv",
    sep = "\t"
)

In [222]:
# Keep the hallmark rows whose name contains a physiological pathway name.
# fixed = TRUE: pathway names are literal text, not regular expressions, so
# characters such as "(" or "." in a name must not be interpreted.
hallmarks_phys <- hallmarks[unlist(lapply(physiological_paths$V1, FUN = function(x) grep(x, rownames(hallmarks), fixed = TRUE))), ]

In [223]:
# Per project: column sums of the hallmark table over the circuits that are
# significant in males (rows without a complete hallmark annotation are dropped).
hallmarks_male <- data.frame(do.call(rbind, lapply(proj_tumorvsctrl$tcga, function(tcga_id) {
    sig_male <- data.frame(circs_sig_dif %>% filter(project == tcga_id, sex == "male"))
    annotated <- na.omit(hallmarks_phys[sig_male$circ, ])
    apply(annotated, 2, sum)
})))
hallmarks_male$project <- proj_tumorvsctrl$tcga
hallmarks_male$sex <- rep_len("male", length(proj_tumorvsctrl$tcga))

In [224]:
# Per project: column sums of the hallmark table over the circuits that are
# significant in females (rows without a complete hallmark annotation are dropped).
hallmarks_female <- data.frame(do.call(rbind, lapply(proj_tumorvsctrl$tcga, function(tcga_id) {
    sig_female <- data.frame(circs_sig_dif %>% filter(project == tcga_id, sex == "female"))
    annotated <- na.omit(hallmarks_phys[sig_female$circ, ])
    apply(annotated, 2, sum)
})))
hallmarks_female$project <- proj_tumorvsctrl$tcga
hallmarks_female$sex <- rep_len("female", length(proj_tumorvsctrl$tcga))

In [225]:
# Stack the per-sex hallmark sums: male rows first, then female rows.
hallmarks_tcga <- rbind(hallmarks_male, hallmarks_female)

In [226]:
# Rename the upper-case, dot-separated hallmark columns to the labels used in
# the radar plots; columns not listed here keep their name.
colnames(hallmarks_tcga) <- local({
    hallmark_labels <- c(
        "INVASION.AND.METASTASIS" = "Invasion_and_metastasis",
        "IMMUNE.DESTRUCTION" = "Immune_destruction",
        "CELLULAR.ENERGETICS" = "Cellular_energetics",
        "REPLICATIVE.IMMORTALITY" = "Replicate_immortality",
        "EVADING.GROWTH.SUPPRESSORS" = "Evading_growth_suppressors",
        "GENOME.INSTABILITY.AND.MUTATION" = "Genome_instability_and_mutation",
        "INDUCING.ANGIOGENESIS" = "Inducing_angiogenesis",
        "RESISTING.CELL.DEATH" = "Resisting_cell_death",
        "SUSTAINING.PROLIFERATIVE.SIGNALING" = "Sustaining_proliferative_signaling",
        "TUMOR.PROMOTING.INFLAMMATION" = "Tumor_promoting_inflammation"
    )
    current <- colnames(hallmarks_tcga)
    known <- current %in% names(hallmark_labels)
    current[known] <- unname(hallmark_labels[current[known]])
    current
})

In [227]:
# Hallmark sums without columns 11 and 12 (project and sex), then min-max
# scaled per hallmark across all project/sex rows.
scores <- hallmarks_tcga[, -c(11, 12)]
normalized_scores <- apply(scores, 2, function(hallmark) {
    lowest <- min(hallmark)
    highest <- max(hallmark)
    (hallmark - lowest) / (highest - lowest)
})

In [228]:
# Lookup table: TCGA project id -> cancer type name used in plot titles.
# Rows are also addressable by project id.
names_project <- data.frame(
    "project" = c(
        "tcga_brca", "tcga_coad", "tcga_esca", "tcga_lgg", "tcga_luad",
        "tcga_lusc", "tcga_paad", "tcga_skcm", "tcga_stad"
    ),
    "name" = c(
        "Breast invasive carcinoma", "Colon adenocarcinoma",
        "Esophageal carcinoma", "Brain Lower Grade Glioma",
        "Lung adenocarcinoma", "Lung squamous cell carcinoma",
        "Pancreatic Adenocarcinoma", "Skin Cutaneous Melanoma",
        "Stomach adenocarcinoma"
    )
)
row.names(names_project) <- names_project[["project"]]

In [229]:
# Draw the radar chart of normalised hallmark scores for one TCGA project,
# one line per sex, and save it as a JPEG.
#
# Args:
#   project: TCGA project id as stored in hallmarks_tcga$project.
#
# Returns:
#   The value of dev.off() after closing the JPEG device.
#
# Uses the globals `normalized_scores`, `hallmarks_tcga` and `names_project`.
radar_plot_per_project <- function(project){
    project_rows <- which(hallmarks_tcga$project == project)
    # drop = FALSE keeps a one-row selection as a row instead of a vector.
    df_project <- data.frame(normalized_scores[project_rows, , drop = FALSE])

    # Two leading rows with 1 and 0 for every axis; fmsb::radarchart() takes
    # the axis maximum and minimum from the first two rows of its input.
    max_min <- data.frame(
        Invasion_and_metastasis = c(1, 0),
        Immune_destruction = c(1, 0),
        Cellular_energetics = c(1, 0),
        Replicate_immortality = c(1, 0),
        Evading_growth_suppressors = c(1, 0),
        Genome_instability_and_mutation = c(1, 0),
        Inducing_angiogenesis = c(1, 0),
        Resisting_cell_death = c(1, 0),
        Sustaining_proliferative_signaling = c(1, 0),
        Tumor_promoting_inflammation = c(1, 0)
    )
    df_project <- rbind(max_min, df_project)

    # One colour per data row, in the row order of hallmarks_tcga.
    colors_border <- c("blue", "red")

    jpeg(paste0("/mnt/lustre/scratch/CBRA/research/projects/tcga_mm/results/03_limma_pathvals/hallmark_limma_pathways",project,".jpeg"), quality = 100, width = 850, height = 700)
    # Close the device even if plotting fails, so no device is left open.
    plot_device <- dev.cur()
    device_closed <- FALSE
    on.exit(if (!device_closed) dev.off(plot_device), add = TRUE)

    radarchart(
        df_project,
        pcol = colors_border,
        plwd = 2,
        plty = 1,
        cglcol = "grey",
        cglty = 1,
        axislabcol = "black",
        caxislabels = seq(0, 1, 0.2),
        vlcex = 0.8
    )
    legend(
        x = 1.2, y = 1,
        legend = hallmarks_tcga[project_rows, "sex"],
        bty = "n", pch = 20,
        col = colors_border, text.col = "black", cex = 0.8
    )
    title(main = names_project[project, "name"])

    device_closed <- TRUE
    dev.off(plot_device)
}

In [230]:
# Write one radar chart per TCGA project.
lapply(names_project$project, radar_plot_per_project)

In [231]:
# Inspect the sex order of the tcga_brca rows (it determines the colour and
# legend order in the radar chart).
with(hallmarks_tcga, sex[which(project == "tcga_brca")])

In [232]:
library(fmsb)
library(png)  # Provides readPNG() for loading PNG images

In [250]:
# Read the hallmarks index image from disk.
img <- readPNG(
    source = "/mnt/lustre/scratch/CBRA/research/projects/tcga_mm/results/03_limma_pathvals/hallmarks_index.png"
)

In [251]:
# Radar charts of the normalised hallmark scores, one chart per sex with one
# line per TCGA project.
df_female <- data.frame(normalized_scores[which(hallmarks_tcga$sex == "female"),])
df_male <- data.frame(normalized_scores[which(hallmarks_tcga$sex == "male"),])

# Two leading rows with 1 and 0 for every axis; fmsb::radarchart() takes the
# axis maximum and minimum from the first two rows of its input.
max_min <- data.frame(
  Invasion_and_metastasis = c(1, 0),
  Immune_destruction = c(1, 0),
  Cellular_energetics = c(1, 0),
  Replicate_immortality = c(1, 0),
  Evading_growth_suppressors = c(1, 0),
  Genome_instability_and_mutation = c(1, 0),
  Inducing_angiogenesis = c(1, 0),
  Resisting_cell_death = c(1, 0),
  Sustaining_proliferative_signaling = c(1, 0),
  Tumor_promoting_inflammation = c(1, 0)
)

df_female <- rbind(max_min, df_female)
df_male <- rbind(max_min, df_male)

# One colour per project, in the order of proj_tumorvsctrl$tcga.
colors_border <- c("red", "blue", "green", "orange", "purple", "cyan", "magenta", "brown", "yellow")

# Order of the hallmark axes around the chart.
hallmark_axis_order <- c('Evading_growth_suppressors','Sustaining_proliferative_signaling','Cellular_energetics','Resisting_cell_death','Genome_instability_and_mutation','Inducing_angiogenesis','Invasion_and_metastasis','Tumor_promoting_inflammation','Replicate_immortality','Immune_destruction')

# Draw one unlabelled radar chart (plus project legend) into a JPEG file.
#   df:     max/min rows followed by one row per project.
#   file:   output path.
#   width, height: image size in pixels.
plot_hallmark_radar_by_sex <- function(df, file, width, height) {
  jpeg(file, quality = 100, width = width, height = height)
  # Close the device even if plotting fails.
  on.exit(dev.off(), add = TRUE)

  radarchart(
    df[, hallmark_axis_order],
    pcol = colors_border,
    plwd = 2,
    plty = 1,
    cglcol = "grey",
    cglty = 1,
    axislabcol = "black",
    caxislabels = seq(0, 1, 0.2),
    vlcex = 0,
    # One empty label per axis (number of hallmark columns, not of rows).
    vlabels = rep("", length(hallmark_axis_order))
  )

  legend(
    x = 1.2, y = 1,
    legend = toupper(gsub("_","-",proj_tumorvsctrl$tcga)),
    bty = "n", pch = 20,
    col = colors_border, text.col = "black", cex = 0.8
  )
  invisible(file)
}

plot_hallmark_radar_by_sex(
  df_female,
  "/mnt/lustre/scratch/CBRA/research/projects/tcga_mm/results/03_limma_pathvals/hallmark_female_limma_pathways.jpeg",
  width = 850, height = 700
)

# NOTE(review): the male chart is 700 x 850 while the female chart is
# 850 x 700 - confirm the different orientation is intended.
plot_hallmark_radar_by_sex(
  df_male,
  "/mnt/lustre/scratch/CBRA/research/projects/tcga_mm/results/03_limma_pathvals/hallmark_male_limma_pathways.jpeg",
  width = 700, height = 850
)

In [187]:
# Inspect the hallmark column names of df_male in a chosen positional order.
names(df_male)[c(5, 2, 4, 10, 1, 7, 6, 8, 3, 9)]

In [197]:
# NOTE(review): this cell opens a JPEG device on the same path as the male
# hallmark radar chart and closes it without drawing anything in between.
# Presumably a leftover cell - confirm it does not replace that figure.
jpeg("/mnt/lustre/scratch/CBRA/research/projects/tcga_mm/results/03_limma_pathvals/hallmark_male_limma_pathways.jpeg", quality = 100, width = 700, height = 850)


dev.off()

In [70]:
# Working copy of the min-max normalised hallmark scores
# (one row per project/sex combination, one column per hallmark).
df <- normalized_scores
# Return the two largest entries of a named numeric vector.
#
# Args:
#   row: named numeric vector.
#
# Returns:
#   list(values = the two largest values, in decreasing order,
#        columns = their names).
get_top_2 <- function(row) {
  best_two <- order(row, decreasing = TRUE)[1:2]
  list(values = row[best_two], columns = names(row)[best_two])
}

# For every project/sex row, find the two hallmarks with the highest
# normalised score and attach them to the project and sex columns.
top_2_values_columns <- apply(df, 1, get_top_2)
# Iterate over the list itself instead of 1:length(), which misbehaves on an
# empty list; unname() keeps the stacked matrix free of row names.
hallmark_more_freq <- do.call(rbind, lapply(unname(top_2_values_columns), function(top) top$columns))
hallmark_more_freq <- cbind(hallmarks_tcga[, c(11, 12)], hallmark_more_freq)
colnames(hallmark_more_freq) <- c("project", "sex", "hall1", "hall2")

In [84]:
# Significant circuits of one project whose |logFC| lies above the 98th
# percentile of |logFC|, computed separately for each sex.
#
# Args:
#   x: TCGA project id as stored in the `project` column of the global
#      `circs_sig_dif` table.
#
# Returns:
#   data.frame with columns circ, sex, project, logFC; female rows first,
#   then male rows.
filter_logfc_q98_funct <- function(x){
    # Rows of one sex above that sex's own 98th percentile of |logFC|.
    extreme_circuits <- function(sex_label) {
        sig_rows <- circs_sig_dif %>%
            filter(project == x & sex == sex_label)
        magnitude <- abs(sig_rows$logFC)
        cutoff <- quantile(magnitude, 0.98)
        sig_rows[magnitude > cutoff, c("circ","sex","project","logFC")]
    }

    rbind(extreme_circuits("female"), extreme_circuits("male"))
}

# Top-|logFC| circuits of every project, with display labels for sex
# ("Female"/"Male") and project (e.g. "TCGA-BRCA").
circ_sign_logfc_q98 <- do.call(rbind, lapply(proj_tumorvsctrl$tcga, filter_logfc_q98_funct)) %>%
  mutate(sex = ifelse(sex == "female", "Female", "Male")) %>%
  mutate(project = toupper(gsub("_", "-", project)))

jpeg("/mnt/lustre/scratch/CBRA/research/projects/tcga_mm/results/03_limma_pathvals/heatmap_circ_sign_logfc_q98_limma_pathways.jpeg", quality = 100, width = 700, height = 1200)
heatmap_logfc_q98 <- circ_sign_logfc_q98 %>%
  ggplot(aes(x = sex, y = circ, fill = logFC)) +
  geom_tile(color = "white") +
  facet_grid(. ~ project, scales = "free_y", space = "free_y") +
  labs(
    title = "",
    x = "",
    y = "Sign. cell. sign. circuits |logFC| > 98Q",
    fill = "LogFC"
  ) +
  # Diverging scale centred at logFC = 0.
  scale_fill_gradient2(low = "darkred", mid = "white", high = "darkgreen", midpoint = 0, na.value = "grey50") +
  theme_minimal() +
  theme(
    axis.text.y = element_text(size = 8),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),  # vertical sex labels
    strip.text.x = element_text(angle = 90)                          # vertical project labels
  )
# Explicit print so the figure is also drawn when the code is not run as a
# top-level interactive expression (e.g. via source()).
print(heatmap_logfc_q98)
dev.off()

In [72]:
# Pathway of each circuit: the text before the first ":" of the circuit name.
circs_sig_dif$path <- vapply(
    strsplit(circs_sig_dif$circ, ":"),
    function(parts) parts[1],
    character(1),
    USE.NAMES = FALSE
)

In [73]:
# Bar chart of the number of significant circuits per pathway and sex for
# tcga_brca.
jpeg("interpretation.jpeg", quality = 100, width = 850, height = 700)
barplot_brca <- circs_sig_dif %>%
  filter(project == "tcga_brca") %>%
  group_by(sex, path) %>%
  summarise(n = n(), .groups = "drop") %>%
  ggplot(aes(x = path, y = n, fill = sex)) +
  geom_bar(stat = "identity") +
  coord_flip() + # pathways on the vertical axis for readability
  labs(title = "Pathway Frequencies by Sex",
       x = "Pathway",
       y = "Frequency") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Explicit print so the figure is also drawn when the code is not run as a
# top-level interactive expression (e.g. via source()).
print(barplot_brca)
dev.off()

[1m[22m`summarise()` has grouped output by 'sex'. You can override using the `.groups`
argument.


In [74]:
# For every project and pathway, in how many sexes the pathway has more than
# 15 significant circuits.
circs_sig_dif %>%
    group_by(sex, project, path) %>%
    summarise(n = n(), .groups = "drop") %>%
    filter(n > 15) %>%
    group_by(project, path) %>%
    summarise(n = n(), .groups = "drop")

[1m[22m`summarise()` has grouped output by 'sex', 'project'. You can override using
the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'project'. You can override using the
`.groups` argument.


project,path,n
<chr>,<chr>,<int>
tcga_brca,AMPK signaling pathway,2
tcga_brca,Apoptosis,2
tcga_brca,Axon guidance,2
tcga_brca,ErbB signaling pathway,1
tcga_brca,HIF-1 signaling pathway,2
tcga_brca,Hippo signaling pathway,2
tcga_brca,MAPK signaling pathway,2
tcga_brca,NF-kappa B signaling pathway,2
tcga_brca,PI3K-Akt signaling pathway,2
tcga_brca,PPAR signaling pathway,2
