In [1]:
library(singscore)
library(tidyverse)
library(ggplot2)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.2     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.3     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors


In [2]:
# Fix the random-number-generator seed so that reruns of this notebook are
# reproducible.
# Seed source noted by the author: https://www.random.org/ (2023-08-09).
set.seed(seed = 3866)

In [3]:
# Load the gene-set tables for response-subtype clusters 0, 1 and 2.
# The code below reads three columns from each table: `gs` (gene-set name),
# `direction` ("up" or "down") and `gene`.
gs0 <- read.table(
  "../../../../data/gs/IO-Atlas-NSCLC-NSCLC-Response-Cluster-0-TPM-MinMaxNorm-TRAIN-2023-08-10-V4-gene-sets.tsv",
  sep = "\t",
  header = TRUE
)

gs1 <- read.table(
  "../../../../data/gs/IO-Atlas-NSCLC-NSCLC-Response-Cluster-1-TPM-MinMaxNorm-TRAIN-2023-08-10-V4-gene-sets.tsv",
  sep = "\t",
  header = TRUE
)

gs2 <- read.table(
  "../../../../data/gs/IO-Atlas-NSCLC-NSCLC-Response-Cluster-2-TPM-MinMaxNorm-TRAIN-2023-08-10-V4-gene-sets.tsv",
  sep = "\t",
  header = TRUE
)

# Position k in this list holds the table for cluster k - 1.
gene_sets <- list(gs0, gs1, gs2)

# Build a list, named by gene-set name, of the unique genes that `gs_table`
# assigns to each gene set in the requested direction.
#
# gs_table         data frame with columns `gs`, `direction` and `gene`.
# wanted_direction value of `direction` to keep ("up" or "down").
#
# Returns a named list with one entry per distinct `gs` value, in order of
# first appearance; an entry is empty when the gene set has no gene in that
# direction.
collect_direction_genes <- function(gs_table, wanted_direction) {
  set_ids <- unique(gs_table$gs)
  genes_by_set <- lapply(set_ids, function(set_id) {
    gs_table %>%
      # `.data$` forces a column lookup, so a missing column is an error
      # instead of silently matching a global variable of the same name.
      filter(
        .data$gs == set_id,
        .data$direction == wanted_direction
      ) %>%
      pull("gene") %>%
      unique()
  })
  setNames(genes_by_set, set_ids)
}

# One entry per cluster (same order as `gene_sets`); each entry is a named
# list mapping gene-set name -> character vector of genes.
up_gene_set_db <- lapply(
  gene_sets,
  collect_direction_genes,
  wanted_direction = "up"
)
dwn_gene_set_db <- lapply(
  gene_sets,
  collect_direction_genes,
  wanted_direction = "down"
)


In [4]:
# Score the TRAIN expression table against every response-subtype gene set
# and write the resulting score table to disk.
minmax <- read.table(
  "../../../../data/expression/processed/train/V4/IO-Atlas-NSCLC-TPM-MinMaxNorm-TRAIN-2023-08-10-V4.tsv",
  sep = "\t",
  row.names = 1,
  header = TRUE
)

# TRAIN labels: loaded here but not used by the scoring code in this cell.
labels <- read.table(
  "../../../../data/expression/processed/train/V4/IO-Atlas-NSCLC-LABEL-TRAIN-2023-08-10-V4.tsv",
  sep = "\t",
  row.names = 1,
  header = TRUE
)

# Rank the expression table once; every gene set below is scored against
# the same ranks.
# NOTE(review): singscore expects genes in rows and samples in columns --
# confirm the TSV is laid out that way.
rnk <- rankGenes(minmax)

# Filled column by column below; rows are keyed by the row names that
# simpleScore() returns.
scores <- data.frame()

for (cluster_idx in seq_along(up_gene_set_db)) {
  # The gene-set input files are numbered from 0 ("Cluster-0", ...), so the
  # column label uses the list position minus one.
  cluster_label <- cluster_idx - 1

  for (gs_name in names(up_gene_set_db[[cluster_idx]])) {
    # NOTE(review): a gene set with no "down" genes passes an empty vector as
    # `downSet` -- confirm simpleScore() handles that case as intended.
    score <- simpleScore(
      rnk,
      upSet = up_gene_set_db[[cluster_idx]][[gs_name]],
      downSet = dwn_gene_set_db[[cluster_idx]][[gs_name]]
    )

    score_name <- sprintf(
      "Response Subtype Cluster %s %s",
      cluster_label,
      gs_name
    )

    # Assign by row name so each score stays aligned with its row.
    scores[row.names(score), score_name] <- score$TotalScore
  }
}

write.table(
  scores,
  "../../../../data/enrichment/IO-Atlas-NSCLC-TPM-MinMaxNorm-TRAIN-2023-08-10-V4-response-subtype-clusters.tsv",
  sep = "\t"
)

In [5]:
# Score the TEST expression table against every response-subtype gene set
# and write the resulting score table to disk.
minmax <- read.table(
  "../../../../data/expression/processed/test/V4/IO-Atlas-NSCLC-TPM-MinMaxNorm-TEST-2023-08-10-V4.tsv",
  sep = "\t",
  row.names = 1,
  header = TRUE
)

# TEST labels: loaded here but not used by the scoring code in this cell.
labels <- read.table(
  "../../../../data/expression/processed/test/V4/IO-Atlas-NSCLC-LABEL-TEST-2023-08-10-V4.tsv",
  sep = "\t",
  row.names = 1,
  header = TRUE
)

# Rank the expression table once; every gene set below is scored against
# the same ranks.
# NOTE(review): singscore expects genes in rows and samples in columns --
# confirm the TSV is laid out that way.
rnk <- rankGenes(minmax)

# Filled column by column below; rows are keyed by the row names that
# simpleScore() returns.
scores <- data.frame()

for (cluster_idx in seq_along(up_gene_set_db)) {
  # Clusters are labelled from 0, matching the "Cluster-0/1/2" gene-set files
  # and the column names written for the TRAIN table. Using the raw list
  # position (1..3) here would give TEST columns different names from the
  # TRAIN columns for the same gene sets.
  cluster_label <- cluster_idx - 1

  for (gs_name in names(up_gene_set_db[[cluster_idx]])) {
    # NOTE(review): a gene set with no "down" genes passes an empty vector as
    # `downSet` -- confirm simpleScore() handles that case as intended.
    score <- simpleScore(
      rnk,
      upSet = up_gene_set_db[[cluster_idx]][[gs_name]],
      downSet = dwn_gene_set_db[[cluster_idx]][[gs_name]]
    )

    score_name <- sprintf(
      "Response Subtype Cluster %s %s",
      cluster_label,
      gs_name
    )

    # Assign by row name so each score stays aligned with its row.
    scores[row.names(score), score_name] <- score$TotalScore
  }
}

write.table(
  scores,
  "../../../../data/enrichment/IO-Atlas-NSCLC-TPM-MinMaxNorm-TEST-2023-08-10-V4-response-subtype-clusters.tsv",
  sep = "\t"
)