In [123]:
suppressPackageStartupMessages({
    library(dplyr)
    library(readr)
    library(tidyr)
    library(tibble)
})

[Analysis Data](https://www.proteinatlas.org/download/rna_immune_cell_monaco.tsv.zip)

In [124]:
data <- read_tsv("rna_immune_cell_monaco.tsv.gz") %>% as.data.frame()
protein_atlas <- c(
"PTPRC", "TFRC", "CD74", "ITGAL", "CLEC4C", "EEA1", "RAB5A", "CD63", "RAB7A", "LAMP1"

)

celltype <- c(
  "Central memory CD8 T-cell", 
  "Effector memory CD8 T-cell", 
  "Exhausted memory B-cell", 
  "Memory CD4 T-cell TFH", 
  "Memory CD4 T-cell Th1", 
  "Memory CD4 T-cell Th1/Th17", 
  "Memory CD4 T-cell Th17", 
  "Memory CD4 T-cell Th2", 
  "naive B-cell", 
  "naive CD4 T-cell", 
  "naive CD8 T-cell", 
  "Non-switched memory B-cell", 
  "Plasmablast", 
  "Switched memory B-cell", 
  "T-reg", 
  "Terminal effector memory CD4 T-cell", 
  "Terminal effector memory CD8 T-cell", 
  "Vd2 gdTCR",
  "plasmacytoid DC"
)
# Filter and pivot the dataframe
filtered_df <- data %>%
  filter(`Gene name` %in% protein_atlas) %>%
  filter(`Immune cell` %in% celltype) %>%
  dplyr::select(-c(Gene, pTPM)) %>% 
  pivot_wider(names_from = "Gene name", values_from = "TPM") %>%
 dplyr::select(`Immune cell`, PTPRC, TFRC, CD74, ITGAL, CLEC4C, EEA1, RAB5A, CD63, RAB7A, LAMP1) %>%
 rename(Cell = `Immune cell`) %>% data.frame()
immune <- filtered_df
head(immune)

[1mRows: [22m[34m604860[39m [1mColumns: [22m[34m5[39m
[36m──[39m [1mColumn specification[22m [36m───────────────────────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m "\t"
[31mchr[39m (3): Gene, Gene name, Immune cell
[32mdbl[39m (2): TPM, pTPM

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


Unnamed: 0_level_0,Cell,PTPRC,TFRC,CD74,ITGAL,CLEC4C,EEA1,RAB5A,CD63,RAB7A,LAMP1
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,Central memory CD8 T-cell,1110.6,7.6,709.3,185.0,0.2,5.9,46.1,47.8,161.1,10.5
2,Effector memory CD8 T-cell,1410.5,10.0,1828.8,347.7,0.5,5.8,57.9,71.3,205.8,12.2
3,Exhausted memory B-cell,335.3,23.0,16365.5,50.1,1.0,10.1,27.7,44.5,112.2,4.7
4,Memory CD4 T-cell TFH,998.3,8.0,498.2,133.5,0.3,5.0,33.8,26.5,151.8,3.9
5,Memory CD4 T-cell Th1,937.6,10.7,622.9,175.7,0.4,6.8,45.0,43.2,158.8,8.9
6,Memory CD4 T-cell Th1/Th17,954.2,7.5,545.4,168.6,0.4,4.7,46.1,52.7,168.1,8.5


In [158]:
emtab <- read_tsv("E-MTAB-6867-query-results.tsv", skip = 4)
genes <- c(
"PTPRC", "TFRC", "CD74", "ITGAL", "CLEC4C", "EEA1", "RAB5A", "CD63", "RAB7A", "LAMP1"

)
emtab <- emtab[emtab$`Gene Name` %in% genes, ]
emtab <- emtab %>%
    dplyr::select(-`Gene ID`) %>%
    dplyr::rename(Cell = `Gene Name`)

# Transpose the data
emtab <- t(emtab)

# Convert to a data frame
emtab <- as.data.frame(emtab)

# Set the first row as column names
colnames(emtab) <- emtab[1, ]

# Display the modified data frame
emtab


[1mRows: [22m[34m34104[39m [1mColumns: [22m[34m8[39m
[36m──[39m [1mColumn specification[22m [36m───────────────────────────────────────────────────────────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m "\t"
[31mchr[39m (2): Gene ID, Gene Name
[32mdbl[39m (6): ARH77, Daudi, NAMALWA, Raji, Ramos, Toledo

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


Unnamed: 0_level_0,ITGAL,CD74,TFRC,RAB7A,PTPRC,EEA1,CD63,RAB5A,LAMP1,CLEC4C
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
Cell,ITGAL,CD74,TFRC,RAB7A,PTPRC,EEA1,CD63,RAB5A,LAMP1,CLEC4C
ARH77,52.0,3121.0,426.0,296.0,69.0,25.0,112.0,34.0,102.0,0.1
Daudi,53.0,5289.0,206.0,248.0,326.0,27.0,36.0,39.0,55.0,0.1
NAMALWA,9.0,2389.0,193.0,346.0,179.0,20.0,103.0,43.0,105.0,0.1
Raji,29,3744,219,251,174,27,49,48,45,
Ramos,104,2333,175,344,99,13,32,72,70,
Toledo,12,4070,255,283,202,20,115,45,89,2


In [125]:
ccle <- read.table("CCLE_RNAseq_rsem_genes_tpm_20180929.txt.gz", header = TRUE, row.names = 1) 
# Select cells of interest
counts_subset <- ccle
patterns <- c("Raji", "Jurkat")
selected_columns <- colnames(counts_subset)[apply(sapply(patterns, function(p) grepl(p, colnames(counts_subset), ignore.case = TRUE)), 1, any)]
counts_subset <- counts_subset[, selected_columns]

# Clean up data
rownames(counts_subset) <- gsub("\\..*", "", rownames(counts_subset))
counts_subset <- as.matrix(counts_subset)
mart <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
ensembl_annot <- getBM(attributes=c("ensembl_gene_id","external_gene_name"),
                       filters = "ensembl_gene_id",
                       values = rownames(counts_subset), 
                       mart = mart)
cols <- as.data.frame(rownames(counts_subset))
new_names <- left_join(cols, ensembl_annot, by = c("rownames(counts_subset)" = "ensembl_gene_id"))
rownames(counts_subset) <- new_names$external_gene_name
count_df <- as.data.frame(counts_subset)
genes <- c(
"PTPRC", "TFRC", "CD74", "ITGAL", "CLEC4C", "EEA1", "RAB5A", "CD63", "RAB7A", "LAMP1"

)
filtered_df <- count_df[rownames(count_df) %in% genes, ]
filtered_df <- cbind(Cell = rownames(filtered_df), filtered_df)
filtered_df <- t(filtered_df)
filtered_df <- as.data.frame(filtered_df)
filtered_df$Cell <- rownames(filtered_df)
rownames(filtered_df) <- NULL
ccle <- filtered_df


ccle <- ccle[-1, ] %>%
        dplyr::select(`Cell`, PTPRC, TFRC, CD74, ITGAL, CLEC4C, EEA1, RAB5A, CD63, RAB7A, LAMP1) %>%
        mutate(Cell = recode(Cell, 
                           "JURKAT_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE" = "Jurkat", 
                           "RAJI_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE" = "Raji"))

In [126]:
fin <- rbind(ccle, immune)
fin

Unnamed: 0_level_0,Cell,PTPRC,TFRC,CD74,ITGAL,CLEC4C,EEA1,RAB5A,CD63,RAB7A,LAMP1
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
2,Jurkat,101.5,60.68,1.37,12.15,0.0,3.67,30.64,93.76,107.26,55.28
3,Raji,91.0,115.81,2909.4,21.28,0.0,7.06,14.21,53.42,121.16,32.35
1,Central memory CD8 T-cell,1110.6,7.6,709.3,185.0,0.2,5.9,46.1,47.8,161.1,10.5
21,Effector memory CD8 T-cell,1410.5,10.0,1828.8,347.7,0.5,5.8,57.9,71.3,205.8,12.2
31,Exhausted memory B-cell,335.3,23.0,16365.5,50.1,1.0,10.1,27.7,44.5,112.2,4.7
4,Memory CD4 T-cell TFH,998.3,8.0,498.2,133.5,0.3,5.0,33.8,26.5,151.8,3.9
5,Memory CD4 T-cell Th1,937.6,10.7,622.9,175.7,0.4,6.8,45.0,43.2,158.8,8.9
6,Memory CD4 T-cell Th1/Th17,954.2,7.5,545.4,168.6,0.4,4.7,46.1,52.7,168.1,8.5
7,Memory CD4 T-cell Th17,991.2,15.8,384.2,139.7,0.6,7.0,54.6,44.3,191.0,9.1
8,Memory CD4 T-cell Th2,793.3,14.2,440.2,112.1,0.4,7.0,44.5,45.4,169.0,6.6


In [127]:
write.csv(fin, "result.csv", row.names = FALSE)