In [36]:
suppressPackageStartupMessages({
    library(dplyr)
    library(readr)
    library(tidyr)
})

Input data: [immune cell RNA expression table (Monaco dataset), downloaded from the Human Protein Atlas](https://www.proteinatlas.org/download/rna_immune_cell_monaco.tsv.zip)

In [21]:
# Read the zipped tab-separated expression table and hand it on as a plain
# base data.frame rather than the tibble that read_tsv() returns.
data <- as.data.frame(read_tsv("rna_immune_cell_monaco.tsv.zip"))

Rows: 604860 Columns: 5
── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (3): Gene, Gene name, Immune cell
dbl (2): TPM, pTPM

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.


In [23]:
# Preview the first six rows of the loaded table.
head(data, n = 6L)

Unnamed: 0_level_0,Gene,Gene name,Immune cell,TPM,pTPM
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<dbl>,<dbl>
1,ENSG00000000003,TSPAN6,basophil,0.5,0.9
2,ENSG00000000003,TSPAN6,Central memory CD8 T-cell,1.1,1.8
3,ENSG00000000003,TSPAN6,classical monocyte,0.2,0.2
4,ENSG00000000003,TSPAN6,Effector memory CD8 T-cell,0.5,0.7
5,ENSG00000000003,TSPAN6,Exhausted memory B-cell,0.4,0.7
6,ENSG00000000003,TSPAN6,intermediate monocyte,0.1,0.2


In [47]:
# Gene symbols to keep; they are matched against the `Gene name` column.
# Where a gene is better known under another name, that name is noted.
protein_atlas <- c(
  "LILRB1", "LILRB2", "LAIR1", "HLA-G",
  "HAVCR2",  # a.k.a. TIM3
  "CD33",    # a.k.a. SIGLEC3
  "SIGLEC7", "SIGLEC9", "SIRPA"
)

# Immune cell labels to keep; they are matched against the `Immune cell`
# column, so spelling and capitalisation must match the data exactly.
celltype <- c(
  "basophil",
  "Central memory CD8 T-cell",
  "classical monocyte",
  "Effector memory CD8 T-cell",
  "Exhausted memory B-cell",
  "intermediate monocyte",
  "MAIT T-cell",
  "Memory CD4 T-cell TFH",
  "Memory CD4 T-cell Th1",
  "Memory CD4 T-cell Th1/Th17",
  "Memory CD4 T-cell Th17",
  "Memory CD4 T-cell Th2",
  "myeloid DC",
  "naive B-cell",
  "naive CD4 T-cell",
  "naive CD8 T-cell",
  "neutrophil",
  "NK-cell",
  "non-classical monocyte",
  "Non-switched memory B-cell",
  "Non-Vd2 gdTCR",
  "Plasmablast",
  "plasmacytoid DC",
  "Progenitor cell",
  "Switched memory B-cell",
  "T-reg",
  "Terminal effector memory CD4 T-cell",
  "Terminal effector memory CD8 T-cell",
  "total PBMC",
  "Vd2 gdTCR"
)

In [57]:
# Build a wide table of TPM values: one row per immune cell type and one
# column per gene of interest.

# `%in%` drops unmatched symbols without any message, so warn when a requested
# gene is absent from `Gene name` (for example because an alias was listed
# instead of the symbol used in the data).
missing_genes <- setdiff(protein_atlas, data[["Gene name"]])
if (length(missing_genes) > 0) {
  warning(
    "Genes not found in `Gene name`: ",
    paste(missing_genes, collapse = ", "),
    call. = FALSE
  )
}

filtered_df <- data %>%
  filter(
    `Gene name` %in% protein_atlas,
    `Immune cell` %in% celltype
  ) %>%
  # Name the id column explicitly: only `Immune cell` identifies a row.
  # pivot_wider() drops the columns that are not used (Gene, pTPM), and a
  # column added to the input later cannot split a cell type over several rows.
  pivot_wider(
    id_cols = `Immune cell`,
    names_from = `Gene name`,
    values_from = TPM
  )


In [58]:
# Preview the first six rows of the wide table.
head(filtered_df, n = 6L)

Immune cell,LILRB1,CD33,SIGLEC9,LILRB2,HAVCR2,LAIR1,SIGLEC7,SIRPA,HLA-G
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
basophil,0.4,39.3,2.4,78.6,8.3,49.4,5.8,16.5,0
Central memory CD8 T-cell,0.8,0.3,1.2,0.0,6.7,11.2,0.0,0.0,0
classical monocyte,98.0,192.1,293.0,334.2,63.9,27.3,106.3,122.1,0
Effector memory CD8 T-cell,29.1,0.3,13.4,0.0,15.5,21.7,0.8,0.0,0
Exhausted memory B-cell,52.1,7.2,14.5,35.6,6.0,11.5,5.9,5.8,0
intermediate monocyte,274.7,121.4,210.3,719.1,68.6,52.9,75.0,27.3,0


In [59]:
# Save the wide table as a comma-separated file without row names.
# write.csv() doubles embedded quote characters, the convention CSV readers
# expect; write.table() backslash-escapes them by default.
write.csv(filtered_df, "result.csv", row.names = FALSE)