In [1]:
suppressPackageStartupMessages({
    library(dplyr)
    library(ggplot2)
    library(biomaRt)
    library(dplyr)
})

In [31]:
# Read the CCLE expression table (TPM values, per the file name). The first
# line holds the column names and the first column supplies the row names.
counts <- read.table(
  file = "CCLE_RNAseq_rsem_genes_tpm_20180929.txt",
  header = TRUE,
  row.names = 1
)

In [32]:
# Select cells of interest
# Each pattern is matched case-insensitively against the column names of
# `counts`; a column is kept when it matches at least one pattern.
patterns <- c("Raji", "BDCM", "MOLP8", "TMD8", "MEC1")

# Report patterns that match no column instead of dropping them silently, so a
# missing cell line is noticed when the cell runs.
pattern_found <- vapply(
  patterns,
  function(p) any(grepl(p, colnames(counts), ignore.case = TRUE)),
  logical(1)
)
if (!all(pattern_found)) {
  warning(
    "No column matched: ", paste(patterns[!pattern_found], collapse = ", "),
    call. = FALSE
  )
}

# A single alternation regex selects the same columns as testing every pattern
# separately, and it also works when `counts` has only one column.
selected_columns <- grep(
  paste(patterns, collapse = "|"),
  colnames(counts),
  ignore.case = TRUE,
  value = TRUE
)
# drop = FALSE keeps a two-dimensional object even if one column is selected.
counts <- counts[, selected_columns, drop = FALSE]

# Clean up data
# Remove everything from the first "." onwards in each row name (the version
# suffix of identifiers such as "ENSG00000000003.10"), then convert to a matrix.
rownames(counts) <- sub("\\..*", "", rownames(counts))
counts <- as.matrix(counts)
head(counts)

Unnamed: 0,BDCM_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,MEC1_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,MOLP8_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,RAJI_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE
ENSG00000000003,0.21,0.83,0.1,0.05
ENSG00000000005,0.0,0.0,0.0,0.0
ENSG00000000419,62.89,89.76,58.3,62.54
ENSG00000000457,8.03,6.73,8.26,3.76
ENSG00000000460,19.71,11.22,10.05,12.04
ENSG00000000938,119.26,62.63,102.65,117.44


In [33]:
# Connect to the "ensembl" BioMart and select the human gene dataset.
mart <- useMart(biomart = "ensembl", dataset = "hsapiens_gene_ensembl")

# Query the gene ID and gene name attributes for every row name of `counts`,
# filtering on the Ensembl gene ID.
ensembl_annot <- getBM(
  attributes = c("ensembl_gene_id", "external_gene_name"),
  filters = "ensembl_gene_id",
  values = rownames(counts),
  mart = mart
)
head(ensembl_annot)

Unnamed: 0_level_0,ensembl_gene_id,external_gene_name
Unnamed: 0_level_1,<chr>,<chr>
1,ENSG00000000003,TSPAN6
2,ENSG00000000005,TNMD
3,ENSG00000000419,DPM1
4,ENSG00000000457,SCYL3
5,ENSG00000000460,FIRRM
6,ENSG00000000938,FGR


In [34]:
# Replace the Ensembl gene IDs used as row names of `counts` with gene symbols
# from `ensembl_annot`, then convert the matrix to a data frame.

# One row per gene, in the same order as the rows of `counts`.
cols <- data.frame(ensembl_gene_id = rownames(counts))

# Keep a single annotation row per Ensembl ID so the join cannot return more
# rows than `counts` has.
annot_unique <- ensembl_annot[!duplicated(ensembl_annot$ensembl_gene_id), ]
new_names <- left_join(cols, annot_unique, by = "ensembl_gene_id")
stopifnot(nrow(new_names) == nrow(counts))

# IDs without a symbol (no annotation row, or an empty name) keep their Ensembl
# ID rather than becoming NA or blank row names.
gene_symbols <- new_names$external_gene_name
no_symbol <- is.na(gene_symbols) | gene_symbols == ""
gene_symbols[no_symbol] <- new_names$ensembl_gene_id[no_symbol]

rownames(counts) <- gene_symbols
# NOTE(review): several Ensembl IDs can share one symbol; as.data.frame() is
# expected to adjust such duplicated row names to keep them unique -- confirm
# this is acceptable for the genes looked up later.
count_df <- as.data.frame(counts)
head(count_df)

Unnamed: 0_level_0,BDCM_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,MEC1_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,MOLP8_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,RAJI_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>
TSPAN6,0.21,0.83,0.1,0.05
TNMD,0.0,0.0,0.0,0.0
DPM1,62.89,89.76,58.3,62.54
SCYL3,8.03,6.73,8.26,3.76
FIRRM,19.71,11.22,10.05,12.04
FGR,119.26,62.63,102.65,117.44


In [37]:
# Gene symbols to extract from the expression table: the LILRB and LILRA
# families, LAIR1, MS4A1 and CD19. Each symbol is listed once.
genes <- c(
  "LILRB1",
  "LILRB2",
  "LILRB3",
  "LILRB4",
  "LILRB5",
  "LILRA1",
  "LILRA2",
  "LILRA3",
  "LILRA4",
  "LILRA5",
  "LILRA6",
  "LAIR1",
  "MS4A1",
  "CD19"
)

In [38]:
# Keep only the rows whose row name is one of the genes of interest.
# drop = FALSE keeps the result a data frame even if `count_df` has a single
# column.
filtered_df <- count_df[rownames(count_df) %in% genes, , drop = FALSE]
filtered_df

Unnamed: 0_level_0,BDCM_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,MEC1_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,MOLP8_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,RAJI_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>
LILRB1,58.2,80.92,104.3,17.12
LILRA1,0.03,0.08,0.01,0.02
LILRB5,0.01,0.05,0.0,0.01
LILRB2,0.02,0.0,0.04,0.01
MS4A1,983.18,524.18,0.13,289.31
LAIR1,0.11,0.3,32.11,0.14
CD19,82.26,153.21,0.15,136.56
LILRB4,0.06,9.32,7.95,0.04
LILRA5,0.0,0.0,0.0,0.02
LILRB3,0.27,0.56,1.36,0.43


In [39]:
# Build the display table: keep the genes of interest, put rows whose name
# starts with "LIL" first (each group ordered by row name), and give the
# cell-line columns readable labels.
filtered_df <- count_df %>%
  dplyr::filter(rownames(.) %in% genes) %>%
  dplyr::arrange(
    dplyr::desc(startsWith(rownames(.), "LIL")),
    rownames(.)
  ) %>%
  dplyr::rename(
    `BDCM Counts (tpm)` = BDCM_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,
    `MEC1 Counts (tpm)` = MEC1_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,
    `MOLP8 Counts (tpm)` = MOLP8_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE,
    `RAJI Counts (tpm)` = RAJI_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE
  )
filtered_df

Unnamed: 0_level_0,BDCM Counts (tpm),MEC1 Counts (tpm),MOLP8 Counts (tpm),RAJI Counts (tpm)
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>
LILRA1,0.03,0.08,0.01,0.02
LILRA2,0.48,2.61,0.26,0.01
LILRA4,0.17,0.36,0.01,0.06
LILRA5,0.0,0.0,0.0,0.02
LILRA6,0.04,0.09,0.0,0.11
LILRB1,58.2,80.92,104.3,17.12
LILRB2,0.02,0.0,0.04,0.01
LILRB3,0.27,0.56,1.36,0.43
LILRB4,0.06,9.32,7.95,0.04
LILRB5,0.01,0.05,0.0,0.01


In [40]:
# Export the table as comma-separated text, including the row names.
# col.names = NA writes an empty header field above the row-name column, so the
# header has as many fields as each data row.
write.table(
  filtered_df,
  "result.csv",
  sep = ",",
  row.names = TRUE,
  col.names = NA
)