---
# Dataset Formatting: Bi-RCC
*L.Richards*  
*2020-06-16*  
*/cluster/projects/pughlab/projects/cancer_scrna_integration/data/Bi-RCC*  

---

Format the Bi-RCC dataset so it can be used as input for data integration tools.  
> Paper: https://www.cell.com/cancer-cell/pdf/S1535-6108(21)00117-3.pdf  
> Data Download: https://singlecell.broadinstitute.org/single_cell/study/SCP1288/tumor-and-immune-reprogramming-during-immunotherapy-in-advanced-renal-cell-carcinoma#

In [None]:
library(Seurat) #v4.0.1
library(earlycross) # v0.1
library(data.table)

---
## 1.0 Format downloaded public data
---


In [None]:
# ---- Raw counts -------------------------------------------------------------
# Read the 10x count matrix.
# The raw matrix has 39,391 cells x 60,627 genes, which does not match the
# processed data on the Single Cell Portal (34,326 cells), so it is filtered
# against the normalized matrix below.
counts.file <- "/cluster/projects/pughlab/projects/cancer_scrna_integration/data/Bi-RCC/original-data/SCP1288/expression/60c76a18771a5b0ba10ea91b/"
dat <- Read10X(counts.file)

# ---- Normalized matrix (used only as a gene / cell whitelist) ---------------
norm.file <- "/cluster/projects/pughlab/projects/cancer_scrna_integration/data/Bi-RCC/original-data/SCP1288/expression/ccRCC_scRNASeq_NormalizedCounts.txt.gz"
norm <- fread(norm.file)
# NOTE: data.frame() applies check.names = TRUE, so any column name (cell
# barcode) that is not a syntactically valid R name gets altered. The
# sanity-check table below shows how many raw barcodes still match.
norm <- data.frame(norm)
rownames(norm) <- norm$GENE
norm$GENE <- NULL

# Keep only the genes and cell barcodes that are present in the normalized matrix.
table(colnames(dat) %in% colnames(norm)) # sanity check
dat <- dat[rownames(dat) %in% rownames(norm), colnames(dat) %in% colnames(norm)]
dim(dat) # 32636 genes x  34326 cells -- looks good now!
rm(norm) # only needed for filtering; free the memory

# ---- Metadata ---------------------------------------------------------------
meta.file <- "/cluster/projects/pughlab/projects/cancer_scrna_integration/data/Bi-RCC/original-data/SCP1288/metadata/Final_SCP_Metadata.txt"
meta <- fread(meta.file)
meta <- data.frame(meta)
# Drop the first data row -- presumably the Single Cell Portal "TYPE"
# annotation row rather than a real cell; confirm against the file.
meta <- meta[-1, ]
# Cell barcodes as row names so Seurat can align the metadata to the counts.
rownames(meta) <- meta$NAME
# (A stray bare `donor_id` symbol used to sit here; evaluating it raised
# "object 'donor_id' not found" and aborted the cell, so it was removed.)

# Rename columns to the harmonized names shared across studies:
# "SampleID", "PatientID", "CellType"
colnames(meta) <- gsub("donor_id", "PatientID", colnames(meta)) # 8 patients
meta$SampleID <- paste0(meta$PatientID, "_", meta$ICB_Exposed, "_", meta$TKI_Exposed) # 8 samples, no pairs

# ---- Seurat object ----------------------------------------------------------
dat <- CreateSeuratObject(counts = dat,
                          meta.data = meta
                         )

In [None]:
# Take a peek at the cell annotations shipped with the study.
cat(unique(meta$FinalCellType), sep = "\n")

# ---- Harmonize cell type labels to match the other studies ------------------
# Ordered map of regular expression -> harmonized label. Any label matching a
# pattern is replaced wholesale by the harmonized name. The patterns are
# applied top to bottom, in the same order as the original one-line statements.
relabel <- c(
  # T cells
  "41BB"            = "T_cells",
  "T-helper"        = "T_cells",
  # Was written "CD8+": as a regex the "+" is a quantifier (one or more "8"),
  # not a literal plus sign, so it matched exactly the labels containing "CD8".
  # Spelled without the misleading "+" here; matching is unchanged.
  "CD8"             = "T_cells",
  "Helper"          = "T_cells",
  "NKT$"            = "T_cells",
  "T-Reg"           = "T_cells",
  # NK cells
  "NK$"             = "NK_cells",
  # Myeloid compartment
  "DC$"             = "DCs",
  "TAM$"            = "Macrophages",
  "Macrophage$"     = "Macrophages",
  "Monocyte$"       = "Macrophages",
  "Myeloid$"        = "Macrophages",
  # B cells
  "^B cell$"        = "B_cells",
  # Tumour populations
  "^TP1$"           = "Malignant",
  "^TP2$"           = "Malignant",
  "^Cycling Tumor$" = "Malignant",
  # Other
  "^Plasma cell$"   = "Plasma_cells",
  "^Mast cell$"     = "Mast_cells"
)

# Start from the original annotation and relabel a working copy.
# assumes FinalCellType is a character column (not a factor) -- TODO confirm
cell.type <- dat@meta.data$FinalCellType
for (pattern in names(relabel)) {
  cell.type[grepl(pattern, cell.type)] <- relabel[[pattern]]
}
dat@meta.data$CellType <- cell.type

# Remove Misc/Undetermined cells: set identities to the harmonized label and
# keep everything except that identity.
Idents(dat) <- "CellType"
dat <- subset(dat, idents = "Misc/Undetermined", invert = TRUE)

# Save the Seurat object (path is relative to the working directory).
saveRDS(dat, file = "Bi-RCC_seurat.rds")

---
## 2.0 Output files in 10x common format
---

Output the counts matrix in 10x/CellRanger format, along with the cell metadata as a CSV file.

In [None]:
# Pull the per-cell metadata table out of the Seurat object and write it to a
# CSV file in the working directory (cell barcodes end up as the row names).
meta <- data.frame(slot(dat, "meta.data"))
write.csv(x = meta, file = "Bi-RCC_meta.csv")

In [None]:
# export count matrix as default 10x CellRanger output
# NOTE(review): Write10X is not defined in this file; presumably it comes from
# the earlycross package loaded at the top -- confirm its signature and which
# files it writes.
# Output is written to the current working directory ("./").
Write10X(dat, dir = "./")

---