---
# Dataset Formatting: Yost-BCC
*L.Richards*  
*2020-06-07*  
*/cluster/projects/pughlab/projects/cancer_scrna_integration/data/Yost-BCC/*  

---

Format the Yost-BCC dataset. Yost et al. also profiled SCC, but the SCC dataset does not contain any malignant cells. The formatted BCC data will be used as input for data integration tools.

In [None]:
library(Seurat) #v4.0.1
library(earlycross) # v0.1
library(data.table)

---
## 1.0 Format downloaded public data
---

Downloaded data from https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE123813

In [None]:
### BCC

# Read the per-cell metadata table from the GEO accession
# (tab-delimited, first line holds the column names).
meta <- read.table("./original-data/GSE123813_bcc_all_metadata.txt.gz",
                   sep = "\t",
                   header = TRUE
                  )
# Index the metadata by cell ID: CreateSeuratObject matches metadata rows
# to the columns of the count matrix through the row names.
rownames(meta) <- meta$cell.id
meta$CancerType <- "BCC"
# Replace "UMAP" with "BCC_UMAP" in every column name that contains it.
colnames(meta) <- gsub("UMAP", "BCC_UMAP", colnames(meta))

# Read the count matrix from the same accession (tab-delimited, with header).
counts <- read.table("./original-data/GSE123813_bcc_scRNA_counts.txt.gz",
                     sep = "\t",
                     header = TRUE
                    )

# Cells without a metadata row would silently receive NA metadata, so flag
# any mismatch (non-fatal: the object is still built as before).
if (!all(colnames(counts) %in% rownames(meta))) {
  warning(sum(!colnames(counts) %in% rownames(meta)),
          " cell(s) in the count matrix have no matching row in the metadata",
          call. = FALSE
         )
}

# combine into a seurat object
bcc <- CreateSeuratObject(counts = counts,
                          meta.data = meta
                         )

In [None]:
# Sample ID = "<patient>_<treatment>" for every cell
bcc@meta.data$SampleID <- paste(bcc@meta.data$patient,
                                bcc@meta.data$treatment,
                                sep = "_"
                               )

# Collapse the author cluster labels into broader, generic cell types.
# Names are the author labels (used as gsub patterns), values are the
# generic replacement; entries are applied one after another, in order.
label_map <- c(
    "B_cells_1"       = "B_cells",
    "B_cells_2"       = "B_cells",
    "CD4_T_cells"     = "T_cells",
    "CD8_act_T_cells" = "T_cells",
    "CD8_ex_T_cells"  = "T_cells",
    "CD8_mem_T_cells" = "T_cells",
    "Tcell_prolif"    = "T_cells",
    "Tregs"           = "T_cells",
    "Tumor_1"         = "Malignant",
    "Tumor_2"         = "Malignant"
)

# Start from the author labels; labels without an entry above are kept as-is
bcc@meta.data$CellType <- bcc@meta.data$cluster
for (author_label in names(label_map)) {
    bcc@meta.data$CellType <- gsub(author_label,
                                   label_map[[author_label]],
                                   bcc@meta.data$CellType
                                  )
}

# Persist the annotated Seurat object to disk
saveRDS(bcc, file = "Yost-BCC_seurat.rds")

---
## 2.0 Output files in 10x common format
---

Output the counts matrix in 10x/CellRanger format and the cell metadata as a CSV file.

In [None]:
# Export the per-cell metadata of the Seurat object as a CSV file
# (row names, i.e. the metadata row labels, go into the first column)
meta <- data.frame(bcc@meta.data)
write.csv(x = meta,
          file = "Yost-BCC_meta.csv",
          row.names = TRUE
         )

In [None]:
# export count matrix as default 10x CellRanger output
# NOTE(review): Write10X is not defined in this file; presumably it comes from
# the earlycross package loaded at the top of the notebook -- confirm.
# dir = "./" points the output at the current working directory.
Write10X(bcc, dir = "./")

---