---
# Dataset Formatting: Yost-BCC-SCC
*L.Richards*  
*2020-06-07*  
*/cluster/projects/pughlab/projects/cancer_scrna_integration/data/Yost-BCC-SCC/*  

---

Format the Yost-BCC-SCC dataset so that it can be used as input for data integration tools.

In [None]:
library(Seurat) #v4.0.1
library(earlycross) # v0.1
library(data.table)

---
## 1.0 Format downloaded public data
---

Data were downloaded from GEO accession GSE123813: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE123813

In [None]:
### BCC

# Read the per-cell metadata table from the GEO accession
# (tab-separated, first line is the header).
meta <- read.table("./original-data/GSE123813_bcc_all_metadata.txt.gz",
                   sep = "\t",
                   header = TRUE
                  )
# Index metadata rows by cell ID; CreateSeuratObject() matches meta.data
# to the counts matrix through these row names.
rownames(meta) <- meta$cell.id
# Tag every cell with its cancer type.
meta$CancerType <- "BCC"
# Prefix the UMAP columns with the cancer type (the SCC cell does the same
# with "SCC_UMAP"); "UMAP" is a literal string, so match it as fixed text.
colnames(meta) <- gsub("UMAP", "BCC_UMAP", colnames(meta), fixed = TRUE)

# Read the raw counts table. check.names = FALSE keeps the cell IDs in the
# header exactly as written in the file, so make.names() cannot rewrite them
# into names that no longer match meta$cell.id.
# NOTE(review): assumes genes are rows and cells are columns -- confirm.
counts <- read.table("./original-data/GSE123813_bcc_scRNA_counts.txt.gz",
                     sep = "\t",
                     header = TRUE,
                     check.names = FALSE
                    )

# Combine counts and metadata into a Seurat object.
bcc <- CreateSeuratObject(counts = counts,
                          meta.data = meta
                         )



In [None]:
### SCC

# Read the per-cell metadata table from the GEO accession
# (tab-separated, first line is the header).
meta <- read.table("./original-data/GSE123813_scc_metadata.txt.gz",
                   sep = "\t",
                   header = TRUE
                  )
# Index metadata rows by cell ID; CreateSeuratObject() matches meta.data
# to the counts matrix through these row names.
rownames(meta) <- meta$cell.id
# Tag every cell with its cancer type.
meta$CancerType <- "SCC"
# Prefix the UMAP columns with the cancer type (the BCC cell does the same
# with "BCC_UMAP"); "UMAP" is a literal string, so match it as fixed text.
colnames(meta) <- gsub("UMAP", "SCC_UMAP", colnames(meta), fixed = TRUE)

# Read the raw counts table. check.names = FALSE keeps the cell IDs in the
# header exactly as written in the file, so make.names() cannot rewrite them
# into names that no longer match meta$cell.id.
# NOTE(review): assumes genes are rows and cells are columns -- confirm.
counts <- read.table("./original-data/GSE123813_scc_scRNA_counts.txt.gz",
                     sep = "\t",
                     header = TRUE,
                     check.names = FALSE
                    )

# Combine counts and metadata into a Seurat object.
scc <- CreateSeuratObject(counts = counts,
                          meta.data = meta
                         )

In [None]:
### COMBINE SCC & BCC
# Merge the two per-cancer-type Seurat objects into a single object.
combo <- merge(x = bcc, y = scc)

# Persist the merged Seurat object to the working directory as an RDS file.
saveRDS(
  object = combo,
  file = "Yost-BCC-SCC_seurat.rds"
)

---
## 2.0 Output files in 10x common format
---

Write the counts matrix in 10x/CellRanger format and the cell metadata as a CSV file.

In [None]:
# Extract the per-cell metadata from the merged object and write it to a
# CSV file in the working directory (write.csv keeps row names by default).
meta <- data.frame(slot(combo, "meta.data"))
write.csv(
  x = meta,
  file = "Yost-BCC-SCC_meta.csv"
)

In [None]:
# Export the count matrix of the merged object in the default 10x CellRanger
# output layout, written into the current working directory ("./").
# NOTE(review): Write10X is presumably provided by the earlycross package
# loaded at the top of the notebook -- confirm, along with the exact files
# it writes.
Write10X(combo, dir = "./")

---