---
# Dataset Formatting: Griffiths-BRCA
*L.Richards*  
*2020-06-16*  
*/cluster/projects/pughlab/projects/cancer_scrna_integration/data/Griffiths-BRCA*  

---

Format Griffiths-BRCA. This will be input for data integration tools.

> Paper: https://www.nature.com/articles/s43018-021-00215-7    
> Data Download: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE158724 
> **Obtained cell-level metadata directly from A. Bild**

In [None]:
library(Seurat) #v4.0.1
library(earlycross) # v0.1
library(data.table)

---
## 1.0 Format downloaded public data
---


In [None]:
# Load the 10x counts table (genes in rows, one column per cell).
# NOTE: the published matrix only contains cancer cells -_-
counts.file <- "/cluster/projects/pughlab/projects/cancer_scrna_integration/data/Griffiths-BRCA/original-data/GSE158724_10x.counts.txt"
counts <- data.frame(fread(counts.file))

# Promote the gene identifier column to row names, then drop it so that
# only the per-cell count columns remain.
rownames(counts) <- counts[["Gene.ID"]]
counts[["Gene.ID"]] <- NULL
dim(counts) # 21,279 genes x 110,569 cells

# Read the metadata files and stack them into one big data frame across
# cell types, with rows keyed by cell ID.
# All files are expected to share the same header (required by rbind).
meta.path <- "/cluster/projects/pughlab/projects/cancer_scrna_integration/data/Griffiths-BRCA/original-data"
meta.files <- list.files(meta.path, pattern = "metadata.txt")

# Fail early with a clear message: an empty listing would otherwise only
# surface later as an obscure error.
if (length(meta.files) == 0) {
    stop("No files matching 'metadata.txt' found in ", meta.path, call. = FALSE)
}

# One data frame per metadata file; lapply avoids the 1:length() pitfall
# and keeps the list unnamed so rbind preserves the cell IDs as row names.
meta <- lapply(meta.files, function(meta.file) {
    a <- data.frame(fread(file.path(meta.path, meta.file)))
    rownames(a) <- a$Cell.ID # index rows by cell ID for matching against counts
    a
})

meta <- do.call(rbind, meta) # 176,281 cells 

# check that cells in counts are also in meta
# The "P" prefix on count column names is rewritten to "FEL0", presumably to
# match the cell ID format used in the metadata row names -- TODO confirm.
colnames(counts) <- gsub("^P", "FEL0", colnames(counts))
table(colnames(counts) %in% rownames(meta)) # good to go!

# Indexing a data frame by a missing row name silently yields an all-NA row,
# so stop here if any cell in counts has no metadata.
cells.without.meta <- setdiff(colnames(counts), rownames(meta))
if (length(cells.without.meta) > 0) {
    stop(length(cells.without.meta),
         " cells in counts have no matching row in meta, e.g. ",
         paste(head(cells.without.meta, 5), collapse = ", "),
         call. = FALSE)
}

# subset (and reorder) meta to match the columns of counts
meta <- meta[colnames(counts), , drop = FALSE]

# edit metadata columns (SampleID, PatientID, CellType)
colnames(meta) <- gsub("Sample", "SampleID", colnames(meta))
colnames(meta) <- gsub("Patient", "PatientID", colnames(meta))

# Copy the study's cell type labels into the shared CellType column used to
# match other studies.
# Read from `meta` directly: the Seurat object `dat` is only created in a
# later cell, so referencing `dat@meta.data` here fails in a fresh session.
if (is.null(meta$Type)) {
    stop("meta has no 'Type' column to derive CellType from", call. = FALSE)
}
meta$CellType <- meta$Type


In [None]:
# Build the Seurat object from the counts matrix, attaching the per-cell
# metadata data frame.
dat <- CreateSeuratObject(counts = counts, meta.data = meta)

In [None]:





# save seurat object
# `dat` is the object returned by CreateSeuratObject; the previously
# referenced `combo` is never defined in this notebook.
saveRDS(dat, file = "Griffiths-BRCA_seurat.rds")

---
## 2.0 Output files in 10x common format
---

Output counts matrix with 10x/CellRanger formatting style and metadata csv.

In [None]:
# save metadata as csv file
# Take the metadata from the Seurat object `dat` (the previously referenced
# `combo` is never defined in this notebook).
meta <- data.frame(dat@meta.data)
write.csv(meta, file = "Griffiths-BRCA_meta.csv")

In [None]:
# export count matrix as default 10x CellRanger output
# Writes into the current working directory. Uses the Seurat object `dat`
# (the previously referenced `combo` is never defined in this notebook).
# NOTE(review): Write10X is presumably provided by the earlycross package -- confirm.
Write10X(dat, dir = "./")

---