In [1]:
library(Seurat)
library(scRepertoire)
library(stringr)
library(dplyr)
library(tibble)

Loading required package: SeuratObject

Loading required package: sp


Attaching package: ‘SeuratObject’


The following objects are masked from ‘package:base’:

    intersect, t


Loading required package: ggplot2


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [2]:
list.files('~/data1/eva/GSE178881/cellranger_out')

In [3]:
gex_path <- function(sample) file.path('~/data1/eva/GSE178881/cellranger_out', sample, 'outs/per_sample_outs', sample, 
                                  'count/sample_filtered_feature_bc_matrix')
obj_list <- list()
for(sample in list.files('~/data1/eva/GSE178881/cellranger_out'))
{
    obj_i <- CreateSeuratObject(counts = Read10X(data.dir = gex_path(sample)), project = sample, min.cells = 0, min.features = 1)
    obj_i <- RenameCells(object = obj_i, new.names = paste(sample,Cells(obj_i),sep='_'))
    obj_list[[sample]] <- obj_i
}

In [4]:
seu <- merge(x = obj_list[[1]], y = obj_list[2:length(obj_list)])
seu <- JoinLayers(seu)
seu

An object of class Seurat 
32285 features across 109561 samples within 1 assay 
Active assay: RNA (32285 features, 0 variable features)
 1 layer present: counts

In [5]:
geo_meta <- read.csv('~/data1/eva/external/GSE178881_cell_information.txt', sep='\t')

In [6]:
geo_meta <- read.csv('~/data1/eva/external/GSE178881_cell_information.txt', sep='\t')
geo_meta$sample <- recode(geo_meta$sample, 'D10-LN-Ctrl'='D10LN','D10-SP-Ctrl'='D10SP','D10-TM-Ctrl'='D10TM','D20-LN-aPDL1'='D20LN_aPDL1',
       'D20-LN-aPDL1&vac'='D20LN_Combo','D20-LN-Ctrl'='D20LN','D20-LN-vac'='D20LN_Vac','D20-SP-Ctrl'='D20SP',
       'D20-TM-aPDL1'='D20TM_aPDL1','D20-TM-Ctrl'='D20TM','D20-TM-aPDL1&vac'='D20TM_Combo','D20-TM-vac'='D20TM_Vac')
geo_meta$cell_barcode <- paste0(geo_meta$sample, '_', str_split_fixed(geo_meta$cell_barcode, '-', 2)[,1], '-1')
geo_meta <- column_to_rownames(geo_meta, 'cell_barcode')

In [7]:
tail(geo_meta$cell_barcode)

NULL

In [8]:
seu <- subset(seu, cells=rownames(geo_meta))
seu <- NormalizeData(seu)
seu <- AddMetaData(seu, geo_meta)

Normalizing layer: counts



In [9]:
head(seu@meta.data)

Unnamed: 0_level_0,orig.ident,nCount_RNA,nFeature_RNA,sample,a_cdr3,b_cdr3,ab_cdr3,clusters,tetramer.
Unnamed: 0_level_1,<chr>,<dbl>,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<lgl>
D10LN_AAACCTGAGAAACGAG-1,D10LN,6553,2108,D10LN,CAASSDSNYQLIW,CASSLTTNSDYTF,TRA:CAASSDSNYQLIW&TRB:CASSLTTNSDYTF,CD8-01-Sell-P.na,False
D10LN_AAACCTGAGAAGGCCT-1,D10LN,4630,1636,D10LN,,CAEGTEVFF,,CD8-01-Sell-P.na,False
D10LN_AAACCTGAGCGAGAAA-1,D10LN,3934,1497,D10LN,CALGGNNNAPRF,CASRAGHPNTEVFF,TRA:CALGGNNNAPRF&TRB:CASRAGHPNTEVFF,CD8-01-Sell-P.na,False
D10LN_AAACCTGAGCTCTCGG-1,D10LN,3203,1466,D10LN,,CASSLDRNSDYTF,,CD4-01-Tcf7-P.na,False
D10LN_AAACCTGCACATTAGC-1,D10LN,6549,2014,D10LN,CAIDRVSNYNVLYF,CASSPRLGVYEQYF,TRA:CAIDRVSNYNVLYF&TRB:CASSPRLGVYEQYF,CD8-01-Sell-P.na,False
D10LN_AAACCTGCACTATCTT-1,D10LN,5765,2037,D10LN,CAVSNMGYKLTF,CAWGTGENTLYF,TRA:CAVSNMGYKLTF&TRB:CAWGTGENTLYF,CD8-01-Sell-P.na,False


In [10]:
tcr_path <- function(sample) file.path('~/data1/eva/GSE178881/cellranger_out', sample, 'outs/per_sample_outs', sample, 
                                  'vdj_t/filtered_contig_annotations.csv')
tcr_list <- list()
for(sample in list.files('~/data1/eva/GSE178881/cellranger_out'))
{
    csv_i <- read.csv(tcr_path(sample))
    tcr_list[[sample]] <- csv_i
}
combined_tcr <- combineTCR(tcr_list, samples = names(tcr_list))

In [11]:
seu$sample <- seu$orig.ident
seu <- combineExpression(combined_tcr, seu, 
                  cloneCall="aa", 
                  group.by = "sample", 
                  proportion = FALSE, 
                  cloneSize=c(Single=1, Small=5, Medium=20, Large=100, Hyperexpanded=1000))

In [12]:
saveRDS(seu, 'data/GSE178881_tcr.rds')