In [1]:
setwd('/lustre/scratch117/cellgen/team297/kt16/COVID_imperial_renal')

In [2]:
suppressPackageStartupMessages(library(SingleCellExperiment))
suppressPackageStartupMessages(library(edgeR))
suppressPackageStartupMessages(library(scran))
suppressPackageStartupMessages(library(scater))
suppressPackageStartupMessages(library(glmmSeq))

In [3]:
sce <- readRDS('h5ad/df.fil3_gex_bcells_vdj_sce_B_ASC_IgG.RDS')
counts(sce) <- assays(sce)[['X']]
sce$WHO_severity <- factor(sce$WHO_severity, levels = c('NA', 'mild', 'moderate', 'severe', 'critical'))

# Remove samples with less than 5 cells
nCells <- table(sce$sample_id)
rmSamples <- names(nCells[nCells<5])
sce <- sce[,!sce$sample_id %in% rmSamples]
# Summarize Counts
smrzd <- aggregateAcrossCells(sce, id=as.character(colData(sce)[,c("sample_id")]))
y <- DGEList(counts=counts(smrzd), samples=colData(smrzd))
keep <- filterByExpr(y, group=y$samples$case_control, min.count=3, min.total.count=5)
y <- y[keep,]
# ensure the factor levels are correct
y$samples$case_control <- droplevels(y$samples$case_control)
y$samples$WHO_severity <- droplevels(y$samples$WHO_severity)
y$samples$WHO_severity <- ordered(y$samples$WHO_severity) # ordered it so that i can test linear/quadratic trends in the model
# Estimate Dispersion
disp  <- suppressMessages(setNames(edgeR::estimateDisp(y)$tagwise.dispersion, rownames(y)))
# Norm
sizeFactors <- calcNormFactors(y$counts)

In [4]:
results <- glmmSeq(~ case_control + sex + ethnicity + calc_age + (1|individual_id),
                  id = "individual_id",
                  countdata = y$counts,
                  metadata = y$samples,
                  dispersion = disp,
                  sizeFactors = sizeFactors,
                  removeDuplicatedMeasures = FALSE,
                  removeSingles=FALSE,
                  progress=TRUE, cores = 24)


n = 60 samples, 33 individuals
Time difference of 10.9745 mins


In [6]:
results <- glmmQvals(results, pi0=1)


q_case_control
--------------
Not Significant     Significant 
           6435             629 

q_sex
-----
Not Significant     Significant 
           6153             911 

q_ethnicity
-----------
Not Significant     Significant 
           6391             673 

q_calc_age
----------
Not Significant     Significant 
           6644             420 


In [7]:
head(results@stats)

Unnamed: 0,Dispersion,AIC,logLik,(Intercept),case_controlPOSITIVE,sexM,ethnicityblack,ethnicityother,ethnicitywhite,calc_age,⋯,Chisq_ethnicity,Chisq_calc_age,P_case_control,P_sex,P_ethnicity,P_calc_age,q_case_control,q_sex,q_ethnicity,q_calc_age
NOC2L,0.026887873,604.8409,-293.42043,0.5688707,0.3243169,1.0083362,1.667141,0.91983028,0.43025995,0.002311535,⋯,5.265048,0.02143256,0.5167045,0.01785554,0.15338652,0.8836067,0.7782192,0.0637941,0.2228553,0.9993565
ISG15,1.201505505,634.2046,-308.10228,2.9662802,0.495138,0.746289,1.063126,0.59653748,0.67777359,-0.011547584,⋯,1.811781,0.26852325,0.4928229,0.20314163,0.61237462,0.6043241,0.7723508,0.21712702,0.6193006,0.9993565
TNFRSF18,1.090274053,200.5393,-91.26966,-2.7801456,-0.2275657,1.2437907,1.629526,-0.06247041,-0.03823493,0.029907772,⋯,5.354545,1.78833913,0.7263888,0.01556542,0.14760192,0.1811287,0.847883,0.06341166,0.2190922,0.9993565
SDF4,0.074926743,795.3678,-388.68391,1.651309,0.3175639,1.0784994,1.962467,0.99151372,0.61751316,0.006016757,⋯,7.068579,0.14288747,0.5287505,0.01174592,0.06974375,0.7054272,0.7807927,0.05967859,0.1910931,0.9993565
B3GALT6,0.205783835,329.7748,-155.88741,0.1149836,0.1094871,0.6801167,1.33221,0.17672941,0.4689956,-0.002126264,⋯,2.684337,0.01283557,0.8513612,0.16763802,0.44289558,0.9097973,0.914401,0.1838812,0.4579028,0.9993565
UBE2J2,0.008795659,710.591,-346.29549,0.6491027,0.1738913,0.8798714,1.379543,0.60467999,0.40480242,0.008519946,⋯,4.032669,0.33524928,0.7068704,0.02557313,0.25795801,0.5625843,0.84084,0.0680409,0.2977476,0.9993565


In [28]:
tmp <- data.frame(results@stats[,c('case_controlPOSITIVE', 'P_case_control', 'q_case_control')])
colnames(tmp) <- c('fixed-effects estimates', 'pval', 'qval')
tmp <- tmp[tmp$qval < 0.05,]
tmp <- tmp[order(-tmp$`fixed-effects estimates`, tmp$qval), ]
head(tmp, 30)

Unnamed: 0_level_0,fixed-effects estimates,pval,qval
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>
IGHE,2.0655758,0,0
AC141272.1,1.9022946,0,0
SASS6,1.3596144,0,0
CDCA8,1.3409471,0,0
SEPT10,1.3221143,0,0
FBXL8,1.2557856,0,0
Z93241.1,1.2298445,0,0
KIF11,1.2121895,0,0
XPO1,1.1847144,0,0
UBE2T,1.1841967,0,0


In [29]:
tail(tmp, 30)

Unnamed: 0_level_0,fixed-effects estimates,pval,qval
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>
LAPTM4A,-0.1008413,5.558766e-235,7.139477e-234
ATP23,-0.1117393,0.0,0.0
RGS2,-0.1127026,3.847354e-10,4.397687e-09
PCBD1,-0.1213554,2.96475e-174,3.733154e-173
MARCH5,-0.1214197,0.0,0.0
MANEA-DT,-0.1237281,5.766788e-309,7.515976e-308
PDS5A,-0.1335985,0.0,0.0
PBX2,-0.1476914,0.0,0.0
KCNMA1,-0.1554643,1.241281e-19,1.4398050000000001e-18
NPIPB15,-0.1650113,3.123512e-61,3.695894e-60


### run it as a job with a Rscript

```bash
Rscript --vanilla glmmSeq_jobs.R \
       -i /lustre/scratch117/cellgen/team297/kt16/COVID_imperial_renal/h5ad/df.fil3_gex_bcells_vdj_sce_B_ASC_dividing.RDS \
       -o /lustre/scratch117/cellgen/team297/kt16/COVID_imperial_renal/h5ad/glmmSeq_B_ASC_dividing.RData \
       -m 5 \
       -n 24

saving the various results as a .Rdata to open later
```