In [1]:
# Work from the project directory so the relative paths used in this notebook
# ('scripts/...', 'h5ad/...') resolve against it.
setwd('/lustre/scratch117/cellgen/team297/kt16/COVID_imperial_renal/')
# Load project helper functions (presumably where setupDGElist() and the
# library() calls for edgeR / SingleCellExperiment come from -- TODO confirm).
source('scripts/glmm_functions.R')

In [2]:
# Normalise a DGEList, estimate dispersions and fit edgeR's quasi-likelihood
# negative binomial GLM.
#
# Args:
#   y:     a DGEList of (pseudobulk) counts.
#   model: design matrix; used for BOTH dispersion estimation and the GLM fit.
#
# Returns:
#   The fitted object returned by glmQLFit(), ready for glmQLFTest().
testDGElist_edger <- function(y, model) {
    # Norm
    y <- calcNormFactors(y)
    # Estimate Dispersion
    y <- estimateDisp(y, model)
    # Fit Model
    # Use the `model` argument here: the previous code referenced the global
    # `mdl`, so the fit could silently use a different design than the one the
    # dispersions were estimated with (or fail if no global `mdl` existed).
    fit <- glmQLFit(y, model, robust=TRUE)
    return(fit)
}

prep_sce <- function(sce, min_cells = 10){
    # Turn a SingleCellExperiment into a per-sample pseudobulk DGEList.
    #
    # Args:
    #   sce:       SingleCellExperiment with an 'X' assay and the colData columns
    #              used below (case_control, WHO_temp_severity, WHO_severity,
    #              prognosis, calc_age, sample_id).
    #   min_cells: samples with fewer cells than this are dropped before
    #              aggregation.
    #
    # Returns:
    #   Whatever setupDGElist() returns for the pseudobulk DGEList with
    #   "prognosis" as its second argument.
    who_levels <- c('NA', 'mild', 'moderate', 'severe', 'critical')
    who_group_labels <- c('NA', 'mild_moderate', 'mild_moderate', 'severe_critical', 'severe_critical')
    mild_or_moderate <- c("mild", "moderate")

    # The object was saved from an h5ad via anndata2ri, so the counts sit in 'X'.
    counts(sce) <- assays(sce)[['X']]

    # Fix factor level order for the clinical covariates.
    sce$case_control <- factor(sce$case_control, levels = c('NEGATIVE', 'POSITIVE', 'RECOVERY'))
    sce$WHO_temp_severity <- factor(sce$WHO_temp_severity, levels = who_levels)
    sce$WHO_temp_severity_group <- factor(sce$WHO_temp_severity, levels = who_levels, labels = who_group_labels)
    # WHO_severity is interpreted as peak severity.
    sce$WHO_severity_group <- factor(sce$WHO_severity, levels = who_levels, labels = who_group_labels)

    # Two-way grouping: anything that is not mild/moderate is labelled
    # "severe_critical".
    temp_is_mild_moderate <- sce$WHO_temp_severity %in% mild_or_moderate
    sce$grouped_temp_severity <- ifelse(temp_is_mild_moderate, "mild_moderate", "severe_critical")
    peak_is_mild_moderate <- sce$WHO_severity %in% mild_or_moderate
    sce$grouped_severity <- ifelse(peak_is_mild_moderate, "mild_moderate", "severe_critical")

    sce$prognosis <- factor(sce$prognosis, levels = c('stable_disease', 'will_improve', 'will_worsen'))
    # Centre and scale age.
    sce$age_scaled <- scale(sce$calc_age)

    # Drop samples that contribute fewer than `min_cells` cells.
    cells_per_sample <- table(sce$sample_id)
    sparse_samples <- names(cells_per_sample[cells_per_sample < min_cells])
    sce_kept <- sce[, !(sce$sample_id %in% sparse_samples)]

    # Sum counts per sample (pseudobulk) and wrap them in a DGEList.
    sample_ids <- as.character(colData(sce_kept)[, "sample_id"])
    pseudobulk <- aggregateAcrossCells(sce_kept, id = sample_ids)
    dge <- DGEList(counts = counts(pseudobulk), samples = colData(pseudobulk))
    dge_ready <- setupDGElist(dge, "prognosis") # use grouped_temp_severity later

    # sanity check
    # table(dge_ready$samples$prognosis, dge_ready$samples$individual_id)
    # table(dge_ready$samples$prognosis, dge_ready$samples$centre)
    # table(dge_ready$samples$prognosis, dge_ready$samples$sex)
    # table(dge_ready$samples$prognosis, dge_ready$samples$ethnicity)
    # table(dge_ready$samples$prognosis, dge_ready$samples$WHO_severity)
    # table(dge_ready$samples$prognosis, dge_ready$samples$WHO_temp_severity)
    return(dge_ready)
}

### Naive

In [3]:
# Load the saved object for this subset (naive B cells, going by the file name)
# and convert it to a per-sample pseudobulk DGEList with prep_sce().
sce <- readRDS('h5ad/df.fil3_gex_bcells_vdj_sce_B_naive_prog.RDS')
y <- prep_sce(sce)

In [4]:
# Design: prognosis adjusted for sex, ethnicity, scaled age and centre.
mdl <- model.matrix(~ prognosis + sex + ethnicity + age_scaled + centre, data=y$samples)
fit <- testDGElist_edger(y, mdl)
# Quasi-likelihood F-test for each non-baseline prognosis coefficient.
res1 <- glmQLFTest(fit, coef='prognosiswill_improve')
res2 <- glmQLFTest(fit, coef='prognosiswill_worsen')
# Build each full results table once instead of calling topTags() three times
# per contrast.
tab1 <- topTags(res1, n=nrow(y))$table
tab2 <- topTags(res2, n=nrow(y))$table
# Show only genes passing FDR < 0.05; nothing is displayed when none pass.
print('prognosiswill_improve')
if(any(tab1$FDR < 0.05)){
    tab1[tab1$FDR < 0.05, ]
}
print('prognosiswill_worsen')
if(any(tab2$FDR < 0.05)){
    tab2[tab2$FDR < 0.05, ]
}

[1] "prognosiswill_improve"
[1] "prognosiswill_worsen"


### Switched memory

In [5]:
# Load the saved object for this subset (switched memory B cells, going by the
# file name) and convert it to a per-sample pseudobulk DGEList with prep_sce().
sce <- readRDS('h5ad/df.fil3_gex_bcells_vdj_sce_B_switched_mem_prog.RDS')
y <- prep_sce(sce)

In [6]:
# Design: prognosis adjusted for sex, ethnicity, scaled age and centre.
mdl <- model.matrix(~ prognosis + sex + ethnicity + age_scaled + centre, data=y$samples)
fit <- testDGElist_edger(y, mdl)
# Quasi-likelihood F-test for each non-baseline prognosis coefficient.
res1 <- glmQLFTest(fit, coef='prognosiswill_improve')
res2 <- glmQLFTest(fit, coef='prognosiswill_worsen')
# Build each full results table once instead of calling topTags() three times
# per contrast.
tab1 <- topTags(res1, n=nrow(y))$table
tab2 <- topTags(res2, n=nrow(y))$table
# Show only genes passing FDR < 0.05; nothing is displayed when none pass.
print('prognosiswill_improve')
if(any(tab1$FDR < 0.05)){
    tab1[tab1$FDR < 0.05, ]
}
print('prognosiswill_worsen')
if(any(tab2$FDR < 0.05)){
    tab2[tab2$FDR < 0.05, ]
}

[1] "prognosiswill_improve"
[1] "prognosiswill_worsen"


Unnamed: 0_level_0,logFC,logCPM,F,PValue,FDR
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
APOL6,2.662733,5.245481,26.21959,3.543012e-06,0.0214494


### Non-switched memory

In [7]:
# Load the saved object for this subset (non-switched memory B cells, going by
# the file name) and convert it to a per-sample pseudobulk DGEList with prep_sce().
sce <- readRDS('h5ad/df.fil3_gex_bcells_vdj_sce_B_nonswitched_mem_prog.RDS')
y <- prep_sce(sce)

In [8]:
# Design: prognosis adjusted for sex, ethnicity, scaled age and centre.
mdl <- model.matrix(~ prognosis + sex + ethnicity + age_scaled + centre, data=y$samples)
fit <- testDGElist_edger(y, mdl)
# Quasi-likelihood F-test for each non-baseline prognosis coefficient.
res1 <- glmQLFTest(fit, coef='prognosiswill_improve')
res2 <- glmQLFTest(fit, coef='prognosiswill_worsen')
# Build each full results table once instead of calling topTags() three times
# per contrast.
tab1 <- topTags(res1, n=nrow(y))$table
tab2 <- topTags(res2, n=nrow(y))$table
# Show only genes passing FDR < 0.05; nothing is displayed when none pass.
print('prognosiswill_improve')
if(any(tab1$FDR < 0.05)){
    tab1[tab1$FDR < 0.05, ]
}
print('prognosiswill_worsen')
if(any(tab2$FDR < 0.05)){
    tab2[tab2$FDR < 0.05, ]
}

[1] "prognosiswill_improve"


Unnamed: 0_level_0,logFC,logCPM,F,PValue,FDR
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
HLA-DQA2,5.40681,6.926207,33.5697,7.234374e-09,4.54174e-05
HIST1H4C,-5.442753,7.965718,32.12441,1.524458e-08,4.785274e-05
IFI44L,3.674687,7.329106,29.55245,5.544136e-08,0.0001160203
XAF1,2.847235,7.407392,21.21707,4.141275e-06,0.006499731
DDIT4,2.42162,6.784179,18.34552,1.856224e-05,0.02330675
MT-CO1,-1.510628,13.227611,17.44501,2.977373e-05,0.03001956
IFI27,6.811807,5.794524,17.2222,3.347196e-05,0.03001956


[1] "prognosiswill_worsen"


Unnamed: 0_level_0,logFC,logCPM,F,PValue,FDR
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
HLA-DQA2,32.12245,6.926207,64.61085,1.09555e-15,6.877863e-12
IFI44L,15.310838,7.329106,32.53857,1.194881e-08,3.750733e-05
CD1C,-16.569929,7.643386,22.61363,2.002761e-06,0.00419111
DDIT4,10.320847,6.784179,18.99759,1.319242e-05,0.01937076
IFITM1,8.926336,9.47088,19.35001,1.542749e-05,0.01937076
IFI27,32.005993,5.794524,17.58081,2.772413e-05,0.02564488
XAF1,9.963111,7.407392,17.52197,2.859416e-05,0.02564488
HIST1H4C,-16.770996,7.965718,17.20274,3.413407e-05,0.02678671


### CD11c

In [10]:
# Load the saved object for this subset (CD11c B cells, going by the file name)
# and convert it to a per-sample pseudobulk DGEList with prep_sce().
sce <- readRDS('h5ad/df.fil3_gex_bcells_vdj_sce_B_CD11c_prog.RDS')
y <- prep_sce(sce)

In [11]:
# Design: prognosis adjusted for sex, ethnicity, scaled age and centre.
mdl <- model.matrix(~ prognosis + sex + ethnicity + age_scaled + centre, data=y$samples)
fit <- testDGElist_edger(y, mdl)
# Quasi-likelihood F-test for each non-baseline prognosis coefficient.
res1 <- glmQLFTest(fit, coef='prognosiswill_improve')
res2 <- glmQLFTest(fit, coef='prognosiswill_worsen')
# Build each full results table once instead of calling topTags() three times
# per contrast.
tab1 <- topTags(res1, n=nrow(y))$table
tab2 <- topTags(res2, n=nrow(y))$table
# Show only genes passing FDR < 0.05; nothing is displayed when none pass.
print('prognosiswill_improve')
if(any(tab1$FDR < 0.05)){
    tab1[tab1$FDR < 0.05, ]
}
print('prognosiswill_worsen')
if(any(tab2$FDR < 0.05)){
    tab2[tab2$FDR < 0.05, ]
}

[1] "prognosiswill_improve"


Unnamed: 0_level_0,logFC,logCPM,F,PValue,FDR
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
RPS28,-1.613656,12.305087,23.45563,3.613437e-06,0.01515475
PCIF1,-6.316215,6.939713,19.37047,2.242169e-05,0.04701829


[1] "prognosiswill_worsen"


Unnamed: 0_level_0,logFC,logCPM,F,PValue,FDR
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
JUND,-5.441748,10.64338,47.23986,4.352746e-09,1.825542e-05


### All memory

In [12]:
# Load the saved object for this subset (all memory B cells, going by the file
# name) and convert it to a per-sample pseudobulk DGEList with prep_sce().
sce <- readRDS('h5ad/df.fil3_gex_bcells_vdj_sce_B_mem_all_prog.RDS')
y <- prep_sce(sce)

In [13]:
# Design: prognosis adjusted for sex, ethnicity, scaled age and centre.
mdl <- model.matrix(~ prognosis + sex + ethnicity + age_scaled + centre, data=y$samples)
fit <- testDGElist_edger(y, mdl)
# Quasi-likelihood F-test for each non-baseline prognosis coefficient.
res1 <- glmQLFTest(fit, coef='prognosiswill_improve')
res2 <- glmQLFTest(fit, coef='prognosiswill_worsen')
# Build each full results table once instead of calling topTags() three times
# per contrast.
tab1 <- topTags(res1, n=nrow(y))$table
tab2 <- topTags(res2, n=nrow(y))$table
# Show only genes passing FDR < 0.05; nothing is displayed when none pass.
print('prognosiswill_improve')
if(any(tab1$FDR < 0.05)){
    tab1[tab1$FDR < 0.05, ]
}
print('prognosiswill_worsen')
if(any(tab2$FDR < 0.05)){
    tab2[tab2$FDR < 0.05, ]
}

[1] "prognosiswill_improve"


Unnamed: 0_level_0,logFC,logCPM,F,PValue,FDR
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
NR4A2,-1.52748,5.940416,26.87522,1.438281e-06,0.00817519


[1] "prognosiswill_worsen"


### All ASCs

In [14]:
# Load the saved object for this subset (all ASCs, going by the file name) and
# convert it to a per-sample pseudobulk DGEList with prep_sce().
sce <- readRDS('h5ad/df.fil3_gex_bcells_vdj_sce_B_ASC_all_prog.RDS')
y <- prep_sce(sce)

In [15]:
# Design: prognosis adjusted for sex, ethnicity, scaled age and centre.
mdl <- model.matrix(~ prognosis + sex + ethnicity + age_scaled + centre, data=y$samples)
fit <- testDGElist_edger(y, mdl)
# Quasi-likelihood F-test for each non-baseline prognosis coefficient.
res1 <- glmQLFTest(fit, coef='prognosiswill_improve')
res2 <- glmQLFTest(fit, coef='prognosiswill_worsen')
# Build each full results table once instead of calling topTags() three times
# per contrast.
tab1 <- topTags(res1, n=nrow(y))$table
tab2 <- topTags(res2, n=nrow(y))$table
# Show only genes passing FDR < 0.05; nothing is displayed when none pass.
print('prognosiswill_improve')
if(any(tab1$FDR < 0.05)){
    tab1[tab1$FDR < 0.05, ]
}
print('prognosiswill_worsen')
if(any(tab2$FDR < 0.05)){
    tab2[tab2$FDR < 0.05, ]
}

[1] "prognosiswill_improve"
[1] "prognosiswill_worsen"


Unnamed: 0_level_0,logFC,logCPM,F,PValue,FDR
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
CCL3,13.324431,4.368481,79.18446,8.997033e-10,8.542683e-06
HIST1H2AI,11.719482,4.313584,60.01764,1.566622e-08,7.437539e-05
U62317.5,7.116824,4.322021,35.32142,1.889886e-06,0.00598149
HIST1H2AH,6.946506,4.315641,29.59523,7.560228e-06,0.01794609


### Switched ASCs

In [16]:
# Load the saved object for this subset (switched ASCs, going by the file name)
# and convert it to a per-sample pseudobulk DGEList with prep_sce().
sce <- readRDS('h5ad/df.fil3_gex_bcells_vdj_sce_B_ASC_switched_prog.RDS')
y <- prep_sce(sce)

In [17]:
# Design: prognosis adjusted for sex, ethnicity, scaled age and centre.
mdl <- model.matrix(~ prognosis + sex + ethnicity + age_scaled + centre, data=y$samples)
fit <- testDGElist_edger(y, mdl)
# Quasi-likelihood F-test for each non-baseline prognosis coefficient.
res1 <- glmQLFTest(fit, coef='prognosiswill_improve')
res2 <- glmQLFTest(fit, coef='prognosiswill_worsen')
# Build each full results table once instead of calling topTags() three times
# per contrast.
tab1 <- topTags(res1, n=nrow(y))$table
tab2 <- topTags(res2, n=nrow(y))$table
# Show only genes passing FDR < 0.05; nothing is displayed when none pass.
print('prognosiswill_improve')
if(any(tab1$FDR < 0.05)){
    tab1[tab1$FDR < 0.05, ]
}
print('prognosiswill_worsen')
if(any(tab2$FDR < 0.05)){
    tab2[tab2$FDR < 0.05, ]
}

[1] "prognosiswill_improve"
[1] "prognosiswill_worsen"


Unnamed: 0_level_0,logFC,logCPM,F,PValue,FDR
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
U62317.5,7.408952,4.360205,36.41026,2.032139e-06,0.01811449
HIST1H2AI,11.741972,4.3934,39.24294,5.162387e-06,0.02300876
