In [1]:
# Work from the project root so that the relative paths used below
# ('scripts/...', 'h5ad/...') resolve.
setwd(dir = '/lustre/scratch117/cellgen/team297/kt16/COVID_imperial_renal/')
# Load the project's helper script. setupDGElist(), testDGElist() and
# degTable_modified() are not defined in this notebook, so they presumably come
# from here (as may the library() calls) -- confirm in the script.
source(file = 'scripts/glmm_functions.R')

In [2]:
# Analysis-wide settings.
# Samples contributing fewer cells than this are dropped before aggregation.
min_cells <- 10
# ncpus = 10
# Parallel backend passed to testDGElist(): 10 workers, progress bar enabled.
BPPARAM <- MulticoreParam(progress = TRUE, workers = 10)

In [3]:
# Load the saved object; it is used below as a SingleCellExperiment.
sce <- readRDS('h5ad/df.fil3_gex_bcells_vdj_sce_B_naive_230622.RDS')
counts(sce) <- assays(sce)[['X']] # because i'm saving from a h5ad object with anndata2ri

# Give the categorical covariates an explicit level order.
sce$case_control <- factor(sce$case_control, levels = c('NEGATIVE', 'POSITIVE', 'RECOVERY'))

# WHO severity levels and the two-group collapse applied to them. 'NA' is a
# literal string level here, not a missing value.
severity_levels <- c('NA', 'mild', 'moderate', 'severe', 'critical')
severity_groups <- c('NA', 'mild_moderate', 'mild_moderate', 'severe_critical', 'severe_critical')
sce$WHO_temp_severity <- factor(sce$WHO_temp_severity, levels = severity_levels)
sce$WHO_temp_severity_group <- factor(sce$WHO_temp_severity, levels = severity_levels, labels = severity_groups)
sce$WHO_severity_group <- factor(sce$WHO_severity, levels = severity_levels, labels = severity_groups) # interpreted as peak severity

# Character version of the two-group collapse.
# Only 'severe' and 'critical' map to "severe_critical"; anything else that is
# not mild/moderate (the 'NA' level, true NAs, unexpected values) becomes NA.
# The previous ifelse() labelled all of those cells "severe_critical".
collapse_severity <- function(severity) {
  severity <- as.character(severity)
  grouped <- rep(NA_character_, length(severity))
  grouped[severity %in% c("mild", "moderate")] <- "mild_moderate"
  grouped[severity %in% c("severe", "critical")] <- "severe_critical"
  grouped
}
sce$grouped_temp_severity <- collapse_severity(sce$WHO_temp_severity)
sce$grouped_severity <- collapse_severity(sce$WHO_severity)

# Centre and scale age. scale() returns a one-column matrix carrying
# 'scaled:center'/'scaled:scale' attributes; as.numeric() flattens it so a plain
# numeric column (same values) is stored in colData.
sce$age_scaled <- as.numeric(scale(sce$calc_age))

In [4]:
# List the per-cell metadata columns available on the object.
names(colData(sce))

### Comparison 1: wave 1 only; DEGs from positive vs negative

In [5]:
# Comparison 1: keep only cells whose centre is 'NCL'.
# %in% never yields NA, so cells with a missing centre are simply excluded.
sce1 <- sce[, sce$centre %in% 'NCL']
# Drop samples that contribute fewer than `min_cells` cells.
nCells <- table(sce1$sample_id)
rmSamples <- names(nCells[nCells < min_cells])
sce1 <- sce1[, !sce1$sample_id %in% rmSamples]
# Summarize counts per sample. The argument is spelled out as `ids`; the
# previous `id =` only worked through partial argument matching.
smrzd <- aggregateAcrossCells(sce1, ids = as.character(sce1$sample_id))
y <- DGEList(counts = counts(smrzd), samples = colData(smrzd))
y1 <- setupDGElist(y, 'case_control')
# sanity check: cross-tabulate case_control against individual, sex and ethnicity
table(y1$samples$case_control, y1$samples$individual_id)
table(y1$samples$case_control, y1$samples$sex)
table(y1$samples$case_control, y1$samples$corrected_ethnicity)

# Fit the model: fixed effects for case_control, sex, the two ancestry PCs and
# scaled age, plus a random intercept per individual. No trailing comma after
# the last argument, so no empty argument is passed to testDGElist().
res1 <- testDGElist(y1,
            formula = as.formula("~ case_control + sex + PC1_nonafricanVsAfrican + PC2_asianVsEuropean + age_scaled + (1|individual_id)"),
            individual_id = 'individual_id',
            modified = TRUE,
            BPPARAM = BPPARAM
           )

          
           C20 C21 C23 C31 C34 C36 C40 C42 C60 C63 C65 C69 C73 C82 C85 C93 C103
  NEGATIVE   0   0   0   0   0   0   0   0   0   0   0   1   0   0   1   0    1
  POSITIVE   3   3   3   3   3   3   3   3   3   3   2   0   1   1   0   3    0
          
           C104 C106 C113 C123 C124 C126 C127 C128 C132 C134 C142 C161 C164
  NEGATIVE    1    1    0    0    0    0    0    0    0    1    1    1    1
  POSITIVE    0    0    3    3    3    3    3    3    3    0    0    0    0
          
           C167 C193 C195 C196 C197 C212 C214 C224 C234 C237 C238 C240 C251
  NEGATIVE    1    1    1    1    1    1    1    1    1    1    1    1    1
  POSITIVE    0    0    0    0    0    0    0    0    0    0    0    0    0

          
            F  M
  NEGATIVE  7 15
  POSITIVE 19 39

          
           African Asian European
  NEGATIVE       4    10        8
  POSITIVE       9    29       20


n = 80 samples, 43 individuals

Time difference of 1.926623 mins

q_case_control
--------------
Not Significant     Significant 
           8270             603 

q_sex
-----
Not Significant     Significant 
           8792              81 

q_PC1_nonafricanVsAfrican
-------------------------
Not Significant     Significant 
           8867               6 

q_PC2_asianVsEuropean
---------------------
Not Significant     Significant 
           8870               3 

q_age_scaled
------------
Not Significant     Significant 
           8820              53 


In [6]:
# Build the result table for the case_control term, POSITIVE group
# (presumably relative to NEGATIVE -- confirm in degTable_modified()).
results1 <- degTable_modified(
  res1,
  contrast = "case_control",
  group = "POSITIVE"
)

### Comparison 2: only patients who were negative in wave 1 but positive in wave 2; DEGs from recovery vs negative

In [7]:
# Comparison 2: restrict to the listed individuals.
sce2 <- sce[, sce$individual_id %in% c('C101', 'C108', 'C137', 'C138', 'C140',
                                       'C145', 'C146', 'C147', 'C168', # C141 removed because of infection in recovery sample
                                        'C169', 'C170', 'C187', 'C190', 'C33')]
# Drop samples that contribute fewer than `min_cells` cells.
nCells <- table(sce2$sample_id)
rmSamples <- names(nCells[nCells < min_cells])
sce2 <- sce2[, !sce2$sample_id %in% rmSamples]
# remove non-complete data (all have positive): keep only individuals that still
# have both a NEGATIVE and a RECOVERY sample. Columns are selected by level name
# rather than by position, so the filter does not depend on the level order.
df <- table(sce2$individual_id, sce2$case_control)
keep_ids <- rownames(df)[df[, 'NEGATIVE'] > 0 & df[, 'RECOVERY'] > 0]
sce2 <- sce2[, sce2$individual_id %in% keep_ids]
# Summarize counts per sample. The argument is spelled out as `ids`; the
# previous `id =` only worked through partial argument matching.
smrzd <- aggregateAcrossCells(sce2, ids = as.character(sce2$sample_id))
y <- DGEList(counts = counts(smrzd), samples = colData(smrzd))
# POSITIVE is passed as `remove`; the sanity tables below show only
# NEGATIVE and RECOVERY remaining.
y2 <- setupDGElist(y, 'case_control', remove = 'POSITIVE')
# sanity check: cross-tabulate case_control against individual, sex and ethnicity
table(y2$samples$case_control, y2$samples$individual_id)
table(y2$samples$case_control, y2$samples$sex)
table(y2$samples$case_control, y2$samples$corrected_ethnicity)

# Fit the model: fixed effects for case_control, sex, the two ancestry PCs and
# scaled age, plus a random intercept per individual.
res2 <- testDGElist(y2,
            formula = as.formula("~ case_control + sex + PC1_nonafricanVsAfrican + PC2_asianVsEuropean + age_scaled + (1|individual_id)"),
            individual_id = 'individual_id',
            modified = TRUE,
            BPPARAM = BPPARAM
           )

          
           C137 C140 C146 C168 C169 C187 C190
  NEGATIVE    1    1    1    1    1    1    1
  RECOVERY    1    1    1    1    1    1    1

          
           F M
  NEGATIVE 3 4
  RECOVERY 3 4

          
           African Asian European
  NEGATIVE       0     5        2
  RECOVERY       0     5        2


n = 14 samples, 7 individuals

Time difference of 2.063569 mins

q_case_control
--------------
Not Significant     Significant 
           8317               2 

q_sex
-----
Not Significant     Significant 
           8316               3 

q_PC1_nonafricanVsAfrican
-------------------------
Not Significant     Significant 
           8317               2 

q_PC2_asianVsEuropean
---------------------
Not Significant 
           8319 

q_age_scaled
------------
Not Significant     Significant 
           8317               2 


In [8]:
# Build the result table for the case_control term, RECOVERY group
# (presumably relative to NEGATIVE -- confirm in degTable_modified()).
results2 <- degTable_modified(
  res2,
  contrast = "case_control",
  group = "RECOVERY"
)

### Comparison 3: only patients who were negative in wave 1 but positive in wave 2; DEGs from positive vs negative

In [9]:
# Comparison 3: restrict to the listed individuals.
sce3 <- sce[, sce$individual_id %in% c('C101', 'C108', 'C137', 'C138', 'C140',
                                       'C145', 'C146', 'C147', 'C168',
                                        'C169', 'C170', 'C187', 'C190', 'C33')]
# Drop samples that contribute fewer than `min_cells` cells.
nCells <- table(sce3$sample_id)
rmSamples <- names(nCells[nCells < min_cells])
sce3 <- sce3[, !sce3$sample_id %in% rmSamples]
# remove non-complete data (all have positive): keep only individuals that still
# have both a NEGATIVE and a POSITIVE sample. Columns are selected by level name
# rather than by position, so the filter does not depend on the level order.
df <- table(sce3$individual_id, sce3$case_control)
keep_ids <- rownames(df)[df[, 'NEGATIVE'] > 0 & df[, 'POSITIVE'] > 0]
sce3 <- sce3[, sce3$individual_id %in% keep_ids]
# Summarize counts per sample. The argument is spelled out as `ids`; the
# previous `id =` only worked through partial argument matching.
smrzd <- aggregateAcrossCells(sce3, ids = as.character(sce3$sample_id))
y <- DGEList(counts = counts(smrzd), samples = colData(smrzd))
# RECOVERY is passed as `remove`; the sanity tables below show only
# NEGATIVE and POSITIVE remaining.
y3 <- setupDGElist(y, 'case_control', remove = 'RECOVERY')
# sanity check: cross-tabulate case_control against individual, sex and ethnicity
table(y3$samples$case_control, y3$samples$individual_id)
table(y3$samples$case_control, y3$samples$sex)
table(y3$samples$case_control, y3$samples$corrected_ethnicity)

# Fit the model: fixed effects for case_control, sex, the two ancestry PCs and
# scaled age, plus a random intercept per individual. No trailing comma after
# the last argument, so no empty argument is passed to testDGElist().
res3 <- testDGElist(y3,
            formula = as.formula("~ case_control + sex + PC1_nonafricanVsAfrican + PC2_asianVsEuropean + age_scaled + (1|individual_id)"),
            individual_id = 'individual_id',
            modified = TRUE,
            BPPARAM = BPPARAM
           )

          
           C137 C138 C140 C145 C146 C147 C168 C169 C187 C190
  NEGATIVE    1    1    1    1    1    1    1    1    1    1
  POSITIVE    2    6    6    5    6    5    4    5    3    6

          
            F  M
  NEGATIVE  5  5
  POSITIVE 24 24

          
           African Asian European
  NEGATIVE       0     8        2
  POSITIVE       0    39        9


n = 58 samples, 10 individuals

Time difference of 3.13898 mins

q_case_control
--------------
Not Significant     Significant 
           9155              27 

q_sex
-----
Not Significant     Significant 
           9148              34 

q_PC1_nonafricanVsAfrican
-------------------------
Not Significant     Significant 
           9181               1 

q_PC2_asianVsEuropean
---------------------
Not Significant     Significant 
           9178               4 

q_age_scaled
------------
Not Significant     Significant 
           9174               8 


In [10]:
# Build the result table for the case_control term, POSITIVE group
# (presumably relative to NEGATIVE -- confirm in degTable_modified()).
results3 <- degTable_modified(
  res3,
  contrast = "case_control",
  group = "POSITIVE"
)

In [11]:
# Persist the three fitted results and their result tables in a single .RData file.
save(
  list = c("res1", "res2", "res3", "results1", "results2", "results3"),
  file = "h5ad/df.fil3_gex_bcells_vdj_sce_B_naive_deg_270622.RData"
)