In [1]:
# Set up the session: all relative paths below are resolved against the
# project directory on the cluster file system.
setwd('/lustre/scratch117/cellgen/team297/kt16/newcastle_covid')
library(dplyr)

# B-cell observation table; later cells tabulate its rows per sample and
# per `celltype_B`, so it presumably holds one row per cell.
bcell.df <- read.csv(file = "h5ad/covid_jan_2021_bcells_obs.csv")
# Clinical metadata; used further down as the source of `Age`, looked up
# by `patient_id`.
clinical_info <- read.csv(file = 'other_analyses/final_metadata_feb21.csv')


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [2]:
# Build one metadata row per sample, indexed by `sample_id`.
#
# `Age` is deliberately NOT taken from `bcell.df`: it is sourced from
# `clinical_info` instead. Leaving it out of the de-duplication step means
# inconsistent per-cell `Age` entries cannot produce duplicate `sample_id`
# rows (which would make the `rownames<-` assignment fail).
covid.meta <- bcell.df[, c('sample_id', 'patient_id', 'Status_on_day_collection_summary_v2', 'Sex', 'Collection_Day')] %>% distinct()
rownames(covid.meta) <- covid.meta$sample_id

# Look up each sample's age via its patient; patients absent from
# `clinical_info` get NA.
covid.meta$Age <- clinical_info$Age[match(covid.meta$patient_id, clinical_info$patient_id)]

# Restore the original column order so downstream code sees the same layout.
covid.meta <- covid.meta[, c('sample_id', 'patient_id', 'Status_on_day_collection_summary_v2', 'Age', 'Sex', 'Collection_Day')]

In [3]:
# Keep only the samples whose status is not one of the control groups.
bcell.meta <- subset(covid.meta,
                     !(Status_on_day_collection_summary_v2 %in% c("Non_covid", "LPS", 'Healthy')))

# Encode severity as an ordered factor. With R's default contrast settings an
# ordered factor is expanded into polynomial terms (.L, .Q, .C, ^4) in the
# design matrix.
bcell.meta$OrderedSeverity <- factor(bcell.meta$Status_on_day_collection_summary_v2,
                                     levels=c("Asymptomatic", "Mild", "Moderate", "Severe", "Critical"),
                                     ordered=TRUE)

# Design matrix for the D0 samples only: sex and age as covariates plus the
# severity trend terms. Rows keep the sample ids as row names.
bcell.model <- model.matrix(~ Sex + Age + OrderedSeverity,
                            data=subset(bcell.meta, Collection_Day %in% c("D0")))

In [4]:
# count cells
# Select D0 rows that are not from a control group. The mask is computed once
# so the sample and cell-type vectors are guaranteed to be filtered identically.
keep.cells <- bcell.df$Collection_Day %in% c("D0") &
    !bcell.df$Status_on_day_collection_summary_v2 %in% c("LPS", "Non_covid", 'Healthy')

# Cross-tabulate and transpose: rows are cell types, columns are samples.
cell.freq.tab <- t(table(bcell.df$sample_id[keep.cells],
                         bcell.df$celltype_B[keep.cells]))
# Discard samples without any counted cell; drop=FALSE keeps the 2-d shape
# even if a single sample or cell type remains.
cell.freq.tab <- cell.freq.tab[, colSums(cell.freq.tab) != 0, drop=FALSE]

# model.matrix() silently drops samples with missing covariates; fail with a
# clear message instead of a bare "subscript out of bounds".
missing.samples <- colnames(cell.freq.tab)[!colnames(cell.freq.tab) %in% rownames(bcell.model)]
if (length(missing.samples) > 0) {
    stop("Samples with counts but no row in the design matrix (missing covariates?): ",
         paste(missing.samples, collapse=", "))
}

# Put the design rows in the same order as the count-table columns.
bcell.model <- bcell.model[colnames(cell.freq.tab), , drop=FALSE]
# Number of rows of bcell.df per sample (all rows, not just the filtered ones).
n.cell.vecc <- table(bcell.df$sample_id)

In [5]:
library(edgeR)
# Wrap the cell-type x sample count table in a DGEList, supplying a per-sample
# library size derived from the per-sample row counts of bcell.df.
# NOTE(review): lib.size is given as the *log* of the per-sample totals, while
# edgeR documents lib.size as the raw library total — confirm this is the
# intended normalisation before reusing this cell.
bcell.dge <- DGEList(counts=cell.freq.tab,
                     lib.size=log(n.cell.vecc[colnames(cell.freq.tab)]))

Loading required package: limma



In [6]:
# Estimate the negative-binomial dispersions under the design, then fit the
# quasi-likelihood GLM (robust=TRUE requests the robust estimation option of
# glmQLFit).
bcell.dge <- estimateDisp(y=bcell.dge, design=bcell.model)
bcell.linear.fit <- glmQLFit(y=bcell.dge, design=bcell.model, robust=TRUE)

In [7]:
# linear changes
# Test the linear severity trend. The coefficient is referenced by its design
# column name rather than by position, so the test cannot silently switch to a
# different term if the covariates in the design formula change.
bcell.res <- as.data.frame(topTags(glmQLFTest(bcell.linear.fit, coef="OrderedSeverity.L"), sort.by='none', n=Inf))
bcell.res$CellType <- rownames(bcell.res)

# A cell type is called significant when FDR < 0.1 and the raw p-value < 0.05.
bcell.is.sig <- bcell.res$FDR < 0.1 & bcell.res$PValue < 0.05
bcell.res$Sig <- as.numeric(bcell.is.sig)
# Direction of change (+1 / -1), zeroed for non-significant cell types.
bcell.res$Diff <- sign(bcell.res$logFC)
bcell.res$Diff[!bcell.is.sig] <- 0
bcell.res

Unnamed: 0_level_0,logFC,logCPM,F,PValue,FDR,CellType,Sig,Diff
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>
B_exhausted,0.22131785,15.45372,0.48290881,0.488932038,0.714852856,B_exhausted,0,0
B_immature,-0.11593271,16.09162,0.20070521,0.655249345,0.737155514,B_immature,0,0
B_naive,-0.08529095,19.18145,0.72738516,0.396039264,0.712870675,B_naive,0,0
B_non-switched_memory,-0.05386057,15.53769,0.05029615,0.851077066,0.851077066,B_non-switched_memory,0,0
B_switched_memory,-0.20204907,16.56326,0.90466635,0.344129434,0.712870675,B_switched_memory,0,0
Plasma_cell_IgA,0.20072384,15.45036,0.34934011,0.555996665,0.714852856,Plasma_cell_IgA,0,0
Plasma_cell_IgG,0.65730324,15.62127,3.22947933,0.075742078,0.227226234,Plasma_cell_IgG,0,0
Plasma_cell_IgM,1.28896504,14.22155,11.3871304,0.001100284,0.009902554,Plasma_cell_IgM,1,1
Plasmablast,1.13575466,15.67385,6.24262114,0.014322106,0.064449477,Plasmablast,1,1


In [8]:
# Quadratic changes.
# Test the quadratic severity trend. The coefficient is referenced by its
# design column name rather than by position, so the test cannot silently
# switch to a different term if the covariates in the design formula change.
bcell.quad.res <- as.data.frame(topTags(glmQLFTest(bcell.linear.fit, coef="OrderedSeverity.Q"), sort.by='none', n=Inf))
bcell.quad.res$CellType <- rownames(bcell.quad.res)

# A cell type is called significant when FDR < 0.1 and the raw p-value < 0.05.
bcell.quad.is.sig <- bcell.quad.res$FDR < 0.1 & bcell.quad.res$PValue < 0.05
bcell.quad.res$Sig <- as.numeric(bcell.quad.is.sig)
# Direction of change (+1 / -1), zeroed for non-significant cell types.
bcell.quad.res$Diff <- sign(bcell.quad.res$logFC)
bcell.quad.res$Diff[!bcell.quad.is.sig] <- 0
bcell.quad.res

Unnamed: 0_level_0,logFC,logCPM,F,PValue,FDR,CellType,Sig,Diff
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>
B_exhausted,-0.03071614,15.45372,0.01148359,0.9149039,0.9149039,B_exhausted,0,0
B_immature,0.25124007,16.09162,1.21253545,0.2738228,0.4107342,B_immature,0,0
B_naive,0.14133277,19.18145,2.53577183,0.1148657,0.2584478,B_naive,0,0
B_non-switched_memory,0.03428311,15.53769,0.02586696,0.872593,0.9149039,B_non-switched_memory,0,0
B_switched_memory,0.10268311,16.56326,0.29037912,0.5913335,0.7602859,B_switched_memory,0,0
Plasma_cell_IgA,-0.40145452,15.45036,1.66887306,0.1997801,0.3596042,Plasma_cell_IgA,0,0
Plasma_cell_IgG,-0.97788472,15.62127,8.47653017,0.004553699,0.02049165,Plasma_cell_IgG,1,-1
Plasma_cell_IgM,-1.6167795,14.22155,22.9346445,6.70343e-06,6.033087e-05,Plasma_cell_IgM,1,-1
Plasmablast,-1.0877653,15.67385,6.87280962,0.01030522,0.03091567,Plasmablast,1,-1
