In [1]:
# set up the model
setwd('/lustre/scratch117/cellgen/team297/kt16/newcastle_covid')
library(dplyr)

bcell.df <- read.csv("h5ad/covid_jan_2021_bcells_obs.csv")
clinical_info = read.csv('other_analyses/final_metadata_feb21.csv')


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [2]:
covid.meta <- bcell.df[,c('sample_id', 'patient_id', 'Status_on_day_collection_summary_v2', 'Age', 'Sex', 'Collection_Day', 'Days_from_onset')] %>% distinct()
rownames(covid.meta) <- covid.meta$sample_id
covid.meta$Age = clinical_info$Age[match(covid.meta$patient_id, clinical_info$patient_id)]

In [3]:
bcell.meta <- covid.meta[!covid.meta$Status_on_day_collection_summary_v2 %in% c("Non_covid", "LPS", 'Healthy'),]
bcell.meta$OrderedSeverity <- ordered(bcell.meta$Status_on_day_collection_summary_v2, levels=c("Asymptomatic", "Mild", "Moderate", "Severe", "Critical"))
bcell.meta['Days_from_onset'][bcell.meta['Days_from_onset'] == 'Not_known'] <- NA
bcell.meta['Days_from_onset'][bcell.meta['Days_from_onset'] == 'nan'] <- NA
bcell.meta['Days_from_onset'][bcell.meta['Days_from_onset'] == 'Healthy'] <- 0
bcell.meta <- bcell.meta[!is.na(bcell.meta['Days_from_onset']),]
bcell.df <- bcell.df %>% filter(sample_id %in% row.names(bcell.meta))
bcell.model <- model.matrix(~ Sex + Age + as.numeric(Days_from_onset) + OrderedSeverity, data=bcell.meta[bcell.meta$Collection_Day %in% c("D0"), ])

In [4]:
# count cells
cell.freq.tab <- t(table(bcell.df$sample_id[bcell.df$Collection_Day %in% c("D0") &
                                                        !bcell.df$Status_on_day_collection_summary_v2 %in% c("LPS", "Non_covid", 'Healthy')],
                         bcell.df$celltype_B[bcell.df$Collection_Day %in% c("D0") &
                                                        !bcell.df$Status_on_day_collection_summary_v2 %in% c("LPS", "Non_covid", 'Healthy')]))
cell.freq.tab <- cell.freq.tab[,colSums(cell.freq.tab) != 0]
bcell.model <- bcell.model[colnames(cell.freq.tab), ]
n.cell.vecc <- table(bcell.df$sample_id)

In [5]:
library(edgeR)
bcell.dge <- DGEList(cell.freq.tab, lib.size=log(n.cell.vecc[colnames(cell.freq.tab)]))

Loading required package: limma



In [6]:
#estimate dispersions and fit model
bcell.dge <- estimateDisp(bcell.dge, design=bcell.model)
bcell.linear.fit <- glmQLFit(bcell.dge, bcell.model, robust=TRUE)

In [7]:
# linear changes
bcell.res <- as.data.frame(topTags(glmQLFTest(bcell.linear.fit, coef=4), sort.by='none', n=Inf))
bcell.res$CellType <- rownames(bcell.res)
bcell.res$Sig <- as.numeric(bcell.res$FDR < 0.1 & bcell.res$PValue < 0.05)
bcell.res$Diff <- sign(bcell.res$logFC)
bcell.res$Diff[bcell.res$FDR >= 0.1 | bcell.res$PValue >= 0.05] <- 0
bcell.res

Unnamed: 0_level_0,logFC,logCPM,F,PValue,FDR,CellType,Sig,Diff
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>
B_exhausted,-0.022000089,15.47603,2.5857822,0.11149871,0.1672481,B_exhausted,0,0
B_immature,-0.00995102,16.09104,0.7288711,0.39562518,0.3956252,B_immature,0,0
B_naive,0.004772062,19.17906,1.2735821,0.26223948,0.337165,B_naive,0,0
B_non-switched_memory,0.01636728,15.53889,2.7378201,0.10164998,0.1672481,B_non-switched_memory,0,0
B_switched_memory,0.019809624,16.57053,4.4268053,0.0383035,0.1149105,B_switched_memory,0,0
Plasma_cell_IgA,-0.035625748,15.44095,6.412884,0.01314884,0.0591698,Plasma_cell_IgA,1,-1
Plasma_cell_IgG,-0.015731866,15.62167,1.0443676,0.30967893,0.3483888,Plasma_cell_IgG,0,0
Plasma_cell_IgM,-0.027554743,14.21102,3.4250695,0.06765268,0.1522185,Plasma_cell_IgM,0,0
Plasmablast,-0.050900382,15.66965,6.8703183,0.01036463,0.0591698,Plasmablast,1,-1


In [8]:
# Quadratic changes.
bcell.quad.res <- as.data.frame(topTags(glmQLFTest(bcell.linear.fit, coef=5), sort.by='none', n=Inf))
bcell.quad.res$CellType <- rownames(bcell.quad.res)
bcell.quad.res$Sig <- as.numeric(bcell.quad.res$FDR < 0.1 & bcell.quad.res$PValue < 0.05)
bcell.quad.res$Diff <- sign(bcell.quad.res$logFC)
bcell.quad.res$Diff[bcell.quad.res$FDR >= 0.1 | bcell.quad.res$PValue >= 0.05] <- 0
bcell.quad.res

Unnamed: 0_level_0,logFC,logCPM,F,PValue,FDR,CellType,Sig,Diff
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>
B_exhausted,0.50638457,15.47603,2.13745812,0.147390797,0.223010196,B_exhausted,0,0
B_immature,-0.03605731,16.09104,0.01579397,0.900283778,0.900283778,B_immature,0,0
B_naive,-0.15756913,19.17906,1.88402647,0.173452375,0.223010196,B_naive,0,0
B_non-switched_memory,-0.25783179,15.53889,0.95509224,0.331172309,0.372568847,B_non-switched_memory,0,0
B_switched_memory,-0.45695532,16.57053,3.32389167,0.071760524,0.161461178,B_switched_memory,0,0
Plasma_cell_IgA,0.50037281,15.44095,2.00559435,0.160337879,0.223010196,Plasma_cell_IgA,0,0
Plasma_cell_IgG,0.80302329,15.62167,4.25417278,0.042176302,0.126528905,Plasma_cell_IgG,0,0
Plasma_cell_IgM,1.57655146,14.21102,14.7475259,0.000234591,0.002111319,Plasma_cell_IgM,1,1
Plasmablast,1.53639973,15.66965,10.68238382,0.001556629,0.00700483,Plasmablast,1,1


## remove Critical group to check for robustness

In [1]:
# set up the model
setwd('/lustre/scratch117/cellgen/team297/kt16/newcastle_covid')
library(dplyr)

bcell.df <- read.csv("h5ad/covid_jan_2021_bcells_obs.csv")
clinical_info = read.csv('other_analyses/final_metadata_feb21.csv')


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [2]:
covid.meta <- bcell.df[,c('sample_id', 'patient_id', 'Status_on_day_collection_summary_v2', 'Age', 'Sex', 'Collection_Day', 'Days_from_onset')] %>% distinct()
rownames(covid.meta) <- covid.meta$sample_id
covid.meta$Age = clinical_info$Age[match(covid.meta$patient_id, clinical_info$patient_id)]

In [3]:
bcell.meta <- covid.meta[!covid.meta$Status_on_day_collection_summary_v2 %in% c("Non_covid", "LPS", 'Healthy', 'Critical'),]
bcell.meta$OrderedSeverity <- ordered(bcell.meta$Status_on_day_collection_summary_v2, levels=c("Asymptomatic", "Mild", "Moderate", "Severe"))
bcell.meta['Days_from_onset'][bcell.meta['Days_from_onset'] == 'Not_known'] <- NA
bcell.meta['Days_from_onset'][bcell.meta['Days_from_onset'] == 'nan'] <- NA
bcell.meta['Days_from_onset'][bcell.meta['Days_from_onset'] == 'Healthy'] <- 0
bcell.meta <- bcell.meta[!is.na(bcell.meta['Days_from_onset']),]
bcell.df <- bcell.df %>% filter(sample_id %in% row.names(bcell.meta))
bcell.model <- model.matrix(~ Sex + Age + as.numeric(Days_from_onset) + OrderedSeverity, data=bcell.meta[bcell.meta$Collection_Day %in% c("D0"), ])

In [4]:
# count cells
cell.freq.tab <- t(table(bcell.df$sample_id[bcell.df$Collection_Day %in% c("D0") &
                                                        !bcell.df$Status_on_day_collection_summary_v2 %in% c("LPS", "Non_covid", 'Healthy')],
                         bcell.df$celltype_B[bcell.df$Collection_Day %in% c("D0") &
                                                        !bcell.df$Status_on_day_collection_summary_v2 %in% c("LPS", "Non_covid", 'Healthy')]))
cell.freq.tab <- cell.freq.tab[,colSums(cell.freq.tab) != 0]
bcell.model <- bcell.model[colnames(cell.freq.tab), ]
n.cell.vecc <- table(bcell.df$sample_id)

In [5]:
library(edgeR)
bcell.dge <- DGEList(cell.freq.tab, lib.size=log(n.cell.vecc[colnames(cell.freq.tab)]))

Loading required package: limma



In [6]:
#estimate dispersions and fit model
bcell.dge <- estimateDisp(bcell.dge, design=bcell.model)
bcell.linear.fit <- glmQLFit(bcell.dge, bcell.model, robust=TRUE)

In [7]:
# linear changes
bcell.res <- as.data.frame(topTags(glmQLFTest(bcell.linear.fit, coef=4), sort.by='none', n=Inf))
bcell.res$CellType <- rownames(bcell.res)
bcell.res$Sig <- as.numeric(bcell.res$FDR < 0.1 & bcell.res$PValue < 0.05)
bcell.res$Diff <- sign(bcell.res$logFC)
bcell.res$Diff[bcell.res$FDR >= 0.1 | bcell.res$PValue >= 0.05] <- 0
bcell.res

Unnamed: 0_level_0,logFC,logCPM,F,PValue,FDR,CellType,Sig,Diff
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>
B_exhausted,-0.033431267,15.45495,3.28299627,0.07390033,0.3145728,B_exhausted,0,0
B_immature,0.0027339951,16.07439,0.031530621,0.8595281,0.9228529,B_immature,0,0
B_naive,0.0005444977,19.17061,0.009439796,0.92285292,0.9228529,B_naive,0,0
B_non-switched_memory,0.0184035709,15.53814,2.039204173,0.15733381,0.3145728,B_non-switched_memory,0,0
B_switched_memory,0.0194959951,16.5903,2.81652355,0.09735324,0.3145728,B_switched_memory,0,0
Plasma_cell_IgA,-0.0235493759,15.46497,1.674859422,0.19947747,0.3145728,Plasma_cell_IgA,0,0
Plasma_cell_IgG,-0.0023611299,15.66264,0.015098097,0.90252682,0.9228529,Plasma_cell_IgG,0,0
Plasma_cell_IgM,-0.0274229775,14.24536,1.76902564,0.18742947,0.3145728,Plasma_cell_IgM,0,0
Plasmablast,-0.0320698858,15.68467,1.600023704,0.20971518,0.3145728,Plasmablast,0,0


In [8]:
# Quadratic changes.
bcell.quad.res <- as.data.frame(topTags(glmQLFTest(bcell.linear.fit, coef=5), sort.by='none', n=Inf))
bcell.quad.res$CellType <- rownames(bcell.quad.res)
bcell.quad.res$Sig <- as.numeric(bcell.quad.res$FDR < 0.1 & bcell.quad.res$PValue < 0.05)
bcell.quad.res$Diff <- sign(bcell.quad.res$logFC)
bcell.quad.res$Diff[bcell.quad.res$FDR >= 0.1 | bcell.quad.res$PValue >= 0.05] <- 0
bcell.quad.res

Unnamed: 0_level_0,logFC,logCPM,F,PValue,FDR,CellType,Sig,Diff
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>
B_exhausted,0.1616904,15.45495,0.2760204,0.6008314,0.6008314,B_exhausted,0,0
B_immature,-0.3098771,16.07439,1.4774286,0.2278914,0.3418371,B_immature,0,0
B_naive,-0.2113975,19.17061,4.3488903,0.04034672,0.0726241,B_naive,1,-1
B_non-switched_memory,-0.1353764,15.53814,0.3330929,0.5655274,0.6008314,B_non-switched_memory,0,0
B_switched_memory,-0.1482451,16.5903,0.4909614,0.485611,0.6008314,B_switched_memory,0,0
Plasma_cell_IgA,0.7192348,15.46497,4.844868,0.03072471,0.06913061,Plasma_cell_IgA,1,1
Plasma_cell_IgG,1.2398394,15.66264,11.7834434,0.000964669,0.002894007,Plasma_cell_IgG,1,1
Plasma_cell_IgM,1.8654506,14.24536,23.3519016,6.738117e-06,6.064305e-05,Plasma_cell_IgM,1,1
Plasmablast,1.5772031,15.68467,13.017067,0.0005464777,0.00245915,Plasmablast,1,1
