In [None]:
import numpy as np
import pandas as pd
import scanpy as sc # v1.6
import sys
import matplotlib.pyplot as plt
import os.path
import anndata
from matplotlib import rcParams
import seaborn as sns
import numba
import mnnpy
import scipy

In [None]:
# Load the complete COVID-19 PBMC dataset.
# The working directory is switched first so the file can be opened by its bare
# name; os.chdir affects every relative path used by later cells as well.
pbmc_dir = '/home/ngr18/covid/'
os.chdir(pbmc_dir)
covid_total = sc.read_h5ad('covid.h5ad')

In [None]:
#Subset PBMC data to myeloid populations and reorder categories (for dotplot visualisations)

# Myeloid cluster labels to keep, listed once in the order they should appear
# in dotplots. The same list drives both the subsetting and the reordering so
# the two can never drift apart.
blood_myeloid_clusters = [
    'DC1', 'DC2', 'DC3', 'ASDC', 'pDC', 'DC_prolif',
    'CD83_CD14_mono', 'CD14_mono', 'CD16_mono', 'C1_CD16_mono', 'Mono_prolif',
]

# Indexing an AnnData returns a view of covid_total. The explicit .copy() makes
# blood_myeloid an independent object up front, so that the .obs assignment
# below does not rely on anndata implicitly converting the view on first write.
is_blood_myeloid = covid_total.obs['full_clustering'].isin(blood_myeloid_clusters)
blood_myeloid = covid_total[is_blood_myeloid, :].copy()

# reorder_categories needs the new list to match the existing categories exactly.
# NOTE(review): this assumes every cluster listed above has at least one cell in
# the subset and that unused categories were dropped by the subsetting - confirm.
blood_myeloid.obs['full_clustering'] = (
    blood_myeloid.obs['full_clustering'].cat.reorder_categories(blood_myeloid_clusters)
)

In [None]:
# Load the BAL dataset (GSE145926, reannotated for DC subsets).
# As with the PBMC load, the working directory is changed so the file can be
# read by its bare name; later relative paths resolve against this directory.
bal_dir = '/home/ngr18/covid/external_dataset/BAL'
os.chdir(bal_dir)
bal = sc.read_h5ad('full_bal.h5ad')

In [None]:
# Restrict the BAL object to its myeloid clusters.
bal_myeloid_clusters = ['DC1', 'DC2', 'mat_DC', 'pDC', 'Mac', 'Prolif_mac']
is_bal_myeloid = bal.obs.full_clustering.isin(bal_myeloid_clusters)
bal_myeloid = bal[is_bal_myeloid, :]

In [None]:
# Stack the BAL and PBMC myeloid subsets (BAL first) into a single object.
# index_unique=None keeps the cell names exactly as they are in each input.
myeloid_subsets = [bal_myeloid, blood_myeloid]
myeloid = anndata.concat(myeloid_subsets, index_unique=None)

In [None]:
#Normalise and scale the complete (BAL + PBMC) myeloid object
# The three calls below modify `myeloid` in place and must run in this order.
# Re-running this cell without first rebuilding `myeloid` applies them again.
# NOTE(review): assumes myeloid.X holds un-normalised counts at this point - confirm.

# Normalise every cell to the same total (target_sum=1e4).
sc.pp.normalize_total(myeloid, target_sum=1e4)
# Log-transform the normalised values (log(1 + x)).
sc.pp.log1p(myeloid)
# Scale each gene, clipping values above 10.
sc.pp.scale(myeloid, max_value=10)

In [None]:
# Gene sets used for per-cell signature scoring (gene symbols copied from MSigDB).

# TNF-alpha signalling via NF-kB (Hallmark set)
# https://www.gsea-msigdb.org/gsea/msigdb/cards/HALLMARK_TNFA_SIGNALING_VIA_NFKB.html
TNF_genes = [
    "ABCA1", "ACKR3", "AREG", "ATF3", "ATP2B1", "B4GALT1", "B4GALT5", "BCL2A1",
    "BCL3", "BCL6", "BHLHE40", "BIRC2", "BIRC3", "BMP2", "BTG1", "BTG2",
    "BTG3", "CCL2", "CCL20", "CCL4", "CCL5", "CCN1", "CCND1", "CCNL1",
    "CCRL2", "CD44", "CD69", "CD80", "CD83", "CDKN1A", "CEBPB", "CEBPD",
    "CFLAR", "CLCF1", "CSF1", "CSF2", "CXCL1", "CXCL10", "CXCL11", "CXCL2",
    "CXCL3", "CXCL6", "DDX58", "DENND5A", "DNAJB4", "DRAM1", "DUSP1", "DUSP2",
    "DUSP4", "DUSP5", "EDN1", "EFNA1", "EGR1", "EGR2", "EGR3", "EHD1",
    "EIF1", "ETS2", "F2RL1", "F3", "FJX1", "FOS", "FOSB", "FOSL1",
    "FOSL2", "FUT4", "G0S2", "GADD45A", "GADD45B", "GCH1", "GEM", "GFPT2",
    "GPR183", "HBEGF", "HES1", "ICAM1", "ICOSLG", "ID2", "IER2", "IER3",
    "IER5", "IFIH1", "IFIT2", "IFNGR2", "IL12B", "IL15RA", "IL18", "IL1A",
    "IL1B", "IL23A", "IL6", "IL6ST", "IL7R", "INHBA", "IRF1", "IRS2",
    "JAG1", "JUN", "JUNB", "KDM6B", "KLF10", "KLF2", "KLF4", "KLF6",
    "KLF9", "KYNU", "LAMB3", "LDLR", "LIF", "LITAF", "MAFF", "MAP2K3",
    "MAP3K8", "MARCKS", "MCL1", "MSC", "MXD1", "MYC", "NAMPT", "NFAT5",
    "NFE2L2", "NFIL3", "NFKB1", "NFKB2", "NFKBIA", "NFKBIE", "NINJ1", "NR4A1",
    "NR4A2", "NR4A3", "OLR1", "PANX1", "PDE4B", "PDLIM5", "PER1", "PFKFB3",
    "PHLDA1", "PHLDA2", "PLAU", "PLAUR", "PLEK", "PLK2", "PLPP3", "PMEPA1",
    "PNRC1", "PPP1R15A", "PTGER4", "PTGS2", "PTPRE", "PTX3", "RCAN1", "REL",
    "RELA", "RELB", "RHOB", "RIPK2", "RNF19B", "SAT1", "SDC4", "SERPINB2",
    "SERPINB8", "SERPINE1", "SGK1", "SIK1", "SLC16A6", "SLC2A3", "SLC2A6", "SMAD3",
    "SNN", "SOCS3", "SOD2", "SPHK1", "SPSB1", "SQSTM1", "STAT5A", "TANK",
    "TAP1", "TGIF1", "TIPARP", "TLR2", "TNC", "TNF", "TNFAIP2", "TNFAIP3",
    "TNFAIP6", "TNFAIP8", "TNFRSF9", "TNFSF9", "TNIP1", "TNIP2", "TRAF1", "TRIB1",
    "TRIP10", "TSC22D1", "TUBB2A", "VEGFA", "YRDC", "ZBTB10", "ZC3H12A", "ZFP36",
]

# IL6 / JAK / STAT3 signalling (Hallmark set)
# https://www.gsea-msigdb.org/gsea/msigdb/cards/HALLMARK_IL6_JAK_STAT3_SIGNALING
JAK_genes = [
    "A2M", "ACVR1B", "ACVRL1", "BAK1", "CBL", "CCL7", "CCR1", "CD14",
    "CD36", "CD38", "CD44", "CD9", "CNTFR", "CRLF2", "CSF1", "CSF2",
    "CSF2RA", "CSF2RB", "CSF3R", "CXCL1", "CXCL10", "CXCL11", "CXCL13", "CXCL3",
    "CXCL9", "DNTT", "EBI3", "FAS", "GRB2", "HAX1", "HMOX1", "IFNAR1",
    "IFNGR1", "IFNGR2", "IL10RB", "IL12RB1", "IL13RA1", "IL15RA", "IL17RA", "IL17RB",
    "IL18R1", "IL1B", "IL1R1", "IL1R2", "IL2RA", "IL2RG", "IL3RA", "IL4R",
    "IL6", "IL6ST", "IL7", "IL9R", "INHBE", "IRF1", "IRF9", "ITGA4",
    "ITGB3", "JUN", "LEPR", "LTB", "LTBR", "MAP3K8", "MYD88", "OSMR",
    "PDGFC", "PF4", "PIK3R5", "PIM1", "PLA2G2A", "PTPN1", "PTPN11", "PTPN2",
    "REG1A", "SOCS1", "SOCS3", "STAM2", "STAT1", "STAT2", "STAT3", "TGFB1",
    "TLR2", "TNF", "TNFRSF12A", "TNFRSF1A", "TNFRSF1B", "TNFRSF21", "TYK2",
]

# Response to type I interferon (GO set)
# https://www.gsea-msigdb.org/gsea/msigdb/cards/GO_RESPONSE_TO_TYPE_I_INTERFERON
IFN_genes = [
    "ABCE1", "ADAR", "BST2", "CACTIN", "CDC37", "CNOT7", "DCST1", "EGR1",
    "FADD", "GBP2", "HLA-A", "HLA-B", "HLA-C", "HLA-E", "HLA-F", "HLA-G",
    "HLA-H", "HSP90AB1", "IFI27", "IFI35", "IFI6", "IFIT1", "IFIT2", "IFIT3",
    "IFITM1", "IFITM2", "IFITM3", "IFNA1", "IFNA10", "IFNA13", "IFNA14", "IFNA16",
    "IFNA17", "IFNA2", "IFNA21", "IFNA4", "IFNA5", "IFNA6", "IFNA7", "IFNA8",
    "IFNAR1", "IFNAR2", "IFNB1", "IKBKE", "IP6K2", "IRAK1", "IRF1", "IRF2",
    "IRF3", "IRF4", "IRF5", "IRF6", "IRF7", "IRF8", "IRF9", "ISG15",
    "ISG20", "JAK1", "LSM14A", "MAVS", "METTL3", "MIR21", "MMP12", "MUL1",
    "MX1", "MX2", "MYD88", "NLRC5", "OAS1", "OAS2", "OAS3", "OASL",
    "PSMB8", "PTPN1", "PTPN11", "PTPN2", "PTPN6", "RNASEL", "RSAD2", "SAMHD1",
    "SETD2", "SHFL", "SHMT2", "SP100", "STAT1", "STAT2", "TBK1", "TREX1",
    "TRIM56", "TRIM6", "TTLL12", "TYK2", "UBE2K", "USP18", "WNT5A", "XAF1",
    "YTHDF2", "YTHDF3", "ZBP1",
]

In [None]:
# Score every cell for each gene set; the result of each call is stored under
# the given score_name, which the export step later reads from myeloid.obs.
# NOTE(review): scoring runs after the normalise/log/scale step, and use_raw=None
# leaves the choice of matrix to scanpy - confirm this is the intended input.

# Settings shared by all three signatures.
score_settings = dict(ctrl_size=50, gene_pool=None, n_bins=25, random_state=0, copy=False, use_raw=None)

for score_name, gene_set in (
    ('TNF_score', TNF_genes),
    ('JAK_score', JAK_genes),
    ('IFN_score', IFN_genes),
):
    sc.tl.score_genes(myeloid, gene_set, score_name=score_name, **score_settings)
  

In [None]:
# Write the per-cell annotations and signature scores to CSV for plotting in R.
# The file name is relative, so it is written to the current working directory.
export_columns = ['full_clustering', 'severity', 'TNF_score', 'JAK_score', 'IFN_score']
scores_table = myeloid.obs[export_columns]
scores_table.to_csv('TNF_IFN_JAK_scores_v2.csv')