In [None]:
import sys #updated 2022_1_12
sys.path.append("/Users/yzhiyuan/workspace/DENV_10X/notebook/")  # go to parent dir
import lib.util
from lib.util import *

In [None]:
def cluster(adata):
    """Embed and cluster ``adata`` in place with scanpy.

    Steps, in order: pick 2000 highly variable genes (``cell_ranger``
    flavor), compute a 40-component PCA on those genes, build the
    neighbor graph from the first 15 PCs, compute a UMAP, run Leiden at
    resolutions 1 and 0.5, and rank marker genes for each Leiden result.

    Leiden labels are stored under ``'leiden_r1'`` / ``'leiden_r0.5'`` and
    the gene rankings under ``'rank_genes_r1'`` / ``'rank_genes_r0.5'``.
    Nothing is returned.
    """
    sc.pp.highly_variable_genes(adata, n_top_genes=2000, flavor='cell_ranger')
    sc.pp.pca(adata, n_comps=40, svd_solver='arpack', use_highly_variable=True)
    sc.pp.neighbors(adata, n_pcs=15)
    sc.tl.umap(adata)

    # (key suffix, Leiden resolution); both clusterings are computed before
    # any marker ranking, matching the original call order.
    leiden_runs = (('r1', 1), ('r0.5', 0.5))
    for suffix, resolution in leiden_runs:
        sc.tl.leiden(adata, resolution=resolution, key_added='leiden_' + suffix)
    for suffix, _ in leiden_runs:
        sc.tl.rank_genes_groups(adata, groupby='leiden_' + suffix,
                                key_added='rank_genes_' + suffix)
    # NOTE: an earlier Louvain variant (resolutions 0.2 / 0.3, keys
    # 'louvain_r0.2' / 'louvain_r0.3') is not run.


def _add_missing_categories(adata, column, categories):
    """Append to ``adata.obs[column]`` those ``categories`` it lacks."""
    existing = adata.obs[column].cat.categories
    missing = [label for label in categories if label not in existing]
    if missing:
        adata.obs[column] = adata.obs[column].cat.add_categories(missing)


def _drop_present_categories(adata, column, categories):
    """Remove from ``adata.obs[column]`` those ``categories`` it still has.

    Rows still carrying a removed label become NaN (pandas behavior).
    """
    existing = adata.obs[column].cat.categories
    present = [label for label in categories if label in existing]
    if present:
        adata.obs[column] = adata.obs[column].cat.remove_categories(present)


def reorg_celltype(adata):
    """Rename and reorder the cell-type annotations of ``adata`` in place.

    ``adata.obs['cell_type_new']`` (primary type):
      * cells whose subtype is a CD4+/CD8+ T-cell subtype become 'T cells';
      * cells whose subtype is an XCL_high/XCL_low NK subtype become 'NK cells';
      * 'B_cells' is relabelled 'B cells';
      * the old 'NK/T_cells' and 'B_cells' categories are dropped and the
        remaining categories are put in a fixed plotting order.

    ``adata.obs['cell_subtype_new']`` (subtype):
      * 'cycling Plasmablasts'     -> 'proliferating plasmablasts'
      * 'non_cycling Plasmablasts' -> 'non-proliferating plasmablasts'
      * 'non_classical monocytes'  -> 'non-classical monocytes'
      * 'macrophages'              -> 'intermediate monocytes'
      and the categories are put in a fixed plotting order.

    Both columns must be pandas categoricals. The categorical methods are
    applied by re-assigning their result, because the ``inplace=True``
    keyword was deprecated in pandas 1.3 and removed in pandas 2.0.
    Categories are only added/removed when needed, so the function can be
    run again on already-reorganized data without raising.

    Raises:
        ValueError: from ``reorder_categories`` if, after renaming, a column's
            category set is not exactly the expected one.
    """
    t_cell_subtypes = ['CD4+ T cells', 'CD8+ effector T cells',
                       'CD8+ naive/memory T cells']
    nk_cell_subtypes = ['XCL_high NK cells', 'XCL_low NK cells']

    # ---- primary cell type -------------------------------------------------
    _add_missing_categories(adata, 'cell_type_new',
                            ['T cells', 'NK cells', 'B cells'])

    subtype = adata.obs['cell_subtype_new']
    adata.obs.loc[subtype.isin(t_cell_subtypes), 'cell_type_new'] = 'T cells'
    adata.obs.loc[subtype.isin(nk_cell_subtypes), 'cell_type_new'] = 'NK cells'
    adata.obs.loc[adata.obs['cell_type_new'] == 'B_cells',
                  'cell_type_new'] = 'B cells'

    # Any cell still labelled 'NK/T_cells' at this point turns into NaN.
    _drop_present_categories(adata, 'cell_type_new', ['NK/T_cells', 'B_cells'])

    group_order_primary = ['B cells', 'Plasmablasts', 'T cells', 'NK cells',
                           'Monocytes', 'conventional DCs', 'plasmacytoid DCs',
                           'megakaryocytes']
    adata.obs['cell_type_new'] = (
        adata.obs['cell_type_new'].cat.reorder_categories(group_order_primary))

    # ---- cell subtype ------------------------------------------------------
    subtype_renames = {
        'cycling Plasmablasts': 'proliferating plasmablasts',
        'non_cycling Plasmablasts': 'non-proliferating plasmablasts',
        'non_classical monocytes': 'non-classical monocytes',
        'macrophages': 'intermediate monocytes',
    }
    _add_missing_categories(adata, 'cell_subtype_new',
                            list(subtype_renames.values()))
    for old_label, new_label in subtype_renames.items():
        adata.obs.loc[adata.obs['cell_subtype_new'] == old_label,
                      'cell_subtype_new'] = new_label
    _drop_present_categories(adata, 'cell_subtype_new', list(subtype_renames))

    group_order_secondary = [
        'memory B cells', 'naive B cells', 'activated B cells',
        'proliferating plasmablasts', 'non-proliferating plasmablasts',
        'CD4+ T cells', 'CD8+ effector T cells', 'CD8+ naive/memory T cells',
        'XCL_high NK cells', 'XCL_low NK cells',
        'classical monocytes', 'non-classical monocytes',
        'intermediate monocytes',
        'conventional DCs', 'plasmacytoid DCs', 'megakaryocytes']
    adata.obs['cell_subtype_new'] = (
        adata.obs['cell_subtype_new'].cat.reorder_categories(group_order_secondary))
    
def umap_plot(adata, path, group, color_map, title, filetype):
    """Draw a UMAP coloured by ``adata.obs[group]`` and save it to disk.

    Cells whose ``group`` value is ``'unknown'`` or ``'doublets'`` are left
    out of the plot. The figure is written to
    ``<path>/umap_<group>.<filetype>``.

    Args:
        adata: annotated data object with a UMAP embedding already computed.
        path: output directory; created if it does not exist yet.
        group: name of the ``adata.obs`` column used for colouring.
        color_map: forwarded to ``sc.pl.umap`` as ``palette``; ``None`` keeps
            scanpy's default.
        title: axes title.
        filetype: file extension without the dot (e.g. ``'png'``, ``'pdf'``).

    Note:
        The matplotlib ``rcParams`` set here are global and stay in effect
        after the call.
    """
    rcParams['figure.figsize'] = (7, 7)
    rcParams['font.size'] = 20
    rcParams['pdf.fonttype'] = 42
    rcParams['font.sans-serif'] = "Arial"

    name = 'umap_' + group + '.' + filetype
    if path:
        # savefig does not create missing directories.
        os.makedirs(path, exist_ok=True)

    keep = (adata.obs[group] != 'unknown') & (adata.obs[group] != 'doublets')
    temp = adata[keep].copy()

    fig = sc.pl.umap(temp, color=group, palette=color_map, return_fig=True)

    # Work on the figure scanpy returned rather than on pyplot's "current"
    # axes, so the title/legend cannot land on an unrelated figure.
    ax = fig.axes[0]
    ax.set_title(title)

    # Only restyle the legend when there is something to show; calling
    # legend() on an axes without labelled artists emits a warning and adds
    # an empty legend. NOTE(review): presumably the case for numeric
    # (continuous) colouring -- confirm.
    handles, _ = ax.get_legend_handles_labels()
    if handles:
        ax.legend(markerscale=3, fontsize=16, bbox_to_anchor=(1, 1),
                  frameon=False)

    fig.savefig(os.path.join(path, name), bbox_inches='tight')
 
def umap_plot_2(adata, path, group, title, filetype):
    """Draw and save a UMAP coloured by ``adata.obs[group]`` with scanpy's default palette.

    This is ``umap_plot`` without a custom palette: ``color_map=None`` is
    forwarded as ``palette=None``, which is the default of ``sc.pl.umap``.
    Delegating keeps the two functions from drifting apart.

    Args:
        adata: annotated data object with a UMAP embedding already computed.
        path: output directory.
        group: name of the ``adata.obs`` column used for colouring.
        title: axes title.
        filetype: file extension without the dot (e.g. ``'png'``, ``'pdf'``).
    """
    umap_plot(adata, path, group, None, title, filetype)
    
def dotplot(adata, path, marker_genes):
    """Save a marker-gene dot plot grouped by ``cell_subtype_new`` as PDF and PNG.

    Cells annotated ``'unknown'`` or ``'doublets'`` are excluded. Files are
    written through scanpy's ``save`` mechanism into ``path``.

    Note:
        ``sc.settings.figdir`` and two matplotlib ``rcParams`` are set
        globally and remain changed after the call. The figure size is passed
        per call (``figsize=(14, 7)``) instead of through ``rcParams``.
    """
    sc.settings.figdir = path
    rcParams['pdf.fonttype'] = 42
    rcParams['font.size'] = 18

    excluded_labels = ['unknown', 'doublets']
    is_excluded = adata.obs['cell_subtype_new'].isin(excluded_labels)
    plotted = adata[~is_excluded].copy()

    # Same plot rendered twice, once per output format (PDF first, then PNG).
    for extension in ('pdf', 'png'):
        sc.pl.dotplot(plotted, marker_genes, groupby='cell_subtype_new',
                      use_raw=False, figsize=(14, 7),
                      save='dotplot.' + extension)


In [None]:
# Load the merged loom file and preprocess it.
# getdata / subsetdata / normalizedata / removegenes are not defined in this
# notebook -- presumably they come from the `lib.util` star import.
path = '/Users/yzhiyuan/workspace/DENV_10X/PythonProject/PNAS_10X/mergedata_20211001.loom'
adata = getdata(path)
adata_kid = removegenes(normalizedata(subsetdata(adata)))

# Keep only cells whose subtype annotation is neither 'doublets' nor 'unknown'.
adata_kid = adata_kid[
    ~adata_kid.obs.cell_subtype_new.isin(['doublets', 'unknown'])
].copy()

# Filter genes with scanpy's min_cells=3 threshold.
sc.pp.filter_genes(adata_kid, min_cells=3)

In [None]:
# Clustering: PCA, neighbors, UMAP, Leiden and marker ranking, applied in place.
cluster(adata_kid)

In [None]:
# Rename and reorder some cell types. This is done on a copy (data_kid), so
# adata_kid keeps its original labels.

data_kid = adata_kid.copy()
reorg_celltype(data_kid)

In [None]:
# Figure 1B: UMAP coloured by primary cell type (Dark2 palette) and by cell
# subtype (no custom palette), each saved as PNG and PDF.

savepath = '/Users/yzhiyuan/workspace/DENV_10X/figures/Manuscript/raw_figure_V4'
color_map = 'Dark2'

for filetype in ('png', 'pdf'):
    umap_plot(data_kid, savepath, 'cell_type_new', color_map, '', filetype)

for filetype in ('png', 'pdf'):
    umap_plot_2(data_kid, savepath, 'cell_subtype_new', '', filetype)

In [None]:
# Figure 1C: biomarkers for the cell subtypes (dot plot).
savepath = '/Users/yzhiyuan/workspace/DENV_10X/figures/Manuscript/raw_figure_V4'

# Marker genes kept in the row groups they were originally written in; the
# flattened order is the order passed to the dot plot.
marker_gene_rows = (
    ('CD79A', 'MS4A1', 'TCL1A', 'FCER2'),
    ('CD38', 'IGHG1', 'CCND2', 'MKI67'),
    ('CD3D', 'IL7R', 'CD8A', 'CD8B', 'GZMA', 'GZMK'),
    ('NKG7', 'GNLY', 'XCL1', 'XCL2'),
    ('LYZ', 'CD14', 'FCGR3A', 'C1QA'),
    ('CD1C', 'IRF8', 'CLEC4C'),
    ('PF4', 'PPBP', 'ITGA2B'),
)
marker_genes = [gene for row in marker_gene_rows for gene in row]

dotplot(data_kid, savepath, marker_genes)

In [None]:
# Figure 2 (originally Figure 1E): UMAP of three DWS patients coloured by
# 'DENV_reads_nor'.

# Normalize the virus reads (virus_nor is not defined in this notebook;
# presumably it adds the 'DENV_reads_nor' column used below -- confirm).
virus_nor(data_kid)

# Select the three DWS patients by ID.
dws_patient_ids = ['5_089_01', '6_020_01', '6_025_01']
data_kid_3DWS = data_kid[data_kid.obs.ID.isin(dws_patient_ids)].copy()

# Plot and save as PNG and PDF.
savepath = '/Users/yzhiyuan/workspace/DENV_10X/figures/Manuscript/raw_figure_V4'

for filetype in ('png', 'pdf'):
    umap_plot_2(data_kid_3DWS, savepath, 'DENV_reads_nor', '', filetype)

