In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt
import scanorama
import scipy
import os
import anndata as anndata
import scvelo as scv
#import scanpy_cluster_proportions
import scanpy.external as sce

In [None]:
import matplotlib as mpl

# Matplotlib overrides used for exported figures: no LaTeX text rendering, and
# SVG text written as text ('none') rather than converted to paths.
new_rc_params = {
    'text.usetex': False,
    'svg.fonttype': 'none',
}
for rc_key, rc_value in new_rc_params.items():
    mpl.rcParams[rc_key] = rc_value

In [None]:
# --- Output handling ---------------------------------------------------------
# Directory that receives every saved figure, and the switch that enables saving.
path_to_results = '../results/cell_type_annotation'
save_figure = True

# --- scanpy settings ---------------------------------------------------------
sc.settings.verbosity = 3  # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()
sc.settings.set_figure_params(
    dpi=100,
    dpi_save=300,
    facecolor='white',
    figsize=(4, 4),
    frameon=False,
)
sc.settings.figdir = path_to_results

# --- UMAP styling shared by the plotting cells below -------------------------
# Categorical colourings use the small, faint points; continuous (gene
# expression) colourings use the larger, more opaque ones.
umap_point_size, umap_transparency = 10, 0.3
umap_continuous_point_size, umap_continuous_transparency = 50, 0.7
aspect_ratio = 1

In [None]:
# Load the preprocessed dataset produced by an earlier step of the pipeline.
adata = sc.read(os.path.join('../data/intermediate/GSE212966', 'preprocessed.h5ad'))

# Workaround: reset the stored log1p base to None after reading from disk.
# NOTE(review): presumably needed so that sc.tl.rank_genes_groups does not fail
# on a missing 'base' key after an h5ad round trip -- confirm against the
# scanpy version in use.
# Only touch the entry when it exists, so a file without log1p metadata does
# not raise KeyError here and is not falsely marked as log-transformed.
if 'log1p' in adata.uns:
    adata.uns['log1p']['base'] = None

# Integrate samples with Harmony

In [None]:
# Batch-correct the PCA embedding across samples. The input and output keys are
# spelled out (they are the function defaults) because the neighbourhood graph
# below reads the corrected embedding from 'X_pca_harmony'.
sce.pp.harmony_integrate(
    adata,
    key='sample',
    basis='X_pca',
    adjusted_basis='X_pca_harmony',
)

# Computing the neighborhood graph and the UMAP embedding

In [None]:
# Build the kNN graph on the Harmony-corrected PCs (first 15 components,
# 15 neighbours per cell).
neighbor_settings = dict(
    use_rep='X_pca_harmony',
    n_neighbors=15,
    n_pcs=15,
)
sc.pp.neighbors(adata, **neighbor_settings)

In [None]:
# Embed the neighbourhood graph in two dimensions. The seed is stated
# explicitly (it equals scanpy's default) so the layout is reproducible.
sc.tl.umap(adata, random_state=0)

In [None]:
# Colour the UMAP by per-cell QC metrics and by sample, to check for technical
# or batch-driven structure.
qc_and_batch_keys = [
    'n_genes_by_counts',
    'total_counts',
    'pct_counts_mt',
    'pct_counts_Rp',
    'sample',
]
sc.pl.umap(adata, color=qc_and_batch_keys, ncols=3, wspace=0.5)

## Expression of marker genes

In [None]:
# Genes whose expression is overlaid on the UMAP (one figure per gene).
gene_list = [
    'PTPRC',
    'CD68',
    'MKI67',
    'HMOX1',
    'HSPA5',
    'MARCO',
    'CD74',
    'SPP1',
]
# Alternative, broader gene panel kept for reference (currently disabled).
# NOTE(review): 'TTF1' below may be a typo for 'TFF1', which is the symbol used
# in the marker lists further down -- confirm before re-enabling.
#gene_list = ['VEGFA','ELANE','FUT4','CD69', 'CD164', 'S100A8', #'CD15',
#             'MPO','SLC7A11', 'SLC48A1','LAMP3', 'CCL22', 'TTF1', 'KRT18', 'KRT19', 'CLU', 'MMP7', 'SPP1', 'REG1A', 'CTRB2', 'PRSS1', 'DCN', 'LUM', 'CPA3', 'TPSAB1', 'CDH5', 'VWF', 'PLVAP', 'IRF7', 'RGS5',
#             'PDGFRB', 'CD3E', 'NCAM1', 'NKG7', 'CD3D', 'CD14', 'HLA-DRA', 'GZMB', 'ITGAX', 'ITGAM', 'APOE', 'LYZ', 'IGJ', 'CD79A', 'MS4A1',
#            'PTPRC','MKI67', 'HMOX1', 'HSPA5', 'MARCO', 'CD74', 'ARG1', 'MMP8',# 'MMP12',
#             'PRDX1', 'GCLM', 'NQO1', 'GSTM1', 'SLC40A1']

In [None]:
# One UMAP per gene in gene_list, coloured by that gene's expression and drawn
# with the "continuous" point size and transparency from the config cell.
for gene in gene_list:
    fig, ax1 = plt.subplots(1)
    ax1.set_aspect(aspect_ratio)
    # scanpy receives '<gene>.png' as the save suffix only when saving is on.
    save_name = f'{gene}.png' if save_figure else None
    sc.pl.umap(
        adata,
        color=gene,
        size=umap_continuous_point_size,
        alpha=umap_continuous_transparency,
        ax=ax1,
        save=save_name,
    )

In [None]:
# Estimate, per sample, the density of cells in UMAP space and draw one density
# plot per sample.
sc.tl.embedding_density(adata, basis='umap', groupby='sample', key_added='umap_density_condition')

if save_figure:
    # Matplotlib's savefig does not create missing directories, so make sure
    # the target exists even if no earlier cell has saved a figure yet.
    os.makedirs(path_to_results, exist_ok=True)

for sam in adata.obs['sample'].cat.categories:
    fig = sc.pl.embedding_density(adata, basis = 'umap', group = sam, key = 'umap_density_condition', return_fig = True, frameon = False)
    # Use the returned figure directly instead of pyplot's "current" axes and
    # figure, so the right objects are styled and saved regardless of global
    # pyplot state. The first axes of the figure is the embedding panel
    # (any colour bar axes is added after it).
    ax = fig.axes[0]
    ax.set_aspect(aspect_ratio)
    fig.canvas.draw()
    if save_figure:
        fig.savefig(os.path.join(path_to_results, 'density_{}.png'.format(sam)))

### Clustering the neighborhood graph

In [None]:
# Low-resolution Leiden clustering. The output column and the seed are written
# out (both equal scanpy's defaults) because later cells refer to
# adata.obs['leiden'] and to specific cluster labels.
sc.tl.leiden(
    adata,
    resolution=0.2,
    key_added='leiden',
    random_state=0,
)

In [None]:
# UMAP coloured by Leiden cluster, with the legend placed beside the plot.
fig, ax1 = plt.subplots(1)
ax1.set_aspect(aspect_ratio)
leiden_plot_options = dict(
    color=['leiden'],
    legend_loc='right margin',
    wspace=0.5,
    size=umap_point_size,
    alpha=umap_transparency,
    ax=ax1,
    save='leiden.png' if save_figure else None,
)
sc.pl.umap(adata, **leiden_plot_options)

In [None]:
# Reset the stored log1p base to None before differential expression.
# NOTE(review): presumably a workaround so sc.tl.rank_genes_groups does not
# fail on the 'base' entry -- confirm against the scanpy version in use.
# Guarded so that an AnnData without log1p metadata does not raise KeyError.
if 'log1p' in adata.uns:
    adata.uns['log1p']['base'] = None

In [None]:
# Wilcoxon rank-sum marker test for every Leiden cluster, followed by a plot
# of the 25 top-ranked genes per cluster (independent y axes).
sc.tl.rank_genes_groups(
    adata,
    groupby='leiden',
    method='wilcoxon',
)
sc.pl.rank_genes_groups(
    adata,
    n_genes=25,
    sharey=False,
)

# Cell type annotation

In [None]:
# Marker genes grouped by where they come from.
marker_genes_dict = {
    'Macrophages': [
        'CD14', 'HLA-DRA', 'ITGAM', 'ITGAX', 'APOE', 'LYZ', 'SPP1',
    ],
    'paper': [
        'LAMP3', 'CCL22', 'TFF1', 'KRT18', 'KRT19', 'KRT8', 'CLU', 'MMP7',
        'REG1A', 'CTRB2', 'PRSS1', 'DCN', 'LUM', 'CPA3', 'TPSAB1', 'CDH5',
        'VWF', 'PLVAP', 'IRF7', 'RGS5', 'PDGFRB', 'CD3E', 'NCAM1', 'NKG7',
        'CD3D', 'GZMB', 'IGJ', 'CD79A', 'MS4A1',
    ],
    'additional_from_Florence': ['ELANE', 'S100A8', 'MPO'],
}

# Flat, ordered list used for the dot plot. The macrophage markers come first;
# their count fixes the bracket drawn over the first columns of that plot.
# 'SPP1' is intentionally left out here.
macrophage_markers = [
    'CD14', 'HLA-DRA', 'ITGAM', 'ITGAX', 'APOE', 'LYZ', 'CD68', 'CD163',
]
# Markers from the original papers, excluding macrophage markers
# ('IGJ' intentionally left out).
paper_markers = [
    'LAMP3', 'CCL22', 'TFF1', 'KRT18', 'KRT19', 'KRT8', 'CLU', 'MMP7',
    'REG1A', 'CTRB2', 'PRSS1', 'DCN', 'LUM', 'CPA3', 'TPSAB1', 'CDH5',
    'VWF', 'PLVAP', 'IRF7', 'RGS5', 'PDGFRB', 'CD3E', 'NCAM1', 'NKG7',
    'CD3D', 'GZMB', 'CD79A', 'MS4A1',
]
# Additional markers for cell types other than macrophages
# ('ELANE' and 'MPO' intentionally left out).
additional_markers = ['S100A8']

marker_genes = macrophage_markers + paper_markers + additional_markers

In [None]:
# Hierarchically order the Leiden clusters, then show marker expression per
# cluster as a dot plot.
sc.tl.dendrogram(adata, groupby='leiden')

# Bracket labelled 'Macrophages' over marker positions 0 through 7.
# NOTE: this span is tied to the ordering of marker_genes (macrophage markers
# listed first); update it if that list changes.
macrophage_span = (0, 7)
dotplot_save = '.svg' if save_figure else None
sc.pl.dotplot(
    adata,
    marker_genes,
    groupby='leiden',
    dendrogram=True,
    var_group_positions=[macrophage_span],
    var_group_labels=['Macrophages'],
    var_group_rotation=0,
    save=dotplot_save,
)

# Save macrophages for further investigation

In [None]:
# Leiden cluster labels treated as macrophages.
# NOTE(review): presumably chosen by inspecting the marker dot plot above; the
# labels are only valid for the current clustering (resolution, seed, input).
mac_clusters = ['3', '6']

# Fail loudly if the clustering no longer contains these labels, instead of
# silently producing (and later writing) a partial or empty macrophage subset.
available_clusters = set(adata.obs['leiden'].astype(str))
missing_clusters = [c for c in mac_clusters if c not in available_clusters]
if missing_clusters:
    raise ValueError(
        'Macrophage cluster(s) {} not found in adata.obs["leiden"]; '
        're-check mac_clusters against the current clustering.'.format(missing_clusters)
    )

# Independent copy (not a view) of the macrophage cells.
adata_mac = adata[adata.obs['leiden'].isin(mac_clusters)].copy()

In [None]:
# Two-level annotation on the full dataset: cells in the macrophage clusters
# are labelled 'Macrophages', every other cell 'other'.
is_macrophage = adata.obs['leiden'].isin(mac_clusters)
adata.obs['cell_type'] = np.where(is_macrophage, 'Macrophages', 'other')

In [None]:
# UMAP coloured by the macrophage / other annotation.
fig, ax1 = plt.subplots(1)
ax1.set_aspect(aspect_ratio)
cell_type_save = '_cell_type.png' if save_figure else None
sc.pl.umap(
    adata,
    color='cell_type',
    legend_loc='right margin',
    size=umap_point_size,
    alpha=umap_transparency,
    ax=ax1,
    save=cell_type_save,
)

In [None]:
# Persist the macrophage subset as h5ad for the downstream analysis.
macrophage_h5ad = '../data/intermediate/GSE212966/macrophages.h5ad'
adata_mac.write_h5ad(macrophage_h5ad)