# Figures 1K and 3D: sMAC Ms4a3

docker image: cr.gitlab.uzh.ch/elena.duerst/

In [None]:
import scanpy as sc
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import scipy as scipy
import pathlib
import anndata
import decoupler as dc

In [None]:
import matplotlib as mpl
# Matplotlib output settings applied globally: LaTeX text rendering is switched
# off and 'svg.fonttype' is set to 'none'.
new_rc_params = {
    'text.usetex': False,
    'svg.fonttype': 'none',
}
mpl.rcParams.update(new_rc_params)

In [None]:
import sys
sys.path.insert(0, "../../shared/src")
import decoupler_helpers
import bicolor_embedding_plot

In [None]:
# Configure scanpy logging and the figure defaults used throughout the notebook.
sc.settings.verbosity = 3             # verbosity: errors (0), warnings (1), info (2), hints (3)
# Print scanpy's header (version information) into the notebook output.
sc.logging.print_header()
# On-screen figures at 150 dpi, saved figures at 300 dpi, no frame, transparent background.
sc.settings.set_figure_params(dpi=150, dpi_save = 300, frameon=False, transparent = True)

In [None]:
# Input/output locations, relative to the notebook's working directory.
data_path = pathlib.Path('../data/raw')
path_to_intermediate_data = pathlib.Path('../data/intermediate/')
path_to_results = pathlib.Path('../results/analysis')
# Create both output directories up front: figures go to `path_to_results`,
# and the macrophage subset is later written to `path_to_intermediate_data`.
for output_dir in (path_to_intermediate_data, path_to_results):
    output_dir.mkdir(parents=True, exist_ok=True)
# Figures saved through scanpy's `save=` argument are written to this directory.
sc.settings.figdir = path_to_results

In [None]:
# Global plotting switches and UMAP styling constants shared by the plotting cells.
save_figure = True  # plotting cells that honor this flag pass save=None when it is False
aspect_ratio = 1  # not referenced in the visible part of this notebook
# Point size / opacity for UMAPs colored by a categorical annotation.
umap_point_size = 15
umap_transparency = 0.3
# Point size / opacity for UMAPs colored by a continuous value (gene expression, scores).
umap_continuous_point_size = 30
umap_continuous_transparency = 0.7

# Data import

In [None]:
#TODO: use link from GEO

In [None]:
# NOTE(review): `samples` is re-initialised to an empty list and rebuilt from the
# file names on disk in the loading cell below, so this assignment has no effect
# on the rest of the notebook.
samples = ['sample_2', 'sample_1']

In [None]:
# Load every filtered 10x matrix found in `data_path` into its own AnnData object.
# The files are sorted because `Path.glob` yields entries in a filesystem-dependent
# order; a fixed order keeps `adatas`, `samples` and the concatenated object
# reproducible across machines.
# NOTE(review): the manual cluster -> cell type mapping further down was derived
# from one particular run; re-check it if the sample order changes.
matrix_suffix = '_filtered_feature_bc_matrix.h5'
sample_files = sorted(data_path.glob('*' + matrix_suffix))
if not sample_files:
    # Fail here with a clear message instead of later during concatenation.
    raise FileNotFoundError(f"No '*{matrix_suffix}' files found in {data_path}")
adatas = []
samples = []
for sam in sample_files:
    adata = sc.read_10x_h5(sam)
    # Sample name = file name without the 10x suffix.
    sample_name = sam.name.replace(matrix_suffix, '')
    adata.obs['sample'] = sample_name
    # Genotype = part of the sample name before the first underscore.
    genotype = sample_name.split('_')[0]
    adata.obs['genotype'] = genotype
    print(adata.shape)
    adata.var_names_make_unique()
    samples.append(sample_name)
    adatas.append(adata)

# QC

In [None]:
 # Flag mitochondrial and ribosomal genes and compute per-cell QC metrics.
 for adata in adatas:
    # Genes whose name starts with 'mt-' are treated as mitochondrial.
    adata.var['mt'] = adata.var_names.str.startswith('mt-')
    # Ribosomal protein genes are named 'Rps*' / 'Rpl*'. A bare 'Rp' prefix would
    # also flag unrelated genes such as Rpa1, Rpe or Rptor and inflate pct_counts_Rp.
    adata.var['Rp'] = (adata.var_names.str.startswith('Rps')
                       | adata.var_names.str.startswith('Rpl'))
    # A single call yields the general metrics plus total_counts_* / pct_counts_*
    # for both gene sets.
    sc.pp.calculate_qc_metrics(adata, qc_vars=['mt', 'Rp'], percent_top=None, log1p=False, inplace=True)

In [None]:
 # Violin plots of the per-cell QC metrics for every sample, before filtering.
 for adata, name in zip(adatas, samples):
    print(name)
    violin_file = f"{name}.png" if save_figure else None
    sc.pl.violin(
        adata,
        ['n_genes_by_counts', 'total_counts', 'pct_counts_mt', 'pct_counts_Rp'],
        jitter=0.4,
        multi_panel=True,
        save=violin_file,
    )

In [None]:
# Pairwise QC scatter plots for every sample; each tuple is (x, y, color).
for adata in adatas:
    for x_key, y_key, color_key in (
        ('total_counts', 'pct_counts_mt', 'n_genes_by_counts'),
        ('total_counts', 'n_genes_by_counts', 'pct_counts_mt'),
        ('pct_counts_mt', 'pct_counts_Rp', 'n_genes_by_counts'),
    ):
        sc.pl.scatter(adata, x=x_key, y=y_key, color=color_key)

In [None]:
# Thresholds on the number of detected genes per cell; the filtering cell reuses them.
min_numof_genes = 200
max_numof_genes = 10000
# One row of four histograms per sample to judge the filtering thresholds.
for adata, name in zip(adatas, samples):
    fig, axs = plt.subplots(1, 4, figsize=(12, 3))
    fig.suptitle(f"Covariates for filtering: {name}")

    # Panel 1: total counts per cell, full range.
    sns.histplot(adata.obs["total_counts"], kde=False, ax=axs[0])
    # Panel 2: zoom on cells with fewer than 40000 total counts.
    sns.histplot(
        adata.obs["total_counts"][adata.obs["total_counts"] < 40000],
        kde=False,
        bins=40,
        ax=axs[1],
    )
    # Panel 3: detected genes per cell with both thresholds marked in red.
    plot = sns.histplot(adata.obs["n_genes_by_counts"], kde=False, bins=60, ax=axs[2])
    plot.axvline(x = min_numof_genes, color = 'red')
    plot.axvline(x = max_numof_genes, color = 'red')
    # Panel 4: zoom on the low end, up to 1000 genes above the lower threshold.
    sns.histplot(
        adata.obs["n_genes_by_counts"][adata.obs["n_genes_by_counts"] < min_numof_genes + 1000],
        kde=False,
        bins=60,
        ax=axs[3],
    )
    # Draw on the fourth panel explicitly instead of relying on pyplot's
    # implicit "current axes" state.
    axs[3].axvline(x = min_numof_genes, color = 'red')

# Filtering

In [None]:
# Keep only cells whose mitochondrial count percentage is below the cut-off.
max_pct_mt = 5
for idx, adata in enumerate(adatas):
    # `.copy()` stores an independent AnnData object. Without it the list holds
    # views of the unfiltered data, which the in-place filters that follow would
    # have to convert into copies implicitly.
    adatas[idx] = adata[adata.obs['pct_counts_mt'] < max_pct_mt, :].copy()

In [None]:
 # Apply the gene-count thresholds to every sample and print the remaining shape.
 for adata in adatas:
    # Drop cells with fewer than `min_numof_genes` detected genes ...
    sc.pp.filter_cells(adata, min_genes = min_numof_genes)
    # ... and cells with more than `max_numof_genes` detected genes.
    sc.pp.filter_cells(adata, max_genes = max_numof_genes)
    # Drop genes that are not detected in at least one remaining cell.
    sc.pp.filter_genes(adata, min_cells = 1)
    print(adata.shape)

# QC after filtering

In [None]:
# Same QC violin plots as before, now on the filtered samples.
for adata, name in zip(adatas, samples):
    filtered_violin_file = f"{name}_filtered.png" if save_figure else None
    sc.pl.violin(
        adata,
        ['n_genes_by_counts', 'total_counts', 'pct_counts_mt', 'pct_counts_Rp'],
        jitter=0.4,
        multi_panel=True,
        save=filtered_violin_file,
    )

In [None]:
samples

# Concatenate samples

In [None]:
# Merge all samples into one AnnData object: join="outer" keeps the union of
# genes, and fill_value=0 is used where a gene is absent from a sample.
adata = anndata.concat(adatas, join = "outer", fill_value=0)
# Make observation names unique — presumably because cell barcodes can repeat
# across samples.
adata.obs_names_make_unique()

In [None]:
# Normalize counts per cell (no target_sum given, so scanpy's default is used),
# then log1p-transform the normalized values.
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)

# Inspect unintegrated samples

In [None]:
# Flag highly variable genes with scanpy's default parameters.
sc.pp.highly_variable_genes(adata)

In [None]:
# PCA with 20 components, restricted to the highly variable genes.
sc.pp.pca(adata, n_comps = 20, use_highly_variable=True)

In [None]:
# Neighborhood graph with default parameters.
sc.pp.neighbors(adata)

In [None]:
# UMAP embedding computed from the neighborhood graph.
sc.tl.umap(adata)

In [None]:
# Leiden clustering; the cluster labels are mapped to cell types by hand further below.
sc.tl.leiden(adata, resolution=0.8)

In [None]:
# UMAP of the unintegrated data, colored by sample and by Leiden cluster.
for obs_key in ['sample', 'leiden']:
    fig, (ax1) = plt.subplots(1,1)
    ax1.set_aspect('equal')
    # Honor the global `save_figure` switch like the other plotting cells do.
    sc.pl.umap(adata, color = obs_key, size=umap_point_size, alpha = umap_transparency, ax = ax1, save = '_{}.png'.format(obs_key) if save_figure else None)

In [None]:
# Compute the embedding density separately for each sample.
sc.tl.embedding_density(adata, groupby='sample')

In [None]:
# One density plot per sample.
for sam in adata.obs['sample'].cat.categories:
    fig = sc.pl.embedding_density(adata, groupby='sample', group = sam, return_fig=True)
    # Write the file only when figure saving is enabled, consistent with the
    # other plotting cells.
    if save_figure:
        fig.savefig(path_to_results / 'umap_density_sample_{}.png'.format(sam), transparent = True)

# Cell type annotation

In [None]:
# Marker genes per candidate cell type; used for the per-gene UMAPs and the
# dot plot that guide the manual cluster annotation.
# NOTE(review): Epcam is commonly used as an epithelial marker — confirm that it
# is intended under 'Endothelial cells' (Pecam1 is a typical endothelial marker).
cell_type_marker_genes = { #'Leukocytes' : ['Ptprc'],
                 'Neutrophils': ['S100a8', 'S100a9', 'Csf3r'],
                 'Macrophages': ['Cd68', 'Csf1r', 'Adgre1'],#, 'Fcgr2b'],
                 'DC': ['Batf3', 'Zbtb46', 'Ccr7'],
                 'T-cells': ['Cd8a', 'Cd4', 'Cd3d'],
                 'NK-cells': ['Gzmb', 'Gzma', 'Klrk1'],
                 'B-cells': ['Cd19', 'Ms4a1'],
                 'Epithelial cells': ['Krt19'],
                 'Endothelial cells': ['Epcam', 'Vcam1'],
                 'Tumor cells': ['Twist1', 'Tead1'],
                 'CAF': ['Col1a1', 'Cxcl5', 'Mmp2', 'Fap']
                }

In [None]:
# One UMAP per marker gene, grouped (via the printed heading) by cell type.
for cell_type, genes in cell_type_marker_genes.items():
    print(cell_type)
    for gene in genes:
        fig, ax1 = plt.subplots(1, 1)
        ax1.set_aspect('equal')
        gene_file = '_{}.png'.format(gene) if save_figure else None
        sc.pl.umap(
            adata,
            color=gene,
            size=umap_continuous_point_size,
            alpha=umap_continuous_transparency,
            ax=ax1,
            save=gene_file,
        )

In [None]:
# Rank genes per Leiden cluster with a Wilcoxon test and show the top 25 per cluster.
sc.tl.rank_genes_groups(adata, 'leiden', method='wilcoxon')
sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False)

In [None]:
# Cluster overview used as reference for the manual annotation.
sc.pl.umap(adata, color = 'leiden')

In [None]:
# Expression of the curated marker genes across Leiden clusters.
sc.pl.dotplot(adata, var_names=cell_type_marker_genes, groupby='leiden')

In [None]:
import warnings

# Manual annotation: Leiden cluster label -> cell type.
cluster2celltype = {
    '0': 'Macrophages',
    '1': 'Macrophages',
    '2': 'Macrophages',
    '3': 'Macrophages',
    '4': 'Neutrophils',
    '5': 'Macrophages',
    '6': 'Macrophages',
    '7': 'Neutrophils',
    '8': 'Macrophages',
    '9': 'Macrophages',
    '10': 'Macrophages',
    '11': 'Endothelial cells',
    '12': 'T-cells and NK cells',
    '13': 'Macrophages',
}

# `Series.map` turns clusters that are missing from the dictionary into NaN
# without any message; those cells would then drop out of every cell-type based
# selection. Report them so a changed clustering does not go unnoticed.
unmapped_clusters = sorted(set(adata.obs['leiden'].astype(str)) - set(cluster2celltype))
if unmapped_clusters:
    warnings.warn(f"Leiden clusters without a cell type annotation: {unmapped_clusters}")

# Add the `.obs` column `cell_type` by mapping cluster labels to their annotation.
adata.obs['cell_type'] = adata.obs['leiden'].map(cluster2celltype).astype('category')

In [None]:
# UMAP colored by the manual cell type annotation (equal axis scaling).
fig, (ax1) = plt.subplots(1,1)
ax1.set_aspect('equal')
sc.pl.umap(adata, color = 'cell_type', size=umap_point_size, alpha = umap_transparency, ax = ax1, save = '_cell_type.png' if save_figure else None)

In [None]:
# Fetch the palette stored for 'cell_type' — presumably written by the preceding
# sc.pl.umap call.
cell_type_colors = adata.uns['cell_type_colors']

In [None]:
cell_type_colors

In [None]:
# Swap the colors at positions 1 and 3 of the palette.
# NOTE(review): positions presumably follow the category order of
# adata.obs['cell_type'] — confirm which cell types are affected. Running this
# cell a second time swaps the colors back.
cell_type_colors[3], cell_type_colors[1] = cell_type_colors[1], cell_type_colors[3]

In [None]:
cell_type_colors

In [None]:
# Store the modified palette so later plots of 'cell_type' use it.
adata.uns['cell_type_colors'] = cell_type_colors

In [None]:
adata.uns['cell_type_colors']

In [None]:
# Redraw the cell type UMAP after the palette change; it is saved under the same
# file name as the earlier cell type plot.
fig, (ax1) = plt.subplots(1,1)
ax1.set_aspect('equal')
sc.pl.umap(adata, color = 'cell_type', size=umap_point_size, alpha = umap_transparency, ax = ax1, save = '_cell_type.png' if save_figure else None)

In [None]:
# Keep only the cells annotated as macrophages. `.copy()` makes this an
# independent object instead of a view of `adata`, because it is modified in
# place afterwards (HVG selection, PCA, neighbors, UMAP, scores, new obs columns).
adata_mac = adata[adata.obs.cell_type == 'Macrophages'].copy()

In [None]:
# Save the macrophage subset as it is at this point, i.e. before it is
# re-embedded and scored in the cells below.
adata_mac.write_h5ad(path_to_intermediate_data / 'macrophages.h5ad')

In [None]:
# Macrophages together with Leiden cluster 7 (annotated as neutrophils in the mapping above).
adata_mac_plus_cluster7 = adata[(adata.obs.cell_type == 'Macrophages') | (adata.obs.leiden == '7')]

In [None]:
# Show that combined subset, colored by cell type.
fig, (ax1) = plt.subplots(1,1)
ax1.set_aspect('equal')
sc.pl.umap(adata_mac_plus_cluster7, ax = ax1, color = 'cell_type')

In [None]:
# Recompute highly variable genes, PCA, neighbors and UMAP on the macrophages only.
sc.pp.highly_variable_genes(adata_mac)

In [None]:
# PCA with 20 components on the macrophage-specific highly variable genes.
sc.pp.pca(adata_mac, n_comps=20, use_highly_variable=True)

In [None]:
sc.pp.neighbors(adata_mac)

In [None]:
sc.tl.umap(adata_mac)

In [None]:
# Macrophage UMAP colored by sample.
sc.pl.umap(adata_mac, color = 'sample')

# Cell function

In [None]:
# Load the macrophage function gene sets from a GMT file via the shared helper.
# NOTE(review): the helper presumably returns a table with 'geneset' and
# 'genesymbol' columns (both are used below) — confirm against decoupler_helpers.
mac_function = decoupler_helpers.gmt_to_decoupler('../../shared/databases/macrophage_function.gmt')

In [None]:
# Gene sets that are scored and plotted; 'Cycling' is later used to mask cells.
important_terms = [
             'Complement & Phagocytosis',
             'Oxidative Stress',
             'Cycling',
             'Antigen Processing And Presentation']

In [None]:
# Keep only the rows belonging to the selected gene sets.
mac_function = mac_function[mac_function.geneset.isin(important_terms)]

In [None]:
# Shared helper; what it prepares beyond the size filter is not visible here.
mac_function = decoupler_helpers.prepare_db_for_gsea(mac_function, min_geneset_size=1)

In [None]:
# AUCell scoring per cell on adata_mac.X (use_raw=False); the scores are read
# from .obsm['aucell_estimate'] in the next cell.
dc.run_aucell(adata_mac, mac_function, source='geneset', target='genesymbol', use_raw=False)

In [None]:
# Copy the per-cell scores into .obs, one column per gene set, for plotting and masking.
adata_mac.obs[important_terms] = adata_mac.obsm["aucell_estimate"][important_terms]

In [None]:
# One macrophage UMAP per gene set score; spaces in the term are replaced by
# underscores in the output file name.
for term in important_terms:
    fig, ax1 = plt.subplots(1, 1)
    ax1.set_aspect('equal')
    if save_figure:
        term_file = '_{}.png'.format(term).replace(" ", "_")
    else:
        term_file = None
    sc.pl.umap(
        adata_mac,
        color=term,
        size=umap_continuous_point_size,
        alpha=umap_continuous_transparency,
        color_map='magma',
        ax=ax1,
        save=term_file,
    )

# Fig 1 K (left): Expression of marker genes (cycling macrophages are greyed out)

In [None]:
# Genes shown in Fig 1K (left).
marker_genes = ['Arg1', 'Spp1',  'H2-Eb1', 'Cxcl9']

In [None]:
# Cells with a Cycling score of zero are treated as non-cycling. The mask uses
# the same `Cycling <= 0` criterion as the other figure cells (presumably
# equivalent to `== 0` because the score is not expected to be negative) and is
# computed once, since it does not depend on the gene.
non_cycling = adata_mac.obs.Cycling <= 0
for gene in marker_genes:
    # Background layer: all macrophages without coloring, so cycling cells stay grey.
    ax1 = sc.pl.umap(adata_mac, color = None, size= umap_continuous_point_size, alpha= umap_continuous_transparency, show = False)
    ax1.set_aspect('equal')
    # Foreground layer: non-cycling macrophages colored by the gene's expression.
    sc.pl.umap(adata_mac[non_cycling], color = gene, size=umap_continuous_point_size, alpha= umap_continuous_transparency, cmap='YlGnBu', ax = ax1, save = '_{}_wo_Cycling.png'.format(gene) if save_figure else None)

# Fig 1 K (right): Co-expression plots (Spp1, Cxcl9)

In [None]:
# Per-cell expression values of the two genes as a DataFrame.
df = sc.get.obs_df(adata_mac, keys = ['Spp1', 'Cxcl9'])

In [None]:
# Classify every cell by which of the two genes it expresses (value > 0).
conditions = [
    (df['Cxcl9'] > 0) & (df['Spp1'] > 0),
    (df['Cxcl9'] > 0) & (df['Spp1'] == 0),
    (df['Cxcl9'] == 0) & (df['Spp1'] > 0),
    (df['Cxcl9'] == 0) & (df['Spp1'] == 0),
]

# Labels in the same order as `conditions`.
results = ['Cxcl9 & Spp1', 'Cxcl9', 'Spp1', 'none']

# `np.select` defaults to the integer 0 for rows matching no condition; recent
# NumPy versions refuse to combine that with string choices and raise a
# TypeError. A string default avoids this and keeps every label inside the set
# covered by the color palette.
adata_mac.obs['Cxcl9_Spp1'] = np.select(conditions, results, default='none')

In [None]:
# Color per co-expression class, keyed by the labels stored in obs['Cxcl9_Spp1'].
Cxcl9_Spp1_colors= {'Cxcl9':'#40E0D0', 'Spp1':'#FF1493', 'Cxcl9 & Spp1': '#5D29A6', 'none': '#F2F2F2'}

In [None]:
# Categorical co-expression UMAP of the non-cycling macrophages (Cycling <= 0).
fig, (ax1) = plt.subplots(1,1)
ax1.set_aspect('equal')
# Honor the global `save_figure` switch like the other plotting cells do.
sc.pl.umap(adata_mac[adata_mac.obs.Cycling <= 0], color = 'Cxcl9_Spp1', size = umap_point_size, alpha = umap_transparency, palette=Cxcl9_Spp1_colors, ax = ax1, save = '_Cxcl9_Spp1_coexpression_categorical.png' if save_figure else None)

In [None]:
# Independent copy of the non-cycling macrophages (Cycling <= 0).
adata_mac_wo_cycling = adata_mac[adata_mac.obs.Cycling <= 0].copy()

In [None]:
# Two-gene (Spp1 vs Cxcl9) plot on the UMAP via the shared helper.
# NOTE(review): the helper presumably writes its output below `save_path` — confirm.
bicolor_embedding_plot.bicolor_embedding_plot(adata_mac_wo_cycling, x_key= 'Spp1', y_key='Cxcl9', embedding='umap', save_path=path_to_results)

# TAM Nrf2 scoring

In [None]:
# Read the down- and up-regulated KO-over-WT gene lists; `numof_genes` selects
# which pair of CSV files is used, and the 'gene_name' column holds the genes.
database_path = pathlib.Path('../../shared/databases/')
numof_genes = 150
down_regulated_csv = database_path / '{}_KO--over--WT_Down-Regulated.csv'.format(numof_genes)
up_regulated_csv = database_path / '{}_KO--over--WT_Up-Regulated.csv'.format(numof_genes)
KOvsWT_down_genelist = pd.read_csv(down_regulated_csv, header=0)['gene_name']
KOvsWT_up_genelist = pd.read_csv(up_regulated_csv, header=0)['gene_name']

In [None]:
# Score each macrophage for the down-regulated gene list; stored in obs['KOvsWT_down_score'].
sc.tl.score_genes(adata_mac, gene_list=KOvsWT_down_genelist, score_name='KOvsWT_down_score')

In [None]:
# Same for the up-regulated gene list; stored in obs['KOvsWT_up_score'].
sc.tl.score_genes(adata_mac, gene_list=KOvsWT_up_genelist, score_name='KOvsWT_up_score')

In [None]:
# Up score on the UMAP; the color range is clipped to the 0.5th-99.5th percentile.
sc.pl.umap(adata_mac, color = ['KOvsWT_up_score'], vmin = 'p0.5', vmax = 'p99.5', size=umap_continuous_point_size, alpha= umap_continuous_transparency, color_map='magma')

In [None]:
# Down score on the UMAP (equal axis scaling, color range clipped to the
# 0.5th-99.5th percentile); saved as '_KOvsWT_down_score.png' when saving is enabled.
fig, (ax1) = plt.subplots(1,1)
ax1.set_aspect('equal')
sc.pl.umap(adata_mac, color = 'KOvsWT_down_score', vmin = 'p0.5', vmax = 'p99.5', size=umap_continuous_point_size, alpha= umap_continuous_transparency, color_map='magma', ax = ax1, save = '_{}.png'.format('KOvsWT_down_score') if save_figure else None)

In [None]:
# Combined score: up-regulated score minus down-regulated score.
adata_mac.obs['KOvsWT_up_minus_down_score'] = adata_mac.obs.KOvsWT_up_score - adata_mac.obs.KOvsWT_down_score
# Symmetric limits around zero based on the 99th percentile of |score|.
# NOTE(review): these are computed but not passed to the plot, which clips at the
# 0.5th/99.5th percentiles instead — confirm which color range is intended.
vmax = adata_mac.obs.KOvsWT_up_minus_down_score.abs().quantile(q = 0.99)
vmin = -vmax
fig, (ax1) = plt.subplots(1,1)
ax1.set_aspect('equal')
# Save under the name of the plotted score. The previous name
# ('_KOvsWT_down_score.png') is the one used by the down-score figure, which this
# plot would overwrite.
sc.pl.umap(adata_mac, color = 'KOvsWT_up_minus_down_score', vmin = 'p0.5', vmax = 'p99.5', size=umap_continuous_point_size, alpha= umap_continuous_transparency, color_map='magma', ax = ax1, save = '_{}.png'.format('KOvsWT_up_minus_down_score') if save_figure else None)

# Fig 3D

In [None]:
# Split the combined score into four equally sized bins (quartiles) with named labels.
adata_mac.obs['KOvsWT_up_minus_down_score_quartile'] = pd.qcut(adata_mac.obs['KOvsWT_up_minus_down_score'], q = 4, labels=['lowest', 'intermediate-low', 'intermediate-high', 'highest'])

In [None]:
# Color per quartile label; both intermediate quartiles share one color.
quartile_colors= {'lowest':'#40E0D0', 'highest':'#FF1493', 'intermediate-low': '#5D29A6', 'intermediate-high': '#5D29A6'}

In [None]:
# Rebuild the non-cycling subset so that it contains the quartile column added above.
adata_mac_wo_cycling = adata_mac[adata_mac.obs.Cycling <= 0].copy()

In [None]:
# Background layer: all macrophages without coloring.
ax1 = sc.pl.umap(adata_mac, color = None, show=False)
ax1.set_aspect('equal')
# Foreground layer: non-cycling macrophages colored by score quartile.
# NOTE(review): the file name says 'down_score_quartiles' although the quartiles
# are taken from the up-minus-down score — confirm the intended name.
sc.pl.umap(adata_mac_wo_cycling, color = 'KOvsWT_up_minus_down_score_quartile', size=umap_point_size, alpha= umap_transparency, palette =quartile_colors, ax = ax1, save = '_{}.png'.format('KOvsWT_down_score_quartiles') if save_figure else None)

In [None]:
# List the per-cell annotation columns currently present on the macrophage object.
adata_mac.obs_keys()

In [None]:
# Distribution of the combined score per Cxcl9/Spp1 co-expression class, as
# violins with an inner box plot. The file is written only when the global
# `save_figure` switch is on, like in the other plotting cells.
sc.pl.violin(adata_mac, keys='KOvsWT_up_minus_down_score', groupby='Cxcl9_Spp1', palette = Cxcl9_Spp1_colors, stripplot=False, inner = 'box', rotation=90, save = 'KOvsWTscore_vs_Spp1_Cxcl9.svg' if save_figure else None)