In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt
import scipy
import os
import anndata as anndata
import scvelo as scv
import scanpy.external as sce
from corr_dotplot import *

In [None]:
import matplotlib as mpl

# Matplotlib overrides: no LaTeX text rendering, and SVG output keeps text
# as text ('none') instead of converting glyphs to paths.
new_rc_params = dict()
new_rc_params['text.usetex'] = False
new_rc_params['svg.fonttype'] = 'none'
mpl.rcParams.update(new_rc_params)

In [None]:
# --- Paths -----------------------------------------------------------------
path_to_results = '../results'
path_to_root_dir = '../../'
# Later cells write figures and CSV files directly into this folder
# (plt.savefig / DataFrame.to_csv), which fail if the folder does not exist.
os.makedirs(path_to_results, exist_ok=True)

# --- scanpy settings -------------------------------------------------------
sc.settings.verbosity = 3             # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()
sc.settings.set_figure_params(dpi=100, facecolor='white', figsize = (4,4), dpi_save=300, frameon = False)
sc.settings.figdir = path_to_results  # scanpy's `save=` arguments write here

# --- Plot styling shared by the UMAP cells ---------------------------------
umap_point_size = 70                  # categorical colourings
umap_transparency = 0.7
umap_continuous_point_size = 50       # continuous (gene expression) colourings
umap_continuous_transparency = 0.7
aspect_ratio = 1
save_figure = True                    # set to False to only display figures

In [None]:
# Study identifiers; each one names a directory under the project root that
# holds that study's data/intermediate/macrophages.h5ad file.
studies = ['GSE155698', 'GSE212966']

In [None]:
# Load the macrophage subset of every study and tag each cell with the study
# it came from.
adatas = []
for study in studies:
    path = '../../{}/data/intermediate/macrophages.h5ad'.format(study)
    print(path)
    adata = sc.read(path)
    # Use item assignment: `adata.obs.study = study` only sets a Python
    # attribute on the DataFrame and does NOT create an obs column, so the
    # study label would be lost after concatenation.
    adata.obs['study'] = study
    adatas.append(adata)

In [None]:
# Stack the per-study objects along the cell axis. anndata.concat defaults to
# an inner join on variables, so only genes present in every study are kept.
adata = anndata.concat(adatas)

In [None]:
# Cell names can repeat across the concatenated studies; make them unique
# in place so that index-based selection stays unambiguous.
adata.obs_names_make_unique()

In [None]:
# Sanity check: number of cell names that are still duplicated (expected 0).
adata.obs_names.duplicated().sum()

# Integrate with harmony

In [None]:
# Flag highly variable genes with scanpy's default settings; the PCA in the
# next cell is restricted to them (use_highly_variable=True).
sc.pp.highly_variable_genes(adata)

In [None]:
# 50-component PCA on the highly variable genes, using the ARPACK solver.
sc.pp.pca(adata, n_comps=50, use_highly_variable=True, svd_solver='arpack')

In [None]:
# PCA overview coloured by sample for PC pairs 1-2, 2-3 and 3-4, to inspect
# sample-driven structure before integration.
sc.pl.pca_overview(adata, color = 'sample', components = ['1,2', '2,3', '3,4'], frameon = True)

In [None]:
# Harmony batch correction with 'sample' as the batch variable. The corrected
# embedding is the 'X_pca_harmony' representation used by sc.pp.neighbors below.
sce.pp.harmony_integrate(adata, 'sample')

# Computing the neighborhood graph and embedding in UMAP

In [None]:
# kNN graph on the Harmony-corrected PCs (15 neighbours, first 15 components).
sc.pp.neighbors(adata, n_neighbors=15, n_pcs=15, use_rep='X_pca_harmony')

In [None]:
# UMAP embedding computed from the neighbourhood graph built above.
sc.tl.umap(adata)

In [None]:
# QC overview: UMAP coloured by per-cell QC metrics and by sample, laid out
# three panels per row.
sc.pl.umap(
    adata,
    color=[
        'n_genes_by_counts',
        'total_counts',
        'pct_counts_mt',
        'pct_counts_Rp',
        'sample',
    ],
    ncols=3,
    wspace=0.5,
)

## Expression of marker genes

In [None]:
# Marker genes plotted one UMAP per gene in the next cell.
gene_list = ['PTPRC', 'CD68','MKI67', 'HMOX1', 'HSPA5', 'MARCO', 'CD74', 'SPP1', 'CXCL9']
# Alternative, broader marker panel kept for reference (disabled):
#gene_list = ['VEGFA','ELANE','FUT4','CD69', 'CD164', 'S100A8', #'CD15',
#             'MPO','SLC7A11', 'SLC48A1','LAMP3', 'CCL22', 'TTF1', 'KRT18', 'KRT19', 'CLU', 'MMP7', 'SPP1', 'REG1A', 'CTRB2', 'PRSS1', 'DCN', 'LUM', 'CPA3', 'TPSAB1', 'CDH5', 'VWF', 'PLVAP', 'IRF7', 'RGS5',
#             'PDGFRB', 'CD3E', 'NCAM1', 'NKG7', 'CD3D', 'CD14', 'HLA-DRA', 'GZMB', 'ITGAX', 'ITGAM', 'APOE', 'LYZ', 'IGJ', 'CD79A', 'MS4A1',
#            'PTPRC','MKI67', 'HMOX1', 'HSPA5', 'MARCO', 'CD74', 'ARG1', 'MMP8',# 'MMP12',
#             'PRDX1', 'GCLM', 'NQO1', 'GSTM1', 'SLC40A1']

In [None]:
# One UMAP per marker gene, each on its own figure with a fixed aspect ratio.
# When save_figure is set, scanpy writes the panel into sc.settings.figdir.
for gene in gene_list:
    fig, ax1 = plt.subplots(1)
    ax1.set_aspect(aspect_ratio)
    if save_figure:
        marker_outfile = '{}.png'.format(gene)
    else:
        marker_outfile = None
    sc.pl.umap(
        adata,
        color=gene,
        size=umap_continuous_point_size,
        alpha=umap_continuous_transparency,
        ax=ax1,
        save=marker_outfile,
    )

In [None]:
# Per-sample cell density on the UMAP: compute once, then draw (and optionally
# save) one density panel per sample.
sc.tl.embedding_density(adata, groupby='sample', basis='umap', key_added='umap_density_condition')
for sam in adata.obs['sample'].cat.categories:
    fig = sc.pl.embedding_density(
        adata,
        basis='umap',
        key='umap_density_condition',
        group=sam,
        frameon=False,
        return_fig=True,
    )
    # The aspect ratio is applied to whatever axes pyplot considers current
    # after scanpy has drawn the panel.
    ax = plt.gca()
    ax.set_aspect(aspect_ratio)
    fig.canvas.draw()
    if not save_figure:
        continue
    plt.savefig(os.path.join(path_to_results, 'density_{}.png'.format(sam)))

### Clustering the neighborhood graph

In [None]:
# Leiden clustering at a coarse resolution (0.2); the labels land in
# adata.obs['leiden'], which later cells read.
sc.tl.leiden(adata, resolution = 0.2)

In [None]:
# UMAP coloured by Leiden cluster, with the legend in the right margin.
fig, ax1 = plt.subplots(1)
ax1.set_aspect(aspect_ratio)
sc.pl.umap(
    adata,
    color=['leiden'],
    legend_loc='right margin',
    wspace=0.5,
    size=umap_point_size,
    alpha=umap_transparency,
    ax=ax1,
    save='leiden.png' if save_figure else None,
)

In [None]:
# Gene pair whose per-cell expression is compared in the scatter plot below.
genes_to_plot = ['SPP1', 'CXCL9']

In [None]:
# Dense cells x genes table for the selected gene pair (positional row index).
expr_dense = adata[:, genes_to_plot].X.todense()
spp1_cxcl9 = pd.DataFrame(expr_dense, columns=genes_to_plot)

In [None]:
# Show the two-gene expression table (the notebook truncates long frames).
spp1_cxcl9

In [None]:
# Per-cell SPP1 vs CXCL9 expression. The frame is passed as `data=` explicitly:
# older seaborn releases treat the first positional argument as `x`, which
# clashes with the `x=` keyword.
sns.scatterplot(data=spp1_cxcl9, x='CXCL9', y='SPP1')

# Correlation of HMOX1, MARCO, SPP1 and CD163

In [None]:
# Co-expression of the genes of interest, computed separately for every
# (Leiden cluster, sample) combination. For each combination a dot-plot is
# drawn with the helpers from corr_dotplot and the underlying table is
# collected into `corr_per_sample`.
genes = ['HMOX1', 'MARCO', 'SPP1', 'CD163']
corr_frames = []
for cluster in adata.obs.leiden.cat.categories:
    # Select columns by the gene list itself rather than a boolean
    # `var_names.isin(genes)` mask: the mask returns columns in var_names
    # order, so labelling them with `genes` afterwards could attach the wrong
    # gene name to a column.
    genes_adata = adata[adata.obs.leiden == cluster, genes]
    for sam in adata.obs['sample'].cat.categories:
        genes_adata_sample = genes_adata[genes_adata.obs['sample'] == sam]
        # Binarise expression (detected / not detected) before correlating.
        gene_df = pd.DataFrame.sparse.from_spmatrix(genes_adata_sample.X.astype(bool), columns=genes)
        corr = compute_correlation_data(gene_df, genes)
        corr['sample'] = sam
        corr['cluster'] = cluster
        corr_frames.append(corr)
        fig, ax = plt.subplots(1)
        fig.set_size_inches(len(genes) * 2/3, len(genes) * 2/3 * 14/15)
        heatmap(x=corr['x'], y = corr['y'], x_order=genes, y_order=genes,
                color = corr['jaccard'].values.astype('float'), color_range = [0,1], size = corr['coexpression_ratio'].values.astype('float'), size_range = [0,1],
            save = os.path.join(path_to_results, 'correlation_dotplot_cluster_{}_sample_{}.svg'.format(cluster, sam)) if save_figure else False, title = 'cluster {}, sample {}'.format(cluster, sam))

# DataFrame.append was removed in pandas 2.0 (and re-copied the table on every
# iteration); concatenate once instead.
corr_per_sample = pd.concat(corr_frames) if corr_frames else pd.DataFrame()

In [None]:
# Persist the combined per-cluster / per-sample correlation table (row index
# is not written).
corr_per_sample.to_csv(os.path.join(path_to_results, 'correlation_data.csv'), index=False)

# Keap1 KO vs WT score

In [None]:
import pathlib

In [None]:
# Read the top-N down- and up-regulated KO-over-WT gene lists and upper-case
# the symbols (presumably to match the gene naming used in adata.var_names;
# TODO confirm).
database_path = pathlib.Path('../../gsea_db/')
numof_genes = 150
down_csv = database_path / '{}_KO--over--WT_Down-Regulated.csv'.format(numof_genes)
up_csv = database_path / '{}_KO--over--WT_Up-Regulated.csv'.format(numof_genes)
KOvsWT_down_genelist = pd.read_csv(down_csv, header=0).gene_name.str.upper()
KOvsWT_up_genelist = pd.read_csv(up_csv, header=0).gene_name.str.upper()

In [None]:
# Per-cell score for the down-regulated signature, stored in
# adata.obs['KOvsWT_down_score'].
sc.tl.score_genes(adata, gene_list=KOvsWT_down_genelist, score_name='KOvsWT_down_score')

In [None]:
# Per-cell score for the up-regulated signature, stored in
# adata.obs['KOvsWT_up_score'].
sc.tl.score_genes(adata, gene_list=KOvsWT_up_genelist, score_name='KOvsWT_up_score')

In [None]:
# UMAP coloured by the down-regulated signature score.
sc.pl.umap(adata, color = 'KOvsWT_down_score', cmap='Oranges')

In [None]:
# UMAP coloured by the up-regulated signature score.
sc.pl.umap(adata, color = 'KOvsWT_up_score', cmap='Oranges')

In [None]:
# Combined signature: up-regulated score minus down-regulated score per cell.
up_score = adata.obs['KOvsWT_up_score']
down_score = adata.obs['KOvsWT_down_score']
adata.obs['KOvsWT_up_minus_down_score'] = up_score - down_score

In [None]:
# Symmetric colour limits around zero, clipped at the 99th percentile of the
# absolute score, so the diverging colormap is centred on 0.
vmax = adata.obs['KOvsWT_up_minus_down_score'].abs().quantile(0.99)
vmin = -vmax
sc.pl.umap(
    adata,
    color='KOvsWT_up_minus_down_score',
    cmap='bwr',
    vmin=vmin,
    vmax=vmax,
)

In [None]:
# Per-cell expression of the four genes used by the co-expression
# classifications in the following cells.
df = sc.get.obs_df(adata, keys = ['SPP1', 'CXCL9', 'CD74', 'CXCL10'])

In [None]:
# Classify every cell by which of CXCL9 / SPP1 it expresses.
# The conditions are evaluated in order and paired one-to-one with `results`.
conditions = [
    (df['CXCL9'] > 0) & (df['SPP1'] > 0),
    (df['CXCL9'] > 0) & (df['SPP1'] == 0),
    (df['CXCL9'] == 0)& (df['SPP1'] > 0),
    (df['CXCL9'] == 0)& (df['SPP1'] == 0),
]

# Label assigned for the condition at the same position.
results = ['CXCL9 & SPP1', 'CXCL9', 'SPP1', 'nothing']

# np.select's implicit default is the integer 0, which has no common dtype
# with the string labels (TypeError on recent NumPy, a stray '0' category on
# older versions). Use a string default so the column is always string-typed.
adata.obs['CXCL9_SPP1'] = np.select(conditions, results, default='nothing')

In [None]:
# Colour per co-expression class; keys must match the labels written to
# adata.obs['CXCL9_SPP1'].
Cxcl9_Spp1_colors= {'CXCL9':'Yellow', 'SPP1':'Cyan', 'CXCL9 & SPP1': 'Magenta', 'nothing': 'lightgrey'}

In [None]:
# UMAP coloured by CXCL9/SPP1 co-expression class; the figure is always saved
# (this call does not consult save_figure).
sc.pl.umap(adata, color = 'CXCL9_SPP1', palette=Cxcl9_Spp1_colors, sort_order=False, save = '_Cxcl9_Spp1_coexpression.png')

In [None]:
# Distribution of the combined KO-vs-WT score within each CXCL9/SPP1 class.
sc.pl.violin(adata, keys='KOvsWT_up_minus_down_score', groupby='CXCL9_SPP1')

In [None]:
# Classify every cell by which of CXCL10 / SPP1 it expresses.
# The conditions are evaluated in order and paired one-to-one with `results`.
conditions = [
    (df['CXCL10'] > 0) & (df['SPP1'] > 0),
    (df['CXCL10'] > 0) & (df['SPP1'] == 0),
    (df['CXCL10'] == 0)& (df['SPP1'] > 0),
    (df['CXCL10'] == 0)& (df['SPP1'] == 0),
]

# Label assigned for the condition at the same position.
results = ['CXCL10 & SPP1', 'CXCL10', 'SPP1', 'nothing']

# np.select's implicit default is the integer 0, which has no common dtype
# with the string labels (TypeError on recent NumPy, a stray '0' category on
# older versions). Use a string default so the column is always string-typed.
adata.obs['CXCL10_SPP1'] = np.select(conditions, results, default='nothing')

In [None]:
# Colour per co-expression class; keys must match the labels written to
# adata.obs['CXCL10_SPP1'].
Cxcl10_Spp1_colors= {'CXCL10':'Yellow', 'SPP1':'Cyan', 'CXCL10 & SPP1': 'Magenta', 'nothing': 'lightgrey'}

In [None]:
# UMAP coloured by CXCL10/SPP1 co-expression class; the figure is always saved
# (this call does not consult save_figure).
sc.pl.umap(adata, color = 'CXCL10_SPP1', palette=Cxcl10_Spp1_colors, sort_order=False, save = '_Cxcl10_Spp1_coexpression.png')

In [None]:
# Distribution of the combined KO-vs-WT score within each CXCL10/SPP1 class.
sc.pl.violin(adata, keys='KOvsWT_up_minus_down_score', groupby='CXCL10_SPP1')

In [None]:
# Classify every cell by which of CD74 / SPP1 it expresses.
# The conditions are evaluated in order and paired one-to-one with `results`.
conditions = [
    (df['CD74'] > 0) & (df['SPP1'] > 0),
    (df['CD74'] > 0) & (df['SPP1'] == 0),
    (df['CD74'] == 0)& (df['SPP1'] > 0),
    (df['CD74'] == 0)& (df['SPP1'] == 0),
]

# Label assigned for the condition at the same position.
results = ['CD74 & SPP1', 'CD74', 'SPP1', 'nothing']

# np.select's implicit default is the integer 0, which has no common dtype
# with the string labels (TypeError on recent NumPy, a stray '0' category on
# older versions). Use a string default so the column is always string-typed.
adata.obs['CD74_SPP1'] = np.select(conditions, results, default='nothing')

In [None]:
# Colour per co-expression class; keys must match the labels written to
# adata.obs['CD74_SPP1'].
Cd74_Spp1_colors= {'CD74':'Yellow', 'SPP1':'Cyan', 'CD74 & SPP1': 'Magenta', 'nothing': 'lightgrey'}

In [None]:
# UMAP coloured by CD74/SPP1 co-expression class; the figure is always saved
# (this call does not consult save_figure).
sc.pl.umap(adata, color = 'CD74_SPP1', palette=Cd74_Spp1_colors, sort_order=False, save = '_Cd74_Spp1_coexpression.png')

In [None]:
# Distribution of the combined KO-vs-WT score within each CD74/SPP1 class.
sc.pl.violin(adata, keys='KOvsWT_up_minus_down_score', groupby='CD74_SPP1')

In [None]:
# Per-cell SPP1 expression together with the combined KO-vs-WT score.
# Note: this rebinds `df`, replacing the four-gene table used above.
df = sc.get.obs_df(adata, keys = ['SPP1', 'KOvsWT_up_minus_down_score'])

In [None]:
# Keep only the cells in which SPP1 is detected.
spp1_detected = df['SPP1'] > 0
df = df.loc[spp1_detected]

In [None]:
# (rows, columns) of the SPP1-positive subset.
df.shape

In [None]:
# Quartiles of SPP1 expression among SPP1-positive cells only (`df` was
# filtered above). The assignment aligns on the cell index, so cells without
# SPP1 expression get NaN in this column.
adata.obs['spp1_quartile'] = pd.qcut(df['SPP1'], q = 4, labels=['first', 'second', 'third', 'fourth'])

In [None]:
# Distribution of the combined KO-vs-WT score within each SPP1 quartile.
sc.pl.violin(adata, keys = 'KOvsWT_up_minus_down_score', groupby='spp1_quartile')

In [None]:
# Gather everything needed for downstream plotting into one per-cell table:
# gene expression, co-expression classes, combined score and SPP1 quartile.
df = sc.get.obs_df(
    adata,
    keys=[
        'SPP1',
        'CXCL9',
        'CXCL10',
        'CD74',
        'CXCL9_SPP1',
        'CXCL10_SPP1',
        'CD74_SPP1',
        'KOvsWT_up_minus_down_score',
        'spp1_quartile',
    ],
)

In [None]:
# Write the plotting table; the file name records how many samples and cells
# it covers.
n_samples = len(adata.obs['sample'].unique())
n_cells = len(adata.obs_names)
export_name = 'data_for_plotting_mac_pancreatic_cancer_{}_samples_{}_cells.csv'.format(n_samples, n_cells)
df.to_csv(pathlib.Path(path_to_results) / export_name)

In [None]:
# List the per-cell annotation columns. obs_keys is a method: without the call
# parentheses the cell only shows the bound-method repr instead of the keys.
adata.obs_keys()