# Keap tumors WT vs KO
Python analysis using the scanpy 1.9 Docker image: `cr.gitlab.uzh.ch/elena.duerst/docker-images/scanpy1p9_bioc:0.5`

In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import matplotlib.pyplot as plt
import scipy
import pathlib
import anndata as anndata
import scvelo as scv
import matplotlib as mpl
import decoupler as dc

In [None]:
# Prepend the shared source directory to the module search path so that the
# modules imported below can be resolved from there.
import sys
sys.path.insert(0, "../../shared/src")
# Project-local helper modules (presumably located in ../../shared/src — TODO confirm).
import decoupler_helpers
import scanpy_cluster_proportions

In [None]:
# Global matplotlib defaults for this notebook:
#   - text.usetex = False : render text without LaTeX
#   - svg.fonttype = 'none': keep text as text in SVG output instead of
#     converting glyphs to paths
new_rc_params = {
    "text.usetex": False,
    "svg.fonttype": "none",
}
mpl.rcParams.update(new_rc_params)

In [None]:
# --- scanpy logging and figure defaults ---
sc.settings.verbosity = 3  # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()
sc.settings.set_figure_params(dpi=100, facecolor='white', figsize=(4, 4), dpi_save=300, frameon=False)

# --- output location ---
# Figures, tables and result objects are written here. The directory is
# created up front because later cells write to it directly with
# `fig.savefig`, `DataFrame.to_csv` and `AnnData.write`, none of which create
# missing parent directories.
path_to_results = pathlib.Path('../results/')
path_to_results.mkdir(parents=True, exist_ok=True)
sc.settings.figdir = path_to_results

# --- shared plotting parameters ---
# Point size / alpha for categorical UMAPs ...
umap_point_size = 30
umap_transparency = 0.3
# ... and for UMAPs coloured by a continuous value (e.g. gene expression).
umap_continuous_point_size = 30
umap_continuous_transparency = 0.7
aspect_ratio = 1
# When False, the plotting cells that honour this flag pass `save=None`.
save_figure = False

In [None]:
# Input: raw 10x matrices are read from this directory.
data_path = pathlib.Path('../data/raw')

# Intermediate objects: make sure the directory exists before anything is
# written into it.
intermediate_data_path = pathlib.Path('../data/intermediate/')
intermediate_data_path.mkdir(parents=True, exist_ok=True)
results_file = intermediate_data_path / 'preprocessed.h5ad'

# Data import

In [None]:
# Sample identifiers; each one is the prefix of a 10x `.h5` file name.
# NOTE: keep this exact order — it is necessary in order to get the exact
# same result.
samples = [
    'KeapWT1_day5',
    'KeapWT1_day1',
    'KeapWT3_day5',
    'KeapKO_day5',
    'KeapKO_day1',
    'KeapWT3_day1',
]

In [None]:
# Read one AnnData object per sample and annotate every cell with metadata
# derived from the sample name ('<genotype+replicate>_<day>').
adatas = {}
for sam in samples:
    h5_name = '{}_sample_filtered_feature_bc_matrix.h5'.format(sam)
    adata = sc.read_10x_h5(data_path / h5_name)

    # Second '_'-separated token is the time point; anything without 'WT'
    # in its name is treated as knock-out.
    day = sam.split('_')[1]
    genotype = 'KO' if 'WT' not in sam else 'WT'
    genotype_day = genotype + '_' + day

    adata.obs['sample'] = sam
    adata.obs['day'] = day
    print(genotype)
    adata.obs['genotype'] = genotype
    adata.obs['genotype_day'] = genotype_day
    print(genotype_day)
    print(adata.shape)

    # Gene symbols can be duplicated; make them unique before storing.
    adata.var_names_make_unique()
    adatas[sam] = adata

# QC

In [None]:
 # Flag mitochondrial ('mt-' prefix) and 'Rp'-prefixed genes, then compute the
 # QC metrics for both gene sets in a single pass per sample (one call with
 # both qc_vars instead of two calls that each recompute the general metrics).
 # NOTE(review): the 'Rp' prefix matches every gene symbol starting with 'Rp',
 # not only Rps*/Rpl* ribosomal protein genes — confirm this is intended.
 for adata in adatas.values():
     adata.var['mt'] = adata.var_names.str.startswith('mt-')
     adata.var['Rp'] = adata.var_names.str.startswith('Rp')
     sc.pp.calculate_qc_metrics(adata, qc_vars=['mt', 'Rp'], percent_top=None, log1p=False, inplace=True)

In [None]:
 # Per-sample QC violin plots. The object plotted must be the loop variable:
 # plotting a differently named variable would show one stale sample (left
 # over from an earlier cell) under every sample name.
 for name, adata in adatas.items():
     print(name)
     sc.pl.violin(adata, ['n_genes_by_counts', 'total_counts', 'pct_counts_mt', 'pct_counts_Rp'],
              jitter=0.4, multi_panel=True, save=f"{name}.png" if save_figure else None)

In [None]:
# Pairwise QC scatter plots per sample, given as (x, y, colour) triples.
qc_scatter_axes = [
    ('total_counts', 'pct_counts_mt', 'n_genes_by_counts'),
    ('total_counts', 'n_genes_by_counts', 'pct_counts_mt'),
    ('pct_counts_mt', 'pct_counts_Rp', 'n_genes_by_counts'),
]
for name, adata in adatas.items():
    print(name)
    for x_key, y_key, color_key in qc_scatter_axes:
        sc.pl.scatter(adata, x=x_key, y=y_key, color=color_key)

In [None]:
# Gene-count thresholds; also used by the filtering step further down.
min_numof_genes = 200
max_numof_genes = 8000  # 7000

# For every sample draw four histograms:
#   [0] total counts, [1] total counts below 40000 (zoom),
#   [2] genes per cell with both thresholds marked,
#   [3] genes per cell near the lower threshold (zoom) with that threshold marked.
for name, adata in adatas.items():
    fig, axs = plt.subplots(1, 4, figsize=(12, 3))
    fig.suptitle(f"Covariates for filtering: {name}")

    total_counts = adata.obs["total_counts"]
    n_genes = adata.obs["n_genes_by_counts"]

    sns.histplot(total_counts, kde=False, ax=axs[0])
    sns.histplot(total_counts[total_counts < 40000], kde=False, bins=40, ax=axs[1])

    sns.histplot(n_genes, kde=False, bins=60, ax=axs[2])
    axs[2].axvline(x=min_numof_genes, color='red')
    axs[2].axvline(x=max_numof_genes, color='red')

    sns.histplot(n_genes[n_genes < min_numof_genes + 1000], kde=False, bins=60, ax=axs[3])
    # Draw on the panel explicitly instead of relying on pyplot's implicit
    # "current axes".
    axs[3].axvline(x=min_numof_genes, color='red')

# Filtering

In [None]:
# Keep only cells whose mitochondrial count fraction is below the threshold.
max_pct_mt = 5
# Iterate over a snapshot of the items because the dict values are rebound
# inside the loop.
for name, adata in list(adatas.items()):
    keep = adata.obs['pct_counts_mt'] < max_pct_mt
    # Store a real copy rather than a view: the per-sample objects are
    # modified in place by the subsequent filtering step, and writing to a
    # view forces an implicit copy (with a warning) at that point.
    adatas[name] = adata[keep, :].copy()

In [None]:
 # In-place filtering of every sample: first drop cells below the lower
 # gene-count threshold, then cells above the upper one, and finally genes
 # that are not detected in at least one cell.
 for adata in adatas.values():
     for cell_threshold in ({'min_genes': min_numof_genes}, {'max_genes': max_numof_genes}):
         sc.pp.filter_cells(adata, **cell_threshold)
     sc.pp.filter_genes(adata, min_cells=1)
     print(adata.shape)

# QC after filtering

In [None]:
# Same QC violin plots as before filtering, now on the filtered samples.
qc_keys = ['n_genes_by_counts', 'total_counts', 'pct_counts_mt', 'pct_counts_Rp']
for name, adata in adatas.items():
    if save_figure:
        violin_file = f"{name}_filtered.png"
    else:
        violin_file = None
    sc.pl.violin(adata, qc_keys, jitter=0.4, multi_panel=True, save=violin_file)

In [None]:
# Merge all per-sample objects into a single AnnData. The outer join keeps the
# union of genes across samples; entries for genes absent from a sample are
# filled with 0.
adata = anndata.concat(adatas, join = "outer", fill_value=0)
# Observation names may repeat across samples (presumably identical barcodes
# in different 10x runs — TODO confirm), so make them unique.
adata.obs_names_make_unique()

In [None]:
# Drop the per-sample dict; from here on only the concatenated `adata` is used.
del adatas

# Normalization, transformation

In [None]:
# Library-size normalisation. With target_sum=None scanpy scales every cell to
# the median of the total counts per cell before normalisation.
sc.pp.normalize_total(adata, target_sum=None)
# Natural-log transform: X = log(X + 1).
sc.pp.log1p(adata)

# Dimensionality reduction & clustering

In [None]:
# Annotate highly variable genes using scanpy's default settings; the PCA
# below is restricted to these genes.
sc.pp.highly_variable_genes(adata)

In [None]:
# PCA with 20 components, computed on the highly variable genes only.
sc.pp.pca(adata, n_comps = 20, use_highly_variable=True)

In [None]:
# Neighbourhood graph with scanpy's default parameters; UMAP and Leiden
# clustering below are built on this graph.
sc.pp.neighbors(adata)

In [None]:
# 2-D UMAP embedding (default parameters) used by all UMAP plots below.
sc.tl.umap(adata)

In [None]:
# Leiden clustering of all cells at resolution 0.8; the labels are read back
# from adata.obs.leiden for the cell type annotation further down.
sc.tl.leiden(adata, resolution=0.8)

In [None]:
# Estimate the cell density in the embedding separately for every
# 'genotype_day' group.
sc.tl.embedding_density(adata, groupby='genotype_day')

In [None]:
# One density plot per group. The densities were computed with
# groupby='genotype_day', so `group` must be one of the categories of that
# column; sample names (e.g. 'KeapWT1_day5') are not valid groups and make
# scanpy raise an error.
# The output file name pattern is kept unchanged; the placeholder now holds
# the genotype/day label.
for group in adata.obs['genotype_day'].cat.categories:
    fig = sc.pl.embedding_density(adata, groupby='genotype_day', group=group, color_map='Greys', return_fig=True)
    fig.savefig(path_to_results / 'umap_density_sample_{}.png'.format(group), transparent=True)

# Cell type annotation

In [None]:
# Marker genes per cell type; each gene is plotted on the UMAP to guide the
# annotation of clusters.
cell_type_marker_genes = {
    'Macrophages': ['Ptprc', 'Cd68', 'Csf1r', 'Adgre1'],
    'Tumor cells': ['Twist1', 'Tead1'],
}

In [None]:
# One square UMAP per marker gene, grouped (and announced) by cell type.
for cell_type, marker_genes in cell_type_marker_genes.items():
    print(cell_type)
    for gene in marker_genes:
        fig, ax1 = plt.subplots(1, 1)
        ax1.set_aspect('equal')
        # Only hand a file suffix to scanpy when figure saving is enabled.
        figure_suffix = '_{}.png'.format(gene) if save_figure else None
        sc.pl.umap(
            adata,
            color=gene,
            size=umap_continuous_point_size,
            alpha=umap_continuous_transparency,
            ax=ax1,
            save=figure_suffix,
        )

In [None]:
# Leiden cluster labels (strings) that are annotated as macrophages
# (presumably chosen from the marker gene UMAPs — TODO confirm).
mac_clusters = [
    '5',
    '8',
    '9',
    '11',
    '12',
    '13',
]

In [None]:
# Cells in one of the macrophage clusters are labelled 'Macrophages';
# every other cell is labelled 'TC'.
is_macrophage = adata.obs['leiden'].isin(mac_clusters)
adata.obs['cell_type'] = np.where(is_macrophage, 'Macrophages', 'TC')

In [None]:
# UMAP coloured by the cell type annotation. `obs_key` is defined here
# because it is used to build the output file suffix and is not defined
# anywhere earlier in the notebook.
obs_key = 'cell_type'
fig, ax1 = plt.subplots(1, 1)
ax1.set_aspect('equal')
sc.pl.umap(adata, color=obs_key, size=umap_point_size, alpha=umap_transparency, ax=ax1, save='_{}.png'.format(obs_key))

# Tumor cell characterization

In [None]:
# Tumor cell subset. Take an explicit copy: the subset is modified below
# (re-clustering, differential expression results), and writing to an AnnData
# view triggers an implicit copy with a warning.
ad_tc = adata[adata.obs.cell_type == 'TC'].copy()

In [None]:
# Re-cluster the tumor cells at resolution 0.3; this replaces the 'leiden'
# labels carried over from the clustering of all cells.
# NOTE(review): no neighbourhood graph is recomputed on the subset in the
# visible code, so the graph inherited from the full dataset is used —
# confirm this is intended.
sc.tl.leiden(ad_tc, resolution = 0.3)

In [None]:
# Square UMAP of the tumor cells coloured by their Leiden cluster; the figure
# is always saved with the suffix '_leiden.png'.
fig, ax1 = plt.subplots(nrows=1, ncols=1)
ax1.set_aspect('equal')
sc.pl.umap(
    ad_tc,
    color='leiden',
    size=umap_point_size,
    alpha=umap_transparency,
    ax=ax1,
    save='_leiden.png',
)

In [None]:
# Rank marker genes for every tumor cell Leiden cluster using the Wilcoxon
# rank-sum test.
sc.tl.rank_genes_groups(ad_tc, groupby='leiden', method = 'wilcoxon')

In [None]:
# Plot the top-ranked genes per cluster; sharey=False gives each cluster
# panel its own y-axis scale.
sc.pl.rank_genes_groups(ad_tc, sharey=False)

In [None]:
# Cluster composition per 'genotype_day' group, computed by the project-local
# helper (presumably the fraction of cells per Leiden cluster within each
# group — TODO confirm against the helper). The result is written to CSV below.
props = scanpy_cluster_proportions.get_cluster_proportions(ad_tc, cluster_key='leiden', sample_key='genotype_day')

In [None]:
# Plot the cluster proportions with the project-local helper.
fig = scanpy_cluster_proportions.plot_cluster_proportions(props)

In [None]:
# Export the proportions table to the results directory.
# NOTE(review): `to_csv` does not create missing directories — the results
# directory must already exist.
props.to_csv(path_to_results / 'leiden_proportions.csv')

In [None]:
# Genes plotted on the tumor cell UMAP, grouped by the process they mark.
emt_genes = ['Vcan', 'Sparc', 'Fn1', 'Col3a1', 'Col6a1', 'Egln1', 'Mmp2']  # EMT
proliferation_genes = ['Top2a', 'Mki67', 'Cdc20']  # Proliferation
gene_list = emt_genes + proliferation_genes

In [None]:
# One square UMAP of the tumor cells per gene. The colour scale is clipped to
# the 0.1th-99.9th percentile so that single outlier cells do not dominate it.
for gene in gene_list:
    fig, ax1 = plt.subplots(1, 1)
    ax1.set_aspect('equal')
    # Only hand a file suffix to scanpy when figure saving is enabled.
    figure_suffix = '_{}.png'.format(gene) if save_figure else None
    sc.pl.umap(
        ad_tc,
        color=gene,
        size=umap_continuous_point_size,
        alpha=umap_continuous_transparency,
        vmin='p0.1',
        vmax='p99.9',
        ax=ax1,
        save=figure_suffix,
    )

# Save file

In [None]:
# `ad_mac` is not defined in the visible part of this notebook, so the
# macrophage subset is derived here from the cell type annotation before it
# is written to the results directory.
# NOTE(review): if a dedicated macrophage analysis is added above, define
# `ad_mac` there and drop the subsetting line below.
ad_mac = adata[adata.obs.cell_type == 'Macrophages'].copy()
ad_mac.write(path_to_results / 'analysed_macrophages.h5ad')

In [None]:
# Write the analysed tumor cell object (including the clustering and marker
# gene results computed above) to the results directory.
ad_tc.write(path_to_results / 'analysed_tumor_cells.h5ad')