In [None]:
import os
import scanpy as sc
import squidpy as sq
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# All paths used below (e.g. "results/spaceranger_count/...") are relative, so the
# notebook is run from the Snakemake project root. The root can be overridden with
# the TENDONHCA_PROJECT_DIR environment variable; the default is the original location.
PROJECT_DIR = os.environ.get("TENDONHCA_PROJECT_DIR",
                             "/ceph/project/tendonhca/albrecht/003-snakemake/")
os.chdir(PROJECT_DIR)

In [None]:
# Samples to analyse; each name is used to build the Space Ranger output path
# and to label the spots of that slide.
sample_names = [
    "OMB1277_SSP_Enth_H",
    "OMB1541_GluMed_MTJ_H",
]

In [None]:
def read_and_qc(sample_name):
    r""" Read one 10X spatial experiment into an anndata object and compute QC metrics.

    The Space Ranger output is read from
    ``results/spaceranger_count/<sample_name>/outs`` relative to the current
    working directory. Modify this function if required by your workflow.

    :param sample_name: Name of the sample (converted with ``str`` to build the path)
    :return: anndata object in which

        * ``obs['sample']`` holds the sample name and ``obs_names`` are prefixed
          with ``<sample_name>_`` (index named ``spot_id``),
        * ``var_names`` hold the values of the ``gene_ids`` column and
          ``var['SYMBOL']`` holds the original ``var_names``,
        * ``var`` has the boolean columns ``mt``, ``ribo`` and ``mtrnr``,
        * the ``sc.pp.calculate_qc_metrics`` columns and ``obs['mt_frac']`` are present.
    """
    from scipy.sparse import csr_matrix

    adata = sc.read_visium("results/spaceranger_count/" + str(sample_name) + '/outs',
                           count_file='filtered_feature_bc_matrix.h5', load_images=True)
    adata.obs['sample'] = str(sample_name)

    # Keep the original gene names as SYMBOL and index the genes by the gene_ids column
    adata.var['SYMBOL'] = adata.var_names
    adata.var.rename(columns={'gene_ids': 'ENSEMBL'}, inplace=True)
    adata.var_names = adata.var['ENSEMBL']
    adata.var.drop(columns='ENSEMBL', inplace=True)

    # Calculate QC metrics.
    # The matrix is passed as-is: calculate_qc_metrics accepts sparse input, so the
    # full count matrix does not have to be densified first.
    sc.pp.calculate_qc_metrics(adata, inplace=True)
    adata.X = csr_matrix(adata.X)  # guarantee CSR storage, as before

    # Flag gene families by symbol prefix
    symbols = adata.var['SYMBOL']
    adata.var['mt'] = symbols.str.startswith('MT-')
    adata.var['ribo'] = symbols.str.startswith(("RPS", "RPL"))
    adata.var['mtrnr'] = symbols.str.startswith("MTRNR")

    # Fraction of each spot's counts that comes from the genes flagged as 'mt'.
    # np.asarray(...).ravel() yields a flat array for both matrix and array results.
    mt_counts = np.asarray(adata[:, adata.var['mt'].values].X.sum(axis=1)).ravel()
    adata.obs['mt_frac'] = mt_counts / adata.obs['total_counts']

    # add sample name to obs names
    adata.obs_names = adata.obs["sample"] + '_' + adata.obs_names
    adata.obs.index.name = 'spot_id'

    return adata

def select_slide(adata, s, s_col='sample'):
    r""" Select the data for one slide from the spatial anndata object.

    :param adata: Anndata object with multiple spatial experiments
    :param s: name of selected experiment
    :param s_col: column in adata.obs listing experiment name for each location
    :return: independent copy of the rows whose ``s_col`` value equals ``s``; its
        ``uns['spatial']`` contains only the entry that belongs to ``s``
    :raises IndexError: if no key of ``uns['spatial']`` matches ``s``
    """

    # Copy the subset so that rewriting `uns` below acts on a real object rather
    # than on a view of `adata`.
    slide = adata[adata.obs[s_col].isin([s]), :].copy()
    s_keys = list(slide.uns['spatial'].keys())

    if s in s_keys:
        # An exact match wins; a pure substring test could pick e.g. "A_2" for "A".
        s_spatial = s
    else:
        # Fall back to the first key that contains the experiment name
        candidates = [k for k in s_keys if s in k]
        if not candidates:
            raise IndexError(
                f"no entry matching {s!r} in uns['spatial'] (available keys: {s_keys})"
            )
        s_spatial = candidates[0]

    slide.uns['spatial'] = {s_spatial: slide.uns['spatial'][s_spatial]}

    return slide

In [None]:
# Load every sample into its own anndata object (QC metrics are computed while reading)
slides = [read_and_qc(sample) for sample in sample_names]

In [None]:
# De-duplicate the gene identifiers, then recompute the QC metrics, this time
# including the gene sets flagged in adata.var['mt'] and adata.var['ribo'].
qc_gene_sets = ["mt", "ribo"]
for adata in slides:
    adata.var_names_make_unique()
    sc.pp.calculate_qc_metrics(adata, qc_vars=qc_gene_sets, inplace=True)

In [None]:
# Histograms of counts per spot and detected genes per spot for every sample.
# Each metric is drawn twice: over its full range and restricted to values below a cutoff.
for adata in slides:
    # .iloc[0]: obs is indexed by spot id, so a bare [0] relies on the deprecated
    # positional fallback of Series.__getitem__.
    sample_label = adata.obs['sample'].iloc[0]
    total_counts = adata.obs["total_counts"]
    n_genes = adata.obs["n_genes_by_counts"]

    # (values to plot, extra displot keyword arguments)
    histograms = [
        (total_counts, {}),
        (total_counts[total_counts < 10000], {"bins": 40}),
        (n_genes, {"bins": 60}),
        (n_genes[n_genes < 4000], {"bins": 60}),
    ]
    for values, hist_kwargs in histograms:
        g = sns.displot(values, kde=False, **hist_kwargs)
        g.fig.suptitle(sample_label)

In [None]:
# Count thresholds differ between the two slides:
# (index into `slides`, minimum counts per spot, maximum counts per spot)
count_limits = [
    (0, 500, 10000),
    (1, 100, 8000),
]
for slide_idx, lower, upper in count_limits:
    sc.pp.filter_cells(slides[slide_idx], min_counts=lower)
    sc.pp.filter_cells(slides[slide_idx], max_counts=upper)
    # afterwards drop genes found in fewer than 10 of the remaining spots
    sc.pp.filter_genes(slides[slide_idx], min_cells=10)

In [None]:
# Distribution of the mitochondrial count fraction per spot, one plot per sample
for adata in slides:
    # .iloc[0] is an explicit positional lookup; obs is indexed by spot id
    sample_label = adata.obs["sample"].iloc[0]
    sc.pl.violin(adata, 'mt_frac', xlabel=sample_label, ylabel="mt_frac")

In [None]:
for idx, adata in enumerate(slides):
    # .iloc[0] is an explicit positional lookup; obs is indexed by spot id
    print(f"Sample: {adata.obs['sample'].iloc[0]}")
    print(f"#genes before MT filter: {adata.n_vars}")

    # remove MT genes for spatial mapping (keeping their counts in the object)
    is_mt = adata.var['mt'].values
    adata.obsm['MT'] = adata[:, is_mt].X.toarray()
    # .copy() stores a real object instead of a view of `adata`, so later
    # in-place writes do not trigger an implicit copy.
    slides[idx] = adata[:, ~is_mt].copy()
    print(f"#genes after MT filter: {slides[idx].n_vars}")
    print("==============================")

In [None]:
for idx, adata in enumerate(slides):
    # .iloc[0] is an explicit positional lookup; obs is indexed by spot id
    print(f"Sample: {adata.obs['sample'].iloc[0]}")
    print(f"#genes before MTrnr filter: {adata.n_vars}")

    # remove the genes flagged as 'mtrnr', keeping their counts in obsm['MTRNR']
    is_mtrnr = adata.var['mtrnr'].values
    adata.obsm['MTRNR'] = adata[:, is_mtrnr].X.toarray()
    # .copy() stores a real object instead of a view of `adata`, so later
    # in-place writes do not trigger an implicit copy.
    slides[idx] = adata[:, ~is_mtrnr].copy()
    print(f"#genes after MTrnr filter: {slides[idx].n_vars}")
    print("==============================")

In [None]:
for idx, adata in enumerate(slides):
    # .iloc[0] is an explicit positional lookup; obs is indexed by spot id
    print(f"Sample: {adata.obs['sample'].iloc[0]}")
    print(f"#genes before RIBO filter: {adata.n_vars}")

    # remove the genes flagged as 'ribo', keeping their counts in obsm['ribo']
    is_ribo = adata.var['ribo'].values
    adata.obsm['ribo'] = adata[:, is_ribo].X.toarray()
    # .copy() stores a real object instead of a view of `adata`, so later
    # in-place writes do not trigger an implicit copy.
    slides[idx] = adata[:, ~is_ribo].copy()
    print(f"#genes after RIBO filter: {slides[idx].n_vars}")
    print("==============================")

In [None]:
# Normalise and log-transform each slide, keeping snapshots of X in layers
for adata in slides:
    # X as it is before normalisation
    adata.layers['counts'] = adata.X.copy()

    sc.pp.normalize_total(adata, inplace=True)
    sc.pp.log1p(adata)
    # X after total-count normalisation and log1p
    adata.layers['normcounts'] = adata.X.copy()

    sc.pp.highly_variable_genes(adata, flavor="cell_ranger", n_top_genes=2000)

In [None]:
# Per slide: scale -> PCA -> neighbourhood graph -> UMAP, all with default settings,
# followed by Leiden clustering stored in obs['clusters'].
default_steps = (sc.pp.scale, sc.pp.pca, sc.pp.neighbors, sc.tl.umap)
for adata in slides:
    for step in default_steps:
        step(adata)
    sc.tl.leiden(adata, key_added="clusters")

In [None]:
# UMAPs coloured by QC metrics and cluster assignment; one figure file per sample
plt.rcParams["figure.figsize"] = (4, 4)
for adata in slides:
    # .iloc[0] is an explicit positional lookup; obs is indexed by spot id
    name = adata.obs['sample'].iloc[0]
    sc.pl.umap(adata, color=["total_counts", "n_genes_by_counts", "clusters"], wspace=0.4,
               save=f'_{name}_countsAndClusters_umaps.svg')

In [None]:
# QC metrics drawn on the tissue image; one figure file per sample
plt.rcParams["figure.figsize"] = (8, 8)
for adata in slides:
    # .iloc[0] is an explicit positional lookup; obs is indexed by spot id
    name = adata.obs['sample'].iloc[0]
    sc.pl.spatial(adata, img_key="hires", color=["total_counts", "n_genes_by_counts"], vmax='p95.0',
                  save=f'_{name}_counts.svg')

In [None]:
# Build a spatial neighbourhood graph per slide and draw it together with the clusters
for adata in slides:
    # .iloc[0] is an explicit positional lookup; obs is indexed by spot id
    name = adata.obs['sample'].iloc[0]
    # NOTE(review): with coord_type='generic' the radius is presumably measured in the
    # units of the stored spot coordinates -- confirm that 3.0 actually connects
    # neighbouring spots for this data.
    sq.gr.spatial_neighbors(adata, coord_type='generic', radius=3.0)
    sq.pl.spatial_scatter(adata, shape='circle', color='clusters', img_alpha=0.8,
                  frameon=False, figsize=(7, 3.5),
                  size=1.5, connectivity_key='spatial_connectivities', edges_width=2,
                  save=f'_{name}_connectivities_clusters1_spatialmap.svg'
                 )

In [None]:
# Marker genes per cluster: Wilcoxon test on the 'normcounts' layer
# (normalised + log1p values), shown as a heatmap of the top 5 genes per cluster.
for adata in slides:
    # .iloc[0] is an explicit positional lookup; obs is indexed by spot id
    name = adata.obs['sample'].iloc[0]
    # use_raw=None seems to translate to True which causes error, hence the explicit False
    sc.tl.rank_genes_groups(adata, "clusters", method="wilcoxon", layer='normcounts', use_raw=False)
    sc.pl.rank_genes_groups_heatmap(adata, n_genes=5, groupby="clusters",
                                    save=f'_{name}_heatmap_wilcoxonClusterDEGs.svg'
                                    # optional, currently disabled: gene_symbols='SYMBOL'
                                   )

In [None]:
# Display the gene annotation table (adata.var) of the first slide
slides[0].var