In [None]:
import os
import re

import IPython

import pandas as pd

import scanpy as sc

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

# scanpy prints a lot of warnings; note that this filter hides *every* warning
# raised in this session, not only the ones coming from scanpy
import warnings
warnings.simplefilter('ignore')

# figures written through scanpy's `save=` argument go to the working directory
sc.settings.figdir = './'

In [None]:
# All relative paths used below (results/, data/, tmp/) are resolved against the
# project root. The original location stays the default; set the
# TENDONHCA_PROJECT_DIR environment variable to run the notebook elsewhere.
os.chdir(os.environ.get("TENDONHCA_PROJECT_DIR", "/project/tendonhca/albrecht/003-snakemake"))

In [None]:
# Sample analysed by this notebook. It is used as the directory name under
# results/spaceranger_count/ when reading the data, as the batch category when
# building `adata`, and as part of the exported figure name.
sample_name = "OMB0793_Quad_Enth2_T"

In [None]:
def read_and_qc(sample_name):
    r""" This function reads the data for one 10X spatial experiment into the anndata object.
    It also calculates QC metrics. Modify this function if required by your workflow.

    The Space Ranger output is read from
    ``results/spaceranger_count/<sample_name>/outs`` relative to the current
    working directory.

    :param sample_name: Name of the sample (converted with ``str()``)
    :return: anndata object with ENSEMBL ids as ``var_names``, the gene symbols in
        ``var['SYMBOL']``, the sample name in ``obs['sample']``, the columns added by
        ``sc.pp.calculate_qc_metrics`` and ``obs_names`` of the form
        ``<sample_name>_<original obs name>`` (index name ``spot_id``)
    """
    sample_name = str(sample_name)

    adata = sc.read_visium(f"results/spaceranger_count/{sample_name}/outs",
                           count_file='filtered_feature_bc_matrix.h5', load_images=True)
    adata.obs['sample'] = sample_name

    # Keep the gene symbols as a column and index the genes by their ENSEMBL id
    adata.var['SYMBOL'] = adata.var_names
    adata.var.rename(columns={'gene_ids': 'ENSEMBL'}, inplace=True)
    adata.var_names = adata.var['ENSEMBL']
    adata.var.drop(columns='ENSEMBL', inplace=True)

    # Calculate QC metrics directly on the sparse count matrix: converting it to
    # a dense array and back only costs memory (spots x genes values at once).
    sc.pp.calculate_qc_metrics(adata, inplace=True)

    # Prefix the obs names with the sample name so that spot ids stay unique
    # when several samples are combined
    adata.obs_names = sample_name + '_' + adata.obs_names
    adata.obs.index.name = 'spot_id'

    return adata

def select_slide(adata, s, s_col='sample'):
    r""" This function selects the data for one slide from the spatial anndata object.

    The entry of ``uns['spatial']`` belonging to the slide is looked up by name:
    a key equal to ``s`` is preferred, otherwise the first key that contains
    ``s`` as a substring is used.

    :param adata: Anndata object with multiple spatial experiments
    :param s: name of selected experiment
    :param s_col: column in adata.obs listing experiment name for each location
    :return: copy of the rows of ``adata`` where ``obs[s_col] == s``, whose
        ``uns['spatial']`` holds only the entry of the selected experiment
    :raises IndexError: if no key of ``uns['spatial']`` matches ``s``
    """

    # Copy explicitly: `.uns` is reassigned below, and the subset must not be
    # left as a view of `adata` when that happens.
    slide = adata[adata.obs[s_col].isin([s]), :].copy()

    s_keys = list(slide.uns['spatial'].keys())
    if s in s_keys:
        # An exact match wins, so that e.g. "A1" is never resolved to "A10"
        s_spatial = s
    else:
        candidates = [k for k in s_keys if s in k]
        if not candidates:
            # Same exception type as before, but with an actionable message
            raise IndexError(
                f"no entry matching {s!r} in adata.uns['spatial'] (available: {s_keys})"
            )
        s_spatial = candidates[0]

    slide.uns['spatial'] = {s_spatial: slide.uns['spatial'][s_spatial]}

    return slide

In [None]:
# Load every requested sample into its own anndata object
sample_names = [sample_name]
slides = [read_and_qc(name) for name in sample_names]

# Merge the per-sample objects into one; `index_unique=None` keeps the spot ids
# exactly as produced by read_and_qc
first_slide, remaining_slides = slides[0], slides[1:]
adata = first_slide.concatenate(
    remaining_slides,
    batch_key="sample",
    uns_merge="unique",
    batch_categories=sample_names,
    index_unique=None,
)
adata

In [None]:
# Tab-separated marker table; the cells below read its 'cell_type' and
# 'gene_symbol' columns
curated_markers = pd.read_table("data/curated_markers.tsv")
curated_markers.head()

In [None]:
# Gene symbols of the curated markers listed for one cell type
cell_type = "Fibroblast"
is_selected_cell_type = curated_markers['cell_type'] == cell_type
markers_symbols = curated_markers.loc[is_selected_cell_type, 'gene_symbol'].tolist()
markers_symbols

In [None]:
slide = select_slide(adata, sample_name)

# Keep only the markers that exist in this slide. The curated order is
# preserved (and duplicates dropped) so that the panel order is the same on
# every run; a set intersection would return the symbols in arbitrary order.
symbols_in_slide = set(slide.var['SYMBOL'])
markers_symbols_available = [
    symbol for symbol in dict.fromkeys(markers_symbols) if symbol in symbols_in_slide
]

# One panel for the total counts plus one per available marker gene
fig = sc.pl.spatial(slide, img_key = "hires", cmap='magma',
                  library_id=list(slide.uns['spatial'].keys())[0],
                  color=['total_counts'] + markers_symbols_available, size=1,
                  vmin=0, vmax='p95.0',
                  gene_symbols='SYMBOL', show=False, return_fig=True,
                  save=f"-sc_pl_spatial-{cell_type}.png")

In [None]:
# The gene symbols of the slide are the same for every cell type: build the
# lookup once, outside the loop
symbols_in_slide = set(slide.var['SYMBOL'])

# Destination directory of the exported figures
os.makedirs("tmp", exist_ok=True)

for cell_type in curated_markers['cell_type'].unique():
    markers_symbols = curated_markers['gene_symbol'][curated_markers['cell_type'] == cell_type].tolist()
    # Recompute the available markers for *this* cell type (previously the list
    # computed for a single cell type in an earlier cell was reused for all of
    # them); curated order is kept so the panel order is reproducible
    markers_symbols_available = [
        symbol for symbol in dict.fromkeys(markers_symbols) if symbol in symbols_in_slide
    ]
    fig = sc.pl.spatial(
        slide, img_key = "hires", cmap='magma',
        library_id=list(slide.uns['spatial'].keys())[0],
        color=['total_counts'] + markers_symbols_available, size=1,
        vmin=0, vmax='p95.0',
        gene_symbols='SYMBOL', show=False, return_fig=True,
        save=f"-sc_pl_spatial-{cell_type}.png")
    # Move the saved figure to tmp/. The cell type is part of the target name
    # so that each figure is kept; with a single shared target every iteration
    # overwrote the previous one.
    # NOTE(review): anything that expects tmp/<sample_name>.png must be updated.
    os.replace(f"show-sc_pl_spatial-{cell_type}.png", f"tmp/{sample_name}-{cell_type}.png")