In [1]:
"""
This script is used to perform deconvolution with tangram

authors: Roy Oelen
"""

'\nThis script is used to perform deconvolution with tangram\n\nauthors: Roy Oelen\n'

In [2]:
# load the libraries
import os, sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import scipy
import torch
import tangram as tg
import pickle

tg.__version__

  from .autonotebook import tqdm as notebook_tqdm


'1.0.4'

In [3]:
# objects
class MappingResult:
    """
    Object to the results of Tangram mapping
    """
    def __init__(self, spatial_object, single_cell_object, mapping):
        """constructor
        
        Parameters
        ----------
        spatial_object : AnnData
            AnnData scanpy object of ST
        single_cell_object : AnnData
            AnnData scanpy object of SC
        mapping : AnnData
            AnnData resulting file
        
        """
        self.__spatial_object = spatial_object
        self.__single_cell_object = single_cell_object
        self.__mapping = mapping
    
    def get_spatial_object(self):
        """get ST object used for the mapping
        
        
        Returns
        -------
        result
           The spatial object used for the mapping 
        """
        return self.__spatial_object
    
    def get_single_cell_object(self):
        """get SC object used for the mapping
        
        
        Returns
        -------
        result
           The single cell object used for the mapping 
        """
        return self.__single_cell_object
    
    def get_mapping(self):
        """get the mapping result object
        
        
        Returns
        -------
        result
          The object created from doing the Tangram mapping
        """
        return self.__mapping


In [4]:
def perform_tangram_mapping(slices_dict, reference, n_genes=500, mode='cells'):
    """perform tangram mapping on a dictionary
        
    Parameters
    ----------
    slices_dict : dict
        the dictionary of ST objects to do the mapping for
    reference : AnnData
        the reference AnnData SC object to use for mapping
    n_genes : int
        the top number of most variable genes to use for the mapping
    mode : str
        the mode to use
        
    Returns
    -------
    result
       a pandas dataframe instance of the expression data for the given cell type, or None if the cell type supplied doesn't exist
    """
    # we will store the results in a dictionary
    mapped_slices = {}
    # check each slice
    for slice_name,slice_object in slices_dict.items():
        # calculate variable genes
        variable_table = sc.pp.highly_variable_genes(slice_object, inplace = False, flavor='seurat_v3', n_top_genes=n_genes)
        # select the markers
        markers = list(variable_table[(variable_table["highly_variable"] == True) & (variable_table["highly_variable_rank"] <= n_genes)].index)
        # get overlapping markers
        tg.pp_adatas(reference, slice_object, genes=markers)
        # do mapping
        ad_map = tg.map_cells_to_space(
            adata_sc=reference,
            adata_sp=slice_objects_reloaded['V10A20-016_D1'],
#             device='cpu',
            device='cuda:0',
            mode=mode
        )
        tg.project_cell_annotations(ad_map, ad_sp, annotation='cell_type')
        # create an object to store the result
        mapping_result = MappingResult(slice_object, reference, ad_map)
        # put in a dictionary
        mapped_slices[slice_name] = mapping_result


In [5]:
# load the ST data
slice_objects_reloaded = None
with open(''.join(['/groups/umcg-franke-scrna/tmp02/projects/epifat/ongoing/seurat_preprocess_samples/objects/', 'spaceranger.20230823.pickle']), 'rb') as f:
    slice_objects_reloaded = pickle.load(f)

In [6]:
# read the reference
reference  = sc.read_h5ad('/groups/umcg-franke-scrna/tmp02/external_datasets/hca/Global_lognormalised.h5ad')

In [7]:
variable_table = sc.pp.highly_variable_genes(slice_objects_reloaded['V10A20-016_D1'], inplace = False, flavor='seurat_v3', n_top_genes=500)
markers = list(variable_table[(variable_table["highly_variable"] == True) & (variable_table["highly_variable_rank"] <= 100)].index)

In [8]:
tg.pp_adatas(reference, slice_objects_reloaded['V10A20-016_D1'], genes=markers)
# tg.pp_adatas(reference, slice_objects_reloaded['V10A20-016_D1'])

INFO:root:101 training genes are saved in `uns``training_genes` of both single cell and spatial Anndatas.
INFO:root:12743 overlapped genes are saved in `uns``overlap_genes` of both single cell and spatial Anndatas.
INFO:root:uniform based density prior is calculated and saved in `obs``uniform_density` of the spatial Anndata.
INFO:root:rna count based density prior is calculated and saved in `obs``rna_count_based_density` of the spatial Anndata.


In [9]:
ad_map = tg.map_cells_to_space(
    adata_sc=reference,
    adata_sp=slice_objects_reloaded['V10A20-016_D1'],
#     device='cpu',
    device='cuda:0',
    mode='clusters'
)

ValueError: A cluster_label must be specified if mode is 'clusters'.

In [None]:
# try with all of them
mapping_all = perform_tangram_mapping(slice_objects_reloaded, reference)