/
tangram.py
82 lines (67 loc) · 2.56 KB
/
tangram.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
from ....tools.decorators import method
from ....tools.utils import check_version
from ..utils import split_sc_and_sp
@method(
method_name="Tangram",
method_summary=(
"Tangram is a method to map gene expression signatures from scRNA-seq data to"
" spatial data. It performs the cell type mapping by learning a similarity"
" matrix between single-cell and spatial locations based on gene expression"
" profiles."
),
paper_name=(
"Deep learning and alignment of spatially resolved single-cell transcriptomes"
" with Tangram"
),
paper_reference="biancalani2021deep",
paper_year=2021,
code_url="https://github.com/broadinstitute/Tangram",
image="openproblems-python-pytorch",
)
def tangram(adata, test=False, num_epochs=None, n_markers=None):
# analysis based on:
# github.com/broadinstitute/Tangram/blob/master/tutorial_tangram_with_squidpy.ipynb
# using tangram from PyPi, not github version
import pandas as pd
import scanpy as sc
import tangram as tg
import torch
if test:
num_epochs = num_epochs or 10
n_markers = n_markers or 10
else: # pragma: nocover
num_epochs = num_epochs or 1000
n_markers = n_markers or 100
# get single cell reference
ad_sc, adata = split_sc_and_sp(adata)
# pre-process single cell data
sc.pp.normalize_total(ad_sc, 1e4)
sc.pp.log1p(ad_sc)
# identify marker genes
sc.tl.rank_genes_groups(ad_sc, groupby="label", use_raw=False)
# extract marker genes to data frame
markers_df = pd.DataFrame(ad_sc.uns["rank_genes_groups"]["names"]).iloc[
0:n_markers, :
]
# get union of all marker genes
markers = list(set(markers_df.melt().value.values))
# match genes between single cell and spatial data
tg.pp_adatas(ad_sc, adata, genes=markers)
# get device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# map single cells to spatial locations
ad_map = tg.map_cells_to_space(
ad_sc,
adata,
device=device,
num_epochs=num_epochs,
)
# transfer labels from mapped cells to spatial location
tg.project_cell_annotations(adata_map=ad_map, adata_sp=adata, annotation="label")
# normalize scores
pred_props = adata.obsm["tangram_ct_pred"].to_numpy()
adata.obsm["proportions_pred"] = pred_props / pred_props.sum(axis=1)[:, None]
# remove un-normalized predictions
del adata.obsm["tangram_ct_pred"]
adata.uns["method_code_version"] = check_version("tangram-sc")
return adata