# Tangram

We followed the procedure outlined in the Tangram GitHub repository: https://github.com/broadinstitute/Tangram. Tangram is an integration method that combines spatial transcriptomics and single-cell RNA-seq data, using non-convex optimization together with deep learning to learn a spatial alignment for the single-cell data.


## Imports

In [3]:
## Path to the downloaded GitHub repository; the helper-module, data and result paths below are all built from it
global_path = './cell-cell-communication/'

In [8]:
import scanpy as sc
import tangram as tg  # must be active: the tg.* calls further down raise NameError without it
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys

## Make the repository's helper modules importable before importing from them
sys.path.insert(1, global_path + 'Functions/')
from process_visium import visium_noh5

## Scanpy verbosity and default figure parameters
sc.settings.verbosity = 3
sc.settings.set_figure_params(dpi=80, facecolor="white")

patient = 'CID4535' ### Change to run for the rest of the patients (CID4290, CID44971, CID4465)

## Load the data

In [9]:
### Spatial data of the selected patient: count-matrix folder + spatial folder
library_id = 'breast_tissue'
main_path_exp = f"{global_path}Data/Breast/spatial_data/filtered_count_matrices/{patient}_filtered_count_matrix/"
main_path_spatial = f"{global_path}Data/Breast/spatial_data/spatial/{patient}_spatial/"
adata_vis = visium_noh5(main_path_exp, main_path_spatial, library_id)

### Metadata table with the spot annotation of the same patient
meta_patho = pd.read_csv(f"{global_path}Data/Breast/spatial_data/metadata/{patient}_metadata.csv")

### Breast cancer reference AnnData, restricted to the rows of the current patient
adata_ref = sc.read(f"{global_path}Data/Breast/normalized_log.h5ad")
adata_ref = adata_ref[adata_ref.obs['Patient'] == patient]

... reading from cache file cache/aloy-home-epareja-2022-single_cell-paper_folders-Data-Breast-spatial_data-filtered_count_matrices-CID4535_filtered_count_matrix-matrix.h5ad


## Select common genes between the spatial and the reference dataset

In [111]:
## Genes found in both the reference and the spatial dataset are handed to Tangram's preprocessing
shared_genes = set(adata_ref.var_names) & set(adata_vis.var_names)
markers = list(shared_genes)
tg.pp_adatas(adata_ref, adata_vis, genes=markers)


16253

In [113]:
## Sanity check: both AnnData objects must carry a "training_genes" entry in .uns
assert all("training_genes" in adata.uns for adata in (adata_ref, adata_vis))

## Map cells to spots

In [114]:
## Settings of the Tangram mapping run, collected in one place
mapping_options = {
    "mode": "cells",
    "density_prior": "rna_count_based",
    "num_epochs": 500,
    "device": "cpu",  # NOTE(review): presumably a GPU device string can be used here instead - confirm
}
ad_map = tg.map_cells_to_space(adata_ref, adata_vis, **mapping_options)

INFO:root:Allocate tensors for mapping.


INFO:root:Begin training with 15666 genes and rna_count_based density_prior in cells mode...
INFO:root:Printing scores every 100 epochs.


Score: 0.326, KL reg: 0.305
Score: 0.548, KL reg: 0.001
Score: 0.558, KL reg: 0.000
Score: 0.561, KL reg: 0.000
Score: 0.562, KL reg: 0.000


INFO:root:Saving results..


In [115]:
## Project the cell annotations (in our case the cell types stored under "celltype_minor") onto the space; this is for visualization
tg.project_cell_annotations(ad_map, adata_vis, annotation="celltype_minor")

INFO:root:spatial prediction dataframe is saved in `obsm` `tangram_ct_pred` of the spatial AnnData.


## Select the single best spot for each cell

For each cell, select the spot with the highest probability.

In [119]:
## Select the information of the spots.
## The pathology labels are copied into ad_map.var by position, so the metadata rows must list
## exactly the same spots, in the same order, as ad_map.var. Stop here if they do not, instead of
## silently attaching labels to the wrong spots.
if not np.array_equal(meta_patho['Unnamed: 0'].values, ad_map.var.index.values):
    raise ValueError(
        "Spot metadata does not match ad_map.var: the 'Unnamed: 0' column and the spot index "
        "differ in content, order or length"
    )

ad_map.var['pathology'] = meta_patho['Classification'].values ## add pathology to the spatial data for future analysis
annotation_spots = ad_map.var['pathology'].to_numpy()  # one label per spot, as a 1-D array
coords = ad_map.var[['array_row', 'array_col']]
spots_order = ad_map.var.index.values

In [120]:
## Add for each cell the most probable spot with its metadata.
## ad_map.X is indexed as [cell, spot]. Only the single best spot of every row is needed, so take
## the row-wise argmax instead of fully sorting each row (on exact ties the first spot is chosen).
most_prob_spot = np.asarray(ad_map.X.argmax(axis=1)).ravel()

## Look up the coordinates, pathology label and name of the chosen spot for all cells at once
coords_values = coords.values  # evaluated once instead of once per cell
cell_coords_x = coords_values[most_prob_spot, 0]
cell_coords_y = coords_values[most_prob_spot, 1]
cell_annotation = np.asarray(annotation_spots)[most_prob_spot]
cell_spots = np.asarray(spots_order)[most_prob_spot]

ad_map.obs['cell_coords_x'] = cell_coords_x
ad_map.obs['cell_coords_y'] = cell_coords_y
ad_map.obs['cell_annotation'] = cell_annotation
ad_map.obs['cell_spots'] = cell_spots

In [None]:
import os

## Save the mapping of the current patient, including the per-cell columns added above.
## The output folder is created first so that a missing Results folder does not abort the save
## after the mapping has already been computed.
output_dir = global_path + 'Results/Breast/Tangram/'
os.makedirs(output_dir, exist_ok=True)
ad_map.write_h5ad(output_dir + patient + '_ad_map.h5ad')