# Map single cells from Mouse Hippocampus to Space with a Slide-seqV2 dataset as the reference

In [34]:
import os
from time import time

import anndata as ad
import scanpy as sc
from SC2Spa import SI

import pandas as pd

from numpy.random import seed
from tensorflow.random import set_seed
import tensorflow as tf

## Download datasets

In [27]:
if not os.path.exists('Dataset'):
    os.makedirs('Dataset')
!wget https://figshare.com/ndownloader/files/38736651 -O Dataset/AdataMH1.h5ad
!wget https://figshare.com/ndownloader/files/38738136 -O Dataset/AMB_HC.h5ad
!wget https://figshare.com/ndownloader/files/38756529 -O Dataset/ssHippo_RCTD.csv

if not os.path.exists('tutorial1'):
    os.makedirs('tutorial1')
%cd tutorial1

--2023-01-08 18:35:56--  https://figshare.com/ndownloader/files/38736651
Resolving figshare.com (figshare.com)... 63.35.35.68, 63.32.177.41, 2a05:d018:1f4:d003:a6c:2d91:83f8:9cfb, ...
Connecting to figshare.com (figshare.com)|63.35.35.68|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://s3-eu-west-1.amazonaws.com/pfigshare-u-files/38736651/AdataMH1.h5ad?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIYCQYOYV5JSSROOA/20230108/eu-west-1/s3/aws4_request&X-Amz-Date=20230108T173557Z&X-Amz-Expires=10&X-Amz-SignedHeaders=host&X-Amz-Signature=cb594a3e00979ef0d36ad2db45bf5caeaa843eac9351c468b3a769f3dda98b85 [following]
--2023-01-08 18:35:57--  https://s3-eu-west-1.amazonaws.com/pfigshare-u-files/38736651/AdataMH1.h5ad?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIYCQYOYV5JSSROOA/20230108/eu-west-1/s3/aws4_request&X-Amz-Date=20230108T173557Z&X-Amz-Expires=10&X-Amz-SignedHeaders=host&X-Amz-Signature=cb594a3e00979ef0d36ad2db45bf5caeaa843eac9351c

## Load datasets

In [30]:
# Load the spatial reference and the single-cell query datasets.
adata_ref = ad.read_h5ad('../Dataset/AdataMH1.h5ad')
adata_query = ad.read_h5ad('../Dataset/AMB_HC.h5ad')

# Upper-case the gene names of both datasets so they follow one convention,
# then de-duplicate names (upper-casing can make distinct names collide).
adata_ref.var_names = adata_ref.var_names.str.upper()
adata_query.var_names = adata_query.var_names.str.upper()

adata_ref.var_names_make_unique()
adata_query.var_names_make_unique()

# Normalize: scale every observation to a total of 1e4, then log1p-transform.
sc.pp.normalize_total(adata_ref, target_sum=1e4)
sc.pp.log1p(adata_ref)
sc.pp.normalize_total(adata_query, target_sum=1e4)
sc.pp.log1p(adata_query)

# Load the annotation table and derive a single merged label, 'MCT', from the
# 'celltype_1' and 'celltype_2' columns.
Anno = pd.read_csv('../Dataset/ssHippo_RCTD.csv', index_col = 0)
Anno['MCT'] = 't'  # placeholder, overwritten for every row below

# Rows where both calls agree keep that single cell type.
# `.loc[rows, col] = ...` writes into Anno itself; the previous chained form
# `Anno['MCT'][rows] = ...` assigns through a temporary and is not guaranteed
# to update the frame (SettingWithCopyWarning / no-op under Copy-on-Write).
index1 = Anno.index[(Anno['celltype_1'] == Anno['celltype_2'])]
Anno.loc[index1, 'MCT'] = Anno.loc[index1, 'celltype_1']

# Rows where the calls differ get both names joined by '_'. The joined label
# is split on '_', de-duplicated and sorted so that 'A_B' and 'B_A' collapse
# to the same label.
index2 = Anno.index[(Anno['celltype_1'] != Anno['celltype_2'])]
Anno.loc[index2, 'MCT'] = (
    Anno.loc[index2, 'celltype_1'] + '_' + Anno.loc[index2, 'celltype_2']
).apply(lambda x: '_'.join(sorted(set(x.split('_')))))

# Attach the annotation to the reference observations; a left join keeps every
# reference observation even when it has no annotation row.
adata_ref.obs = adata_ref.obs.merge(Anno, left_index = True, right_index = True, how = 'left')

# Expose the coordinates under the conventional 'spatial' key.
adata_ref.obsm['spatial'] = adata_ref.obs[['xcoord', 'ycoord']].values

# Clean up the query labels. '?' is a literal character here, so regex=False
# makes the intent explicit and independent of the pandas default.
adata_query.obs['common_name'] = adata_query.obs['common_name'].str.replace('?', '', regex = False)

# 'simp_name': keep only the text before the first '.', ',' or ' (' and fix
# two misspellings. The ' \(' pattern is a regular expression, hence the raw
# string (a plain '\(' is an invalid escape sequence).
adata_query.obs['simp_name'] = (
    adata_query.obs['common_name']
    .str.split('.', expand = True)[0]
    .str.split(',', expand = True)[0]
    .str.split(r' \(', expand = True)[0]
    .str.replace('cortexm', 'cortex', regex = False)
    .replace('Medial entorrhinal cortex', 'Medial entorhinal cortex')
)

Variable names are not unique. To make them unique, call `.var_names_make_unique`.
  adata_query.obs['common_name'] = adata_query.obs['common_name'].str.replace('?', '')


## Select genes using Wasserstein distance (Optional)

In [None]:
# Compute per-gene Wasserstein distances between reference and query and time
# the call. Results are written under root/save ('WDs/WDs_T2').
# NOTE(review): exact meaning of the returned JGs / WDs is defined by SC2Spa -
# confirm against its documentation.
sta = time()
JGs, WDs = SI.WassersteinD(
    adata_ref,
    adata_query,
    sparse=True,
    WD_cutoff=0.1,
    root='WDs/',
    save='WDs_T2',
)
end = time()

# Report the elapsed wall-clock time in minutes.
elapsed_min = (end - sta) / 60.0
print(elapsed_min, 'min')

In [None]:
# Rebuild the gene list from the saved distance table with a different cutoff.
# NOTE: this overwrites the WDs and JGs variables set by the previous cell.
WD_cutoff = 0.4

root = 'WDs/'
save = 'WDs_T2'

# Presumably the table written by SI.WassersteinD above (same root/save) -
# confirm the file name against SC2Spa.
WDs = pd.read_csv(f'{root}{save}.csv')

# Keep the genes whose distance is strictly below the cutoff, sorted by name.
below_cutoff = WDs['Wasserstein_Distance'] < WD_cutoff
JGs = sorted(WDs.loc[below_cutoff, 'Gene'].tolist())

## Fine Mapping

In [35]:
# Seed every random number generator used here (NumPy, TensorFlow, Keras) with
# the same value so that training is repeatable.
seed_num = 2022
seed(seed_num)
set_seed(seed_num)
tf.keras.utils.set_random_seed(seed_num)

'''
Finely map single cells to spatial locations.
A model will be trained and saved to `root+name+'.h5'` if model_path is None and save is True.
The predicted coordinates of single cells will be saved in adata_query.obsm['spatial_mapping']
The predicted coordinates of beads will be saved in adata_ref.obsm['spatial_mapping']
Fine mapping information will be saved in adata_ref.obs['FM'] and adata_query.obs['FM']. True if a cell/bead
was mapped, otherwise False.
'''
sta = time()

# JGs=None: the gene list from the optional Wasserstein section is not passed.
# NOTE(review): presumably SC2Spa then uses the genes shared by both datasets -
# confirm in its documentation.
neighbors, dis = SI.FineMapping(
    adata_ref,
    adata_query,
    sparse=True,
    JGs=None,
    model_path=None,
    root='Model_SI/',
    name='SI_T2',
    l1_reg=1e-5,
    l2_reg=0,
    dropout=0.05,
    epoch=500,
    batch_size=4096,
    nodes=[4096, 1024, 256, 64, 16, 4],
    lrr_patience=20,
    ES_patience=50,
    min_lr=1e-5,
    save=True,
    polar=True,
    n_neighbors=1000,
    dis_cutoff=20,
    seed=seed_num,
)

end = time()

# Report the elapsed wall-clock time in minutes.
elapsed_min = (end - sta) / 60.0
print(elapsed_min, 'min')

n of Referece Genes: 20527
n of Target Genes: 24509
n of Selected Genes: 19986
(35349, 19986)
(127165, 19986)


2023-01-08 18:50:00.512863: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-01-08 18:50:01.267225: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22406 MB memory:  -> device: 0, name: NVIDIA TITAN RTX, pci bus id: 0000:3b:00.0, compute capability: 7.5
  model.fit_generator(generator = batch_generator(X,\


Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
2/8 [=====>........................] - ETA: 3s - loss: 0.5234 - rmse: 0.0930

KeyboardInterrupt: 