In [None]:
import anndata
import pandas as pd
import numpy as np
import matplotlib as plt
from scipy import sparse
import scanpy as sc
import seaborn as sns
import Scanpyplus
from matplotlib.pyplot import savefig

#default figure size for every plot in the notebook
#(`plt` is the top-level matplotlib package here, which exposes rcParams directly)
plt.rcParams['figure.figsize'] = (20, 10)

In [None]:
#load the ISS AnnData object from its h5ad file
iss_h5ad_path = '../path/to/file.h5ad'
iss = sc.read_h5ad(iss_h5ad_path)

In [None]:
#keep only ISS cells with at least 3 detected genes; min_genes is an inclusive
#lower bound, so cells expressing exactly 3 genes are retained (iss is filtered in place)
sc.pp.filter_cells(iss, min_genes=3)

In [None]:
#log-transform and then scale the ISS expression matrix; both calls modify iss in place
#NOTE(review): the single cell reference loaded below is normalised and log-transformed
#but never scaled, so query and index may live in different feature spaces - confirm
#that this is intended
sc.pp.log1p(iss)
sc.pp.scale(iss)

In [None]:
#load the single cell reference (raw counts), keep only the genes shared with the
#ISS panel (in ISS gene order), and drop cells expressing fewer than 3 of those genes
adata = sc.read('../path/to/file.h5ad')

#fail with a readable message if the ISS panel contains genes the reference lacks,
#instead of an opaque indexing error from the subsetting below
missing_genes = iss.var_names.difference(adata.var_names)
if len(missing_genes) > 0:
    raise KeyError(
        f"{len(missing_genes)} ISS genes are absent from the reference: "
        f"{list(missing_genes[:10])}"
    )

#.copy() turns the subset view into a real AnnData object, so the in-place
#preprocessing below does not have to implicitly materialise a view
adata = adata[:, iss.var_names].copy()
sc.pp.filter_cells(adata, min_genes=3)
#per-cell totals are computed over the shared genes only
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)

In [1]:
#separate out the non-shared genes of the single cell data for later imputation.
#adata has already been cut down to iss.var_names, so selecting "genes not in the
#panel" from it would leave zero genes; the full reference is re-read instead.
ref_full = sc.read('../path/to/file.h5ad')
#keep exactly the cells that survived filtering in adata, in the same order, so
#row i of bdata is the same cell as row i of adata (assumes unique obs_names)
ref_full = ref_full[adata.obs_names].copy()
#NOTE(review): normalised over all genes here, whereas adata is normalised over
#the shared genes only - confirm this is the scale wanted for imputed values
sc.pp.normalize_total(ref_full)
sc.pp.log1p(ref_full)
bdata = ref_full[:, ~ref_full.var_names.isin(iss.var_names)].copy()

In [None]:
#annoy index built from the single cell reference - the ISS cells are the queries
from annoy import AnnoyIndex

N_TREES = 10       #higher n_trees -> higher precision, slower build
N_NEIGHBOURS = 15  #reference neighbours retrieved per ISS cell


def _dense_row(matrix, row):
    """Return row `row` of `matrix` as a 1-D dense numpy vector.

    Works for both numpy arrays and scipy sparse matrices; a sparse row has to
    be densified explicitly because np.asarray() on it does not yield numbers.
    """
    vec = matrix[row]
    if sparse.issparse(vec):
        vec = vec.toarray()
    return np.asarray(vec).ravel()


#NOTE(review): iss was scaled above while adata was only normalised and
#log-transformed, so euclidean distances compare differently scaled values - confirm
ckd = AnnoyIndex(adata.X.shape[1], metric="euclidean")  #one dimension per shared gene
ckd.set_seed(0)  #fixed seed so the approximate neighbours are reproducible across runs
for i in range(adata.X.shape[0]):
    ckd.add_item(i, _dense_row(adata.X, i))
ckd.build(N_TREES)

#for every ISS cell: row indices into adata of its neighbours, and their distances
ckdo_ind = []
ckdo_dist = []
for i in range(iss.X.shape[0]):
    neighbours, distances = ckd.get_nns_by_vector(
        _dense_row(iss.X, i), N_NEIGHBOURS, include_distances=True
    )
    ckdo_ind.append(neighbours)
    ckdo_dist.append(distances)

In [None]:
#now impute genes not included in the panel using the non-shared single cell object
#index the expression matrix and the label column directly: building two AnnData
#views per ISS cell just to read .X and .obs is needlessly slow
ref_expr = bdata.X
ref_labels = bdata.obs['leiden_R_anno_id']

knn_expr_means = []
celltype = []
for neighbours in ckdo_ind:
    #per-gene mean over this ISS cell's reference neighbours, stored as a 1 x n_genes CSR row
    knn_expr_means.append(sparse.csr_matrix(ref_expr[neighbours].mean(axis=0)))
    #majority vote over the neighbours' annotations (ties are broken by pandas' ordering)
    celltype.append(ref_labels.iloc[neighbours].value_counts().index[0])

#create new adata object with imputed identity and expression profile;
#stack straight to CSR rather than handing AnnData the default COO result
cdata = anndata.AnnData(sparse.vstack(knn_expr_means, format='csr'))
cdata.var_names = bdata.var_names
cdata.obs_names = iss.obs_names

#the lists follow ISS row order, so the labels can be assigned positionally
iss.obs['celltype'] = celltype
#copy so that later edits to cdata.obs do not silently change iss.obs
cdata.obs = iss.obs.copy()
cdata.obsm = iss.obsm

In [None]:
####plotting####
#same figure size as set at the top of the notebook, re-applied before plotting
plt.rcParams['figure.figsize'] = (20, 10)

In [None]:
#all celltypes: colour every ISS cell by its transferred label.
#the labels are stored in obs['celltype'] by the imputation step; 'fineanno' is
#never created in this notebook
sc.pl.embedding(cdata, basis='spatial', color='celltype', size=20)

In [None]:
#subset for each plot to allow custom colours
#the palette keys double as the list of cell types to keep in the subset
plot1_palette = {
    "Sinusoidal Endothelium": "green",
    "Vascular Endothelium": "green",
    "Macrophage": "magenta",
    "Mesenchymal Condensate 2": "orange",
    "Dermal Fibroblast": "blue",
    "SCP": "#f4cccc",
    "Periderm": "#a8cddf",
}
in_plot1 = cdata.obs["celltype"].isin(list(plot1_palette))
plot1 = cdata[in_plot1].copy()
sc.pl.embedding(
    plot1,
    basis='spatial',
    color='celltype',
    palette=plot1_palette,
    save=".png",
    size=60,
)