In [None]:
import numpy as np
import pandas as pd
import scanpy as sc

In [None]:
from matplotlib.pyplot import hist

In [None]:
import gc

In [None]:
import spatialdm as sdm
import spatialdm.plottings as pl

In [None]:
import squidpy as sq

In [None]:
# load the pre-processed dataset
img = sq.datasets.visium_hne_image()
adata = sq.datasets.visium_hne_adata()

In [None]:
# sq.gr.spatial_neighbors(adata)
# sq.gr.nhood_enrichment(adata, cluster_key="cluster")
# sq.pl.nhood_enrichment(adata, cluster_key="cluster")

Set up

In [None]:
## SpatialDM tutorial data
# adata = sdm.datasets.melanoma()
# raw = pd.DataFrame(adata.raw.X, index=adata.obs_names, columns=adata.raw.var_names)
# log = pd.DataFrame(adata.X, index=adata.obs_names, columns=adata.var_names)

In [None]:
# Densify the sparse raw and log matrices into observation x variable DataFrames,
# the input format passed to sdm.SpatialDM in the cells below.
# .toarray() returns a plain ndarray, whereas .todense() returns the legacy np.matrix type.
raw = pd.DataFrame(adata.raw.X.toarray(), index=adata.obs_names, columns=adata.raw.var_names)
log = pd.DataFrame(adata.X.toarray(), index=adata.obs_names, columns=adata.var_names)
# raw = log  # alternative: reuse the log matrix as the raw input

In [None]:
spatialcoord = pd.DataFrame(adata.obsm['spatial'], index=adata.obs_names, columns=['x','y'])

In [None]:
# Preprocessing
my_sample = sdm.SpatialDM(log, raw, spatialcoord)     # load spatial data with simply log, raw, spatial input

In [None]:
my_sample.extract_lr(species='mouse', min_cell=3)

We set the radial basis kernel parameter l = 200, and trimmed all weights < 0.2 (cutoff) to match the normal range of CCC (200 micrometers, 1 spot away from the sender cell here)

In [None]:
my_sample.weight_matrix(l=200, cutoff=0.2, single_cell=False)  # Not single-cell resolution

In [None]:
import matplotlib.pyplot as plt
plt.scatter(my_sample.spatialcoord.x, my_sample.spatialcoord.y, c=my_sample.rbf_d[50])

In [None]:
my_sample.rbf_d

Global Moran's I

In [None]:
%%time
# Global selection of significant pairs
my_sample.spatialdm_global(1000, method='both', select_num=None)  # complete in seconds
my_sample.sig_pairs(method='permutation', fdr=True, threshold=0.1)  # select significant pairs

Moran's global I per LR across the slide

In [None]:
res = my_sample.global_res

In [None]:
res['global_I'] = my_sample.global_I

In [None]:
res

In [None]:
# Build a single boolean mask over the full frame. Chaining two boolean selections
# applied a full-length mask to an already filtered frame, which makes pandas
# reindex the mask and emit a UserWarning.
pair_mask = res.index.str.contains('SEMA4D_PLXNB3') & (res.global_I > 0.2)
res.loc[pair_mask].sort_values(by='global_I')

In [None]:
pl.global_plot(my_sample, pairs=['CSF1_CSF1R'])  # Overview of global selection

Local Moran's I

In [None]:
# Local selection of significant spots
my_sample.spatialdm_local(n_perm=100, method='both', select_num=None, nproc=1)     # local spot selection complete in seconds
my_sample.sig_spots(threshold=0.1)  # significant local spots
pl.plot_pairs(my_sample, ['CSF1_CSF1R'], marker='s') # visualize known melanoma pair(s)

In [None]:
my_sample.local_z

Re-implement Moran's Global I

1. DataFrame /w L & R 
2. Calculate L & R means & props (accounting for complexes)
3. Re-implement radial kernel weight
4. Moran's R
5. p-val /w Permutations
6. z-score pvals

In [None]:
adata

In [None]:
from liana.method._pipe_utils._pre import prep_check_adata
from liana.resource import select_resource, explode_complexes
from threadpoolctl import threadpool_limits

In [None]:
from liana.method._pipe_utils import prep_check_adata, filter_resource

In [None]:
from liana.method._liana_pipe import filter_reassemble_complexes

In [None]:
_key_cols = ['ligand_complex', 'receptor_complex']

In [None]:
adata = prep_check_adata(adata, groupby='cluster', obsm_keys=['spatial'], min_cells=3)

In [None]:
resource = select_resource('MouseConsensus')

In [None]:
adata.var_names

In [None]:
adata.obsm

In [None]:
# # fix this later
# temp = sq.datasets.visium_hne_adata()
# # adata.layers['counts'] = temp.raw.X THIS IS JUST BAD!!
# del temp
# gc.collect()

Process Resource

In [None]:
resource = explode_complexes(resource)

In [None]:
resource = filter_resource(resource, adata.var_names)

In [None]:
resource

Get slide means

In [None]:
# Sorted, de-duplicated set of every gene that appears as a ligand or a receptor
# in the resource (np.union1d already returns unique, sorted values).
entities = np.union1d(resource["ligand"], resource["receptor"])

In [None]:
# Filter to only include the relevant genes
adata = adata[:, np.intersect1d(entities, adata.var.index)]

In [None]:
# Per-variable summary over all observations:
#   means - column mean of adata.X
#   props - fraction of rows holding a stored (non-zero) entry in that column
# rename_axis('gene') names the index explicitly before reset_index(), so the
# resulting column is always called 'gene' even when adata.var_names carries its
# own name (in which case renaming a column called 'index' would silently do nothing).
# np.asarray(...).ravel() flattens the 1 x n result without relying on the
# np.matrix-only `.A` attribute.
lr_stats = (
    pd.DataFrame({'means': np.asarray(adata.X.mean(axis=0)).ravel(),
                  'props': adata.X.getnnz(axis=0) / adata.X.shape[0]},
                 index=adata.var_names)
    .rename_axis('gene')
    .reset_index()
)

In [None]:
def _rename_means(lr_stats, entity):
    df = lr_stats.copy()
    df.columns = df.columns.map(lambda x: entity + '_' + str(x) if x!='gene' else 'gene')
    return df.rename(columns = {'gene':entity})

In [None]:
lr_res = resource.merge(_rename_means(lr_stats, entity='ligand')).merge(_rename_means(lr_stats, entity='receptor'))

In [None]:
# # Filter non-expressed
# lr_res = lr_res[(lr_res.ligand_props > 0.05) & (lr_res.receptor_props > 0.05)]

Recomplexify

In [None]:
lr_res = filter_reassemble_complexes(lr_res=lr_res,
                                     _key_cols=_key_cols,
                                     expr_prop=0.05,
                                     complex_cols=['ligand_means', 'receptor_means'])

In [None]:
lr_res[lr_res.interaction.str.contains('Plxnb3')]

For now I will use their spatial kernel weights

In [None]:
rbf_d = my_sample.rbf_d

In [None]:
# every spot to every spot:
my_sample.rbf_d.shape[0]

(Vectorized) For all interactions:

norm expr mat - i.e. (x - x^) & (y - y^)

In [None]:
# Centre every column of adata.X on its column mean.
# NOTE(review): norm_mat is recomputed with the same expression in a later cell, and
# sqrt_a is only referenced from commented-out lines further down - this cell looks
# redundant; confirm before removing.
norm_mat = np.array(adata.X - np.array(adata.X.mean(axis=0)))
# Square root of the per-column sum of squares of the centred matrix.
sqrt_a = np.sqrt(np.sum(np.power(norm_mat, 2), axis=0)).T

In [None]:
ligand_pos = {entity: np.where(adata.var_names == entity)[0][0] for entity
              in lr_res['ligand']}
receptor_pos = {entity: np.where(adata.var_names == entity)[0][0] for entity
                in lr_res['receptor']}

Calculate Global Moran's R

In [None]:
import scipy
from tqdm import tqdm

In [None]:
# essential for this to be sparse
dist = scipy.sparse.csr_matrix(my_sample.rbf_d)

In [None]:
norm_mat = np.array(adata.X - np.array(adata.X.mean(axis=0))) # normalized x matrix

In [None]:
norm_mat = norm_mat / np.sqrt(np.sum(norm_mat**2, axis=0, keepdims=True))

In [None]:
# sqrt_a = np.sqrt(np.sum(np.power(norm_mat, 2), axis=0)).T # sqrt matrix
# positions
ligand_pos = {entity: np.where(adata.var_names == entity)[0][0] for entity
              in lr_res['ligand']}
receptor_pos = {entity: np.where(adata.var_names == entity)[0][0] for entity
                in lr_res['receptor']}

In [None]:
n_perm = 1000

In [None]:
ligand_mat = np.array([norm_mat[:, ligand_pos[ligand]] for ligand in lr_res.ligand])
receptor_mat = np.array([norm_mat[:, receptor_pos[receptor]] for receptor in lr_res.receptor])
# ligand_sqrt = np.array([sqrt_a[ligand_pos[ligand]] for ligand in lr_res.ligand])
# receptor_sqrt = np.array([sqrt_a[receptor_pos[receptor]] for receptor in lr_res.receptor])

global_r for all

In [None]:
global_r = ((ligand_mat @ dist)* receptor_mat).sum(axis=1)

In [None]:
lr_res['global_r'] = global_r

Create a perm_matrix for all LRs

Permutation p-vals

In [None]:
# Seeded generator so the permutation-based p-values below are reproducible
# across kernel restarts (an unseeded Generator draws fresh OS entropy every run).
rng = np.random.default_rng(0)

In [None]:
idx = norm_mat.shape[0]

In [None]:
# mat /w n_perms x LR_n
perm_mat = np.zeros((n_perm, global_r.shape[0]))

In [None]:
%%time
# Build the permutation null: for each of the n_perm rounds, shuffle the columns of
# ligand_mat with a fresh random ordering, recompute the same statistic as global_r
# against the unshuffled receptor_mat, and store one row of perm_mat per round.
for perm in tqdm(range(n_perm)):
    # idx is norm_mat.shape[0]; rng.permutation(idx) is a random ordering of 0..idx-1
    _idx = rng.permutation(idx)
    perm_mat[perm,:] = ((ligand_mat[:, _idx] @ dist) * receptor_mat).sum(axis=1)

In [None]:
# One-sided permutation p-value per column of perm_mat: one minus the fraction of
# permutations strictly below the observed global_r, i.e. the fraction of permuted
# statistics that are >= the observed one. Can be exactly 0 when no permutation
# reaches the observed value.
global_pvals = 1 - (global_r > perm_mat).sum(axis=0) / n_perm

In [None]:
lr_res['global_pvals'] = global_pvals

In [None]:
lr_res.sort_values('global_r')

Z-score p-vals

In [None]:
from scipy import stats

In [None]:
# Densify the weight matrix for the element-wise arithmetic in the next cells.
# Guarded so the cell is idempotent: once `dist` is already a dense array, a
# second run no longer fails on the missing `.todense()` method.
dist = dist.toarray() if scipy.sparse.issparse(dist) else np.asarray(dist)

In [None]:
# global distance variance (~copy pasted)
# n: number of rows of adata.
n = adata.shape[0]
# nm (numerator): n^2 * sum of squared weights
#                 - 2n * sum over positions of (row-sum * column-sum)
#                 + (total weight)^2
nm = (n ** 2 * (dist * dist).sum()) - (2 * n * (dist.sum(axis=1) * dist.sum(axis=0)).sum()) + (dist.sum() ** 2)
# dm (denominator): n^2 * (n - 1)^2
dm = n ** 2 * (n - 1) ** 2

In [None]:
# variance term: ratio of the numerator (nm) to the denominator (dm)
dist_var = nm/dm  # aka st

In [None]:
# Square ROOT of dist_var (despite the `_sq` suffix); global_r is divided by this
# value to obtain the global z-scores.
dist_var_sq = dist_var ** (1 / 2)

In [None]:
dist_var_sq

In [None]:
global_zscores = global_r / dist_var_sq

In [None]:
global_zpvals = stats.norm.sf(global_zscores)

In [None]:
# assign to LR
lr_res['global_zscores'] = global_zscores

In [None]:
lr_res['global_zpvals'] = global_zpvals

Compare global

In [None]:
# Normalise the interaction labels (upper-case, '|' -> '_') so they can be matched
# against the `interaction` names in `res`.
# regex=False makes the replacement literal: '|' is a regex metacharacter and the
# default of the `regex` argument differs between pandas versions.
lr_res['interaction'] = (
    lr_res['interaction']
    .str.upper()
    .str.replace('|', '_', regex=False)
)

In [None]:
res = res.reset_index().rename(columns={'index':'interaction'})

In [None]:
res = res[[len(rec) < 2 for rec in res.receptor]]

In [None]:
lr_res[lr_res.interaction=="SEMA4D_PLXNB3"]

In [None]:
res[res.interaction=="SEMA4D_PLXNB3"]

In [None]:
# If "exponential" the weights are calculated based on the distance to the spatial
# unit \var{j} and the parameter \code{l} using the exponential function
# \deqn{w_{ij} = e^{-\frac{d_{ij}}{l}}}{w(i,j) = exp(-d(i,j)/l)}
# '
# ' The parameter \code{l} here denotes signaling length. For more information
# ' consult Oyler-Yaniv et. al. Immunity 46(4) 2017.
# '
# ' If "linear" the weights are calculated based on the distance to the spatial
# ' unit \var{j} and the parameter \code{l} using the linear function
# ' \deqn{w_{ij} = 1- d(i,j)/l}{w(i,j) = 1- d(i,j)/l}

In [None]:
joined = lr_res.merge(res, on='interaction')

In [None]:
joined.sort_values('global_r')

In [None]:
import scipy

In [None]:
joined

In [None]:
scipy.stats.pearsonr(joined.fdr,  joined.global_pvals)

In [None]:
scipy.stats.pearsonr(joined.z_pval,  joined.global_zpvals)

### Local Moran's I

In [None]:
import scipy

In [None]:
dist = scipy.sparse.csr_matrix(my_sample.rbf_d)

In [None]:
# norm_mat = np.array(adata.X - np.array(adata.X.mean(axis=0))) # normalized x matrix
# Note here - there is no normalisation by the squares

In [None]:
norm_mat = np.array(adata.X - np.array(adata.X.mean(axis=0)))

In [None]:
ligand_mat = np.array([norm_mat[:, ligand_pos[ligand]] for ligand in lr_res.ligand]).T
receptor_mat = np.array([norm_mat[:, receptor_pos[receptor]] for receptor in lr_res.receptor]).T

In [None]:
# where at least one is positive
(np.array([-1, 0, 1, -1, 1]) > 0) | (np.array([-1, 0, 1, -1, -1]) > 0)

In [None]:
# True where the ligand value or the receptor value (or both) is > 0;
# transposed relative to ligand_mat / receptor_mat.
pos_msk = np.logical_or(ligand_mat > 0, receptor_mat > 0).T

In [None]:
# calculate both sides for local_Rs
local_x = ligand_mat * (dist @ receptor_mat) # local_I
local_y = receptor_mat * (dist @ ligand_mat) # local_I_R

mask?

In [None]:
# # also only sig on global (i.e. only positive by default)?
# lr_msk = global_pvals <= 0.05

In [None]:
# he used to filter spot to spot to be positive
# i guess to exclude - to - positive correlations (does this even make sense...)

In [None]:
# local_x = local_x[:, lr_msk]
# local_y = local_y[:, lr_msk]
# assert local_x.shape == local_y.shape

In [None]:
# receptor_mat = receptor_mat[:, lr_msk]
# ligand_mat = ligand_mat[:, lr_msk]

Permutations

In [None]:
spot_n = local_x.shape[0]
lr_n = local_x.shape[1]
n_perm = 25

In [None]:
perm_x = np.zeros((lr_n, n_perm, spot_n))
perm_y = np.zeros((lr_n, n_perm, spot_n))

In [None]:
# Local permutation null: in every round the rows of the partner matrix are
# shuffled with one shared random ordering, and both directions of the local
# statistic are recomputed against the unshuffled matrix.
for i in tqdm(range(n_perm)):
    # Draw from the notebook's Generator `rng` rather than the legacy global
    # np.random state, so all permutations come from one random stream.
    _idx = rng.permutation(ligand_mat.shape[0])
    perm_x[:, i, :] = ((dist @ receptor_mat[_idx, :]) * ligand_mat).T
    perm_y[:, i, :] = ((dist @ ligand_mat[_idx, :]) * receptor_mat).T


In [None]:
# Permutation p-value per entry: the observed sum (local_x + local_y, transposed and
# given a length-1 axis at position 1 so it broadcasts over the permutation axis) is
# compared with each permuted sum; the p-value is the fraction of the n_perm
# permutations whose statistic is >= the observed one.
local_pvals = ((np.expand_dims(local_x.T + local_y.T, 1) <= (perm_x + perm_y)).sum(1)) / n_perm

In [None]:
local_r = local_x + local_y # actual value

In [None]:
local_r.shape

In [None]:
max(local_r[:,903])

In [None]:
hist(local_r[:,903])

In [None]:
their_r = (my_sample.local_I + my_sample.local_I_R)

In [None]:
their_r.shape

In [None]:
hist(their_r[:,200])

In [None]:
max(their_r[:, 531])

Compare local pvals

In [None]:
# # positve to positive local only
# pos = ((ligand_mat > 0) + (receptor_mat > 0)).T # he does it on non-norm counts, and not -mu^
# pvals_msked = local_pvals
# pvals_msked[pos] = 1

In [None]:
ours = pd.DataFrame(local_pvals, index=lr_res.interaction).reset_index()

In [None]:
ours

In [None]:
theirs =  my_sample.local_perm_p.reset_index().rename(columns={'index':'interaction'})

In [None]:
theirs[theirs.interaction=="SEMA4D_PLXNB3"]

In [None]:
ours[ours.interaction=="SEMA4D_PLXNB3"]

In [None]:
# if both are negative, set to 1
local_pvals[~pos_msk] = 1

In [None]:
scipy.stats.pearsonr(local_pvals[903], my_sample.local_perm_p.iloc[531,:])

In [None]:
lr_res[lr_res.interaction=="SEMA4D_PLXNB3"]

In [None]:
my_sample.ind_sel[532]

In [None]:
scipy.stats.pearsonr(local_x[:, 903], my_sample.local_I[:,531])

In [None]:
scipy.stats.pearsonr(local_y[:, 903], my_sample.local_I_R[:,531])

In [None]:
hist(local_x[:, 903])

In [None]:
hist(my_sample.local_I[:,531])

compare pvals

In [None]:
scipy.stats.pearsonr(local_pvals[903,:], my_sample.local_perm_p.iloc[531,:])

zscore p-val

In [None]:
from scipy import stats

In [None]:
dist = scipy.sparse.csr_matrix(my_sample.rbf_d)
wij_sq = (np.array(dist.todense()) ** 2).sum(axis=1)

In [None]:
my_sample.local_permI.shape

In [None]:
ligand_norm = np.array([stats.norm.fit(ligand_mat[:, ligand]) for ligand in range(ligand_mat.shape[1])])
receptor_norm = np.array([stats.norm.fit(receptor_mat[:, receptor]) for receptor in range(receptor_mat.shape[1])])

In [None]:
# get ligand receptor mu and std
ligand_mu1, ligand_std = ligand_norm[:,0], ligand_norm[:,1]
receptor_mu1, receptor_std = receptor_norm[:,0], receptor_norm[:,1]

In [None]:
# Rescale each fitted std by spot_n / (spot_n - 1).
# NOTE(review): the names say "sigma_sq" (a variance), but the fitted std is used
# as-is and never squared - confirm whether std ** 2 was intended or whether this
# deliberately mirrors the reference implementation being compared against.
ligand_sigma_sq = [(std * spot_n / (spot_n - 1)) for std in ligand_std]
receptor_sigma_sq = [(std * spot_n / (spot_n - 1)) for std in receptor_std]

In [None]:
def compute_var_local(sigma1_sq, sigma2_sq, wij_sq, spot_n):
    """Return the standard-deviation term used to scale the local statistic.

    With ``c = 2 * (spot_n - 1)**2 / spot_n**2`` the value returned is
    ``sqrt(c * sigma1_sq * sigma2_sq * wij_sq + c * sigma1_sq * sigma2_sq)``.
    Note that, despite the function name, the square root (not the variance)
    is returned.

    Parameters
    ----------
    sigma1_sq, sigma2_sq
        Spread terms of the two variables; they enter only as a product.
    wij_sq
        Squared-weight term; scalar or array (the result broadcasts to it).
    spot_n
        Number of observations.
    """
    scale = 2 * (spot_n-1)**2/spot_n**2
    pair_term = scale * sigma1_sq * sigma2_sq
    variance = pair_term * wij_sq + pair_term
    return variance ** (1/2)

In [None]:
std_ls = [compute_var_local(sigma1_sq, sigma2_sq, wij_sq, spot_n) for (sigma1_sq, sigma2_sq) in zip(ligand_sigma_sq, receptor_sigma_sq)]

In [None]:
# z-score = combined local statistic divided by its standard deviation.
# The parentheses matter: without them `/` binds tighter than `+`, so only
# local_y.T was being scaled by std_ls while local_x.T was added unscaled.
local_zscores = (local_x.T + local_y.T) / std_ls

In [None]:
local_zpvals = stats.norm.sf(local_zscores)

In [None]:
local_zpvals

In [None]:
local_zscores.shape

In [None]:
local_zscores

In [None]:
scipy.stats.pearsonr(local_zscores[903], my_sample.local_z[531,:])

In [None]:
# if both are negative, set to 1
local_zpvals[~pos_msk] = 1

In [None]:
scipy.stats.pearsonr(local_zpvals[903,:], my_sample.local_z_p.iloc[531,:])

In [None]:
hist(my_sample.local_z_p.iloc[531,:])

In [None]:
hist(local_zpvals[903,:])

In [None]:
scipy.stats.pearsonr(local_pvals[3], local_zpvals[3])

Local I does not make sense, scale not bound between -1 and +1

In [None]:
import liana as li
li.mt.get_spatial_proximity(adata=adata, parameter=200, bypass_diagonal=False, cutoff=0.2)

In [None]:
dist = adata.obsm['proximity']

In [None]:
norm_mat = np.array(adata.X - np.array(adata.X.mean(axis=0)))

In [None]:
ligand_mat.shape

In [None]:
spot_n = norm_mat.shape[0]

In [None]:
adata.X.shape

In [None]:
ligand_mat = np.array([norm_mat[:, ligand_pos[ligand]] for ligand in lr_res.ligand]).T
receptor_mat = np.array([norm_mat[:, receptor_pos[receptor]] for receptor in lr_res.receptor]).T

In [None]:
msq = (np.sum(ligand_mat ** 2, axis=0) / (spot_n - 1))
ligand_mat / msq

In [None]:
norm_mat.shape

In [None]:
msq

In [None]:
norm_mat

In [None]:
norm_mat / msq

In [None]:
# Both directions of the local statistic: each value multiplied by the
# weight-aggregated partner values.
local_x = ligand_mat * (dist @ receptor_mat)
local_y = receptor_mat * (dist @ ligand_mat)
# Second moment of every centred column: sum of squares down the rows (axis=0),
# divided by the number of rows. This matches `msq` above, which also reduces over
# axis=0; axis=1 would instead sum across the different ligand/receptor columns
# of a single row.
x_m2 = np.sum(ligand_mat ** 2, axis=0) / spot_n
y_m2 = np.sum(receptor_mat ** 2, axis=0) / spot_n

OG DATA

In [None]:
import os
import pandas as pd
import numpy as np
import anndata as ann

import spatialdm as sdm
from spatialdm.datasets import dataset
import spatialdm.plottings as pl
print("SpatailDM version: %s" %sdm.__version__)



In [None]:
adata = dataset.melanoma()

In [None]:
# Wrap the raw and log matrices as observation x variable DataFrames.
# The raw matrix is labelled with adata.raw.var_names (its own variable names),
# so the columns stay correct even if adata.raw and adata hold different variables.
raw = pd.DataFrame(adata.raw.X, index=adata.obs_names, columns=adata.raw.var_names)
log = pd.DataFrame(adata.X, index=adata.obs_names, columns=adata.var_names)
raw.head()

In [None]:
spatialcoord = pd.DataFrame(adata.obsm['spatial'], index=adata.obs_names, columns=['x','y'])
spatialcoord.head()

In [None]:
my_sample = sdm.SpatialDM(log, raw, spatialcoord)     # load spatial data
my_sample.weight_matrix(l=1.2, cutoff=0.2, single_cell=False) # weight_matrix by rbf kernel

In [None]:
my_sample.extract_lr(species='human', min_cell=3)

In [None]:
my_sample.spatialdm_global(1000, method='both')     # global Moran selection

In [None]:
my_sample.sig_pairs(method='permutation', fdr=True, threshold=0.1)

In [None]:
# Local selection of significant spots
my_sample.spatialdm_local(n_perm=100, method='both', select_num=None, nproc=1)     # local spot selection complete in seconds

In [None]:
my_sample.sig_spots(threshold=0.1)  # significant local spots