In [114]:
import numba as nb

In [115]:
import squidpy as sq
import scanpy as sc
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix, isspmatrix_csr

In [116]:
import liana as li

In [117]:
from matplotlib.pyplot import hist

In [118]:
from liana.method._global_lr_pipe import _global_lr_pipe
from liana.method.sp._spatialdm import _get_ordered_matrix, _standardize_matrix

In [None]:
# # scHOT data
# counts = pd.read_csv("data/counts_mat.csv")
# weights = pd.read_csv("data/weight_mat.csv")
# var = pd.DataFrame(counts[['Unnamed: 0']]).set_index('Unnamed: 0')
# var.index.name = None
# adata = sc.AnnData(X=csr_matrix(counts.drop(counts.columns[0], axis=1), dtype=np.float32).T, var=var)
# adata.obsm['proximity'] = csr_matrix(weights)

In [None]:
# scHOT data test
adata = sc.read_h5ad("test_spatial.h5ad")
resource = pd.DataFrame({"ligand":["Dnm1l", "Arrb1", "Igf2", "Dnm1l"], "receptor":["Gucy1b3", "Mtor", "Tuba1a", "Fam63b"]})
dist = adata.obsm['proximity']

In [None]:
n_perm = 100
seed = 0

In [119]:
# full visium slide
# load the pre-processed dataset
img = sq.datasets.visium_hne_image()
adata = sq.datasets.visium_hne_adata()

li.mt.get_spatial_proximity(adata=adata, parameter=200, bypass_diagonal=True, cutoff=0.1)
dist = adata.obsm['proximity']

my_p = li.pl.proximity_plot(adata, idx=100)
resource = li.resource.select_resource("mouseconsensus")

In [120]:
temp, lr_res, ligand_pos, receptor_pos = _global_lr_pipe(adata=adata,
                                                         resource=resource,
                                                         expr_prop=0.05,
                                                         use_raw=False,
                                                         verbose=True,
                                                         layer=None,
                                                         _key_cols=['ligand_complex', 'receptor_complex'],
                                                         _complex_cols=['ligand_means', 'receptor_means'],
                                                         _obms_keys=['proximity'],
                                                         resource_name=None
                                                         )

Using `.X`!


In [121]:
# lr_res = lr_res.head(50)

In [122]:
x_mat = _get_ordered_matrix(temp.X, ligand_pos, lr_res.ligand).A.astype(np.float32).T
y_mat = _get_ordered_matrix(temp.X, receptor_pos, lr_res.receptor).A.astype(np.float32).T

In [123]:
lr_res.head()

Unnamed: 0,interaction,ligand,receptor,ligand_complex,receptor_complex,ligand_means,ligand_props,receptor_means,receptor_props,prop_min
0,Dll1&Notch1,Dll1,Notch1,Dll1,Notch1,0.037283,0.057292,0.216689,0.276786,0.057292
1,Adam10&Notch1,Adam10,Notch1,Adam10,Notch1,0.623617,0.651786,0.216689,0.276786,0.276786
2,Jag1&Notch1,Jag1,Notch1,Jag1,Notch1,0.051299,0.075893,0.216689,0.276786,0.075893
3,Adam17&Notch1,Adam17,Notch1,Adam17,Notch1,0.121026,0.167783,0.216689,0.276786,0.167783
4,Dlk1&Notch1,Dlk1,Notch1,Dlk1,Notch1,0.236641,0.213542,0.216689,0.276786,0.213542


In [167]:
@nb.njit(nb.f4(nb.f4[:], nb.f4[:], nb.f4[:], nb.f4, nb.boolean), cache=True)
def wcor(x, y, w, wsum, rank):
    
    if rank:
        x = np.argsort(x).argsort().astype(nb.f4)
        y = np.argsort(y).argsort().astype(nb.f4)
    
    wx = w * x
    wy = w * y
    
    numerator = wsum * np.sum(wx * y) - np.sum(wx) * np.sum(wy)
    
    denominator_x = wsum * np.sum(w * (x**2)) - np.sum(wx)**2
    denominator_y = wsum * np.sum(w * (y**2)) - np.sum(wy)**2
    denominator = (denominator_x * denominator_y)
    
    if (denominator == 0) or (numerator == 0):
        return 0
    
    return numerator / (denominator**0.5)

In [168]:
@nb.njit(nb.f4(nb.f4[:], nb.f4[:], nb.f4[:]), cache=True)
def _wjaccard(x, y , w):
    x = (x > 0).astype(np.int8)
    y = (y > 0).astype(np.int8)
    
    # intersect and union
    numerator = np.sum(np.minimum(x, y) * w)
    denominator = np.sum(np.maximum(x, y) * w)
    
    if denominator == 0:
        return 0.0
    
    return numerator / denominator

In [169]:
@nb.njit(nb.f4(nb.f4[:], nb.f4[:], nb.f4[:]), cache=True)
def _wcossim(x, y , w):
    dot = np.dot(x * w, y)
    x_dot = np.dot(x * w, x)
    y_dot = np.dot(y * w, y)
    denominator = (x_dot * y_dot)
    
    if denominator == 0:
        return 0.0
    
    return dot / (denominator**0.5)

In [170]:
@nb.njit(nb.f4(nb.f4[:], nb.f4[:], nb.f4[:], nb.f4, nb.int8), cache=True)
def _wcoex(x, y, w, wsum, method):
        if method == 0: # pearson
            c = wcor(x, y, w, wsum, False)
        elif method == 1: # spearman
            c = wcor(x, y, w, wsum, True)
        elif method == 2: # jaccard
            c = _wjaccard(x, y, w)
        elif method == 3: # cosine
            c = _wcossim(x, y, w)
        else: 
            raise ValueError("method not supported")
        return c

In [171]:
# 0 = pearson, 1 = spearman
@nb.njit(nb.f4[:,:](nb.f4[:,:], nb.f4[:,:], nb.f4[:,:], nb.f4, nb.int8, nb.f4), cache=True, parallel=True)
def masked_coexpression(x_mat, y_mat, weight, wsum, method, weight_thr):
    spot_n = x_mat.shape[0]
    xy_n = x_mat.shape[1]
    
    local_correlations = np.zeros((spot_n, xy_n), dtype=nb.f4) # nb.f4
    
    for i in nb.prange(spot_n):
        w = weight[i, :]
        msk = w > weight_thr
        
        for j in range(xy_n):
            x = x_mat[:, j][msk]
            y = y_mat[:, j][msk]
            
            local_correlations[i, j] = _wcoex(x, y, w[msk], wsum, method)
    
    return local_correlations

In [129]:
dist = adata.obsm['proximity']
weight = dist.A.astype(np.float32)
wsum = np.sum(weight[0,:])

In [130]:
%%time
masked_pc = masked_coexpression(x_mat, y_mat, weight, wsum, method=0, weight_thr=0)

CPU times: user 55.1 s, sys: 17.6 ms, total: 55.1 s
Wall time: 5.56 s


In [145]:
masked_pc.T

array([[ 0.09826311,  0.        ,  0.08721767, ...,  0.29930848,
         0.04978303,  0.        ],
       [-0.1635023 ,  0.04346964, -0.33987418, ...,  0.14606072,
        -0.5928406 , -0.32859018],
       [-0.16473134, -0.05994885, -0.3344265 , ..., -0.33696443,
         0.08912473,  0.44609275],
       ...,
       [-0.12529875, -0.20152366, -0.41333967, ...,  0.12269165,
         0.22780639,  0.11751314],
       [ 0.06025517, -0.14673103,  0.05746137, ..., -0.19184436,
         0.6369634 , -0.0936611 ],
       [ 0.        ,  0.2219938 ,  0.01828931, ...,  0.12309346,
         0.        ,  0.        ]], dtype=float32)

In [154]:
np.where(np.isnan(masked_pc))

(array([   2,    2,    2, ..., 2687, 2687, 2687]),
 array([   5,   11,   17, ..., 1402, 1404, 1405]))

In [175]:
masked_pc.shape

(2688, 1409)

In [188]:
masked_pc[2,0:10]

nan

In [189]:
local_pc[5,0:10]

0.12891324

In [177]:
x = x_mat[:, 5]
w = weight[5, :]
y = y_mat[:, 1]

In [178]:
wcor(x, y, weight[5, :], wsum, False)

nan

In [179]:
wx = w * x
wy = w * y

numerator = wsum * np.sum(wx * y) - np.sum(wx) * np.sum(wy)

denominator_x = wsum * np.sum(w * (x**2)) - np.sum(wx)**2
denominator_y = wsum * np.sum(w * (y**2)) - np.sum(wy)**2
denominator = (denominator_x * denominator_y)

In [185]:
numerator / (denominator**0.5)



nan

In [187]:
pearsonr(x, y)

PearsonRResult(statistic=-0.0798658503658069, pvalue=3.391974049134917e-05)

In [198]:
w = np.ones(y.shape[0]).astype(np.float32)
wsum = np.sum(w).astype(np.float32)

In [199]:
wcor(x, y, w, wsum, False)

-0.0798661932349205

In [200]:
# WSUM is the issue????

In [None]:
%%time
masked_sp = masked_coexpression(x_mat, y_mat, weight, wsum, method=1, weight_thr=0)

In [None]:
masked_sp

In [None]:
%%time
masked_jc = masked_coexpression(x_mat, y_mat, weight, wsum, method=2, weight_thr=0)

In [None]:
masked_jc

In [None]:
%%time
masked_cos = masked_coexpression(x_mat, y_mat, weight, wsum, method=3, weight_thr=0)

In [None]:
masked_cos

In [None]:
from scipy.spatial.distance import jaccard, cosine

In [None]:
# binary 0 or 1 x, y arrays
x = np.array

In [None]:
x = np.random.randint(2, size=10).astype(np.float32)
y = np.random.randint(2, size=10).astype(np.float32)
w = np.ones(10).astype(np.float32)

In [None]:
_wjaccard(x, y, w)

In [None]:
1 - jaccard(x, y, w)

In [None]:
_wcossim(x, y, w)

In [None]:
1 - cosine(x, y, w)

Fully-vectorized

In [132]:
import squidpy as sq
import scanpy as sc
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix, isspmatrix_csr

In [133]:
import liana as li

In [134]:
from matplotlib.pyplot import hist
from scipy.stats import spearmanr, pearsonr

In [135]:
from liana.method._global_lr_pipe import _global_lr_pipe
from liana.method.sp._spatialdm import _get_ordered_matrix, _standardize_matrix

In [136]:
from scipy.stats import rankdata

In [137]:
# ligand-receptor mats
ligand_mat = _get_ordered_matrix(temp.X, ligand_pos, lr_res.ligand).T
receptor_mat = _get_ordered_matrix(temp.X, receptor_pos, lr_res.receptor).T

In [138]:
def _vectorized_correlations(x_mat, y_mat, dist, method="pearson"):
    if method not in ["pearson", "spearman"]:
        raise ValueError("method must be one of 'pearson', 'spearman'")
    
    # transpose
    x_mat, y_mat = x_mat.T, y_mat.T
    
    weight = dist.A.T
    weight_sums = np.sum(weight, axis = 0).flatten()
        
    if method=="spearman":
        x_mat = rankdata(x_mat, axis=1)
        y_mat = rankdata(y_mat, axis=1)
    
    # standard pearson
    n1 = (((x_mat * y_mat).dot(weight)) * weight_sums)
    n2 = (x_mat.dot(weight)) * (y_mat.dot(weight))
    numerator = n1 - n2
    
    denominator_x = (weight_sums * (x_mat ** 2).dot(weight)) - (x_mat.dot(weight))**2
    denominator_y = (weight_sums * (y_mat ** 2).dot(weight)) - (y_mat.dot(weight))**2
    denominator = np.sqrt(denominator_x * denominator_y)
    denominator[denominator == 0] = np.finfo(np.float32).eps # add noise to avoid division by zero
    
    local_corrs = (numerator / denominator)
    
    return local_corrs

In [139]:
dist = adata.obsm['proximity']

In [140]:
local_pc = _vectorized_correlations(x_mat = ligand_mat.A, y_mat=receptor_mat.A, dist=dist, method="pearson")

In [141]:
local_sp = _vectorized_correlations(x_mat = ligand_mat.A, y_mat=receptor_mat.A, dist=dist, method="spearman")

In [None]:
local_sp

In [None]:
def _vectorized_wcosine(x_mat, y_mat, dist):
    
    # transpose
    x_mat, y_mat = x_mat.T, y_mat.T    
    weight = dist.A.T
    
    xy_dot = (x_mat * y_mat).dot(weight)
    x_dot = (x_mat ** 2).dot(weight.T)
    y_dot = (y_mat ** 2).dot(weight.T)
    denominator = (x_dot * y_dot) + np.finfo(np.float32).eps
    
    return xy_dot / (denominator**0.5)

In [None]:
local_cos = _vectorized_wcosine(x_mat = ligand_mat.A, y_mat=receptor_mat.A, dist=dist)

In [None]:
local_cos

In [None]:
local_cos.shape

In [None]:
_wjaccard(x_mat[:, 0], y_mat[:, 0], weight[:, 0])

In [None]:
def _vectorized_jaccard(x_mat, y_mat, dist):
    # binarize
    x_mat, y_mat = x_mat > 0, y_mat > 0
    # transpose
    x_mat, y_mat = x_mat.T, y_mat.T    
    weight = dist.A.T
    
    # intersect and union
    numerator = np.dot(np.minimum(x_mat, y_mat), weight)
    denominator = np.dot(np.maximum(x_mat, y_mat), weight) + np.finfo(np.float32).eps
    
    return numerator / denominator

In [None]:
local_jc = _vectorized_jaccard(x_mat = ligand_mat.A, y_mat=receptor_mat.A, dist=dist)

In [None]:
local_jc

In [None]:
from liana.method.sp._spatial_utils import _local_permutation_pvals

In [None]:
local_pvals = _local_permutation_pvals(x_mat = ligand_mat.A, 
                                       y_mat = receptor_mat.A, 
                                       local_truth=local_pc,
                                       local_fun=_vectorized_correlations,
                                       dist=dist, 
                                       n_perm=n_perm, 
                                       positive_only=False,
                                       seed=seed)

In [None]:
local_pvals

In [None]:
local_sp = _vectorized_correlations(x_mat = ligand_mat.A, y_mat=receptor_mat.A, dist=dist, method="spearman")

In [None]:
local_sp_pvals = _local_permutation_pvals(x_mat = ligand_mat.A, 
                                          y_mat = receptor_mat.A, 
                                          local_truth=local_sp,
                                          local_fun=_vectorized_correlations,
                                          dist=dist, 
                                          n_perm=n_perm, 
                                          positive_only=False,
                                          seed=seed,
                                          method="spearman"
                                          )

In [None]:
local_pvals.shape

In [None]:
local_sp_pvals.shape

In [None]:
spearmanr(local_sp_pvals[1,:], local_pvals[1,:])

Global summary of the local scores:

In [None]:
lr_res.loc[:,['pearson_mean','pearson_sd']] = np.vstack([np.mean(local_pc, axis=1), np.std(local_pc, axis=1)]).T

In [None]:
lr_res.sort_values(by='pearson_mean', ascending=False)

local p-values

In [None]:
from numpy import random
from tqdm import tqdm

In [None]:
rng = random.default_rng(0)
n_perm = 100
positive_only = True # remove this option?

In [None]:
dist = adata.obsm['proximity']

In [None]:
local_pc.shape

In [None]:
def _get_local_permutation_pvals(x_mat, y_mat, local_truth, local_fun, dist, n_perm, positive_only=True, **kwargs):
    xy_n = local_truth.shape[0]
    spot_n = local_truth.shape[1]
    
    print(spot_n)
    
    # permutation cubes to be populated
    local_pvals = np.zeros((xy_n, spot_n))
    
    # shuffle the matrix
    for i in tqdm(range(n_perm)):
        _idx = rng.permutation(spot_n)
        perm_score = local_fun(x_mat = x_mat[_idx, :], y_mat=y_mat, dist=dist, **kwargs) ## TODO switch to shuffle rows, not columns
        if positive_only:
            local_pvals += np.array(perm_score >= local_truth, dtype=int)
        else:
            local_pvals += (np.array(np.abs(perm_score) >= np.abs(local_truth), dtype=int))

    local_pvals = local_pvals / n_perm
    
    return local_pvals
    

In [None]:
from liana.method.sp._spatial_utils import _local_permutation_pvals

In [None]:
local_pvals = _local_permutation_pvals(x_mat = ligand_mat.A, 
                                       y_mat = receptor_mat.A, 
                                       local_truth=local_pc,
                                       local_fun=calculate_local_correlations,
                                       dist=dist, 
                                       n_perm=n_perm, 
                                       positive_only=False,
                                       seed=0)

In [None]:
local_pvals

In [None]:
local_pvals

In [None]:
from  scipy.sparse import csr_matrix

In [None]:
local_pvals.shape

In [None]:
local_pvals

In [None]:
local_pc.shape

In [None]:
local_pvals.shape

In [None]:
local_masked_pvals = _get_local_permutation_pvals(x_mat = ligand_mat.A,
                                                  y_mat = receptor_mat.A,
                                                  local_truth = masked_sp,
                                                  local_fun=calculate_masked_correlations,
                                                  dist=dist,
                                                  n_perm=n_perm,
                                                  positive_only=False)

In [None]:
local_masked_pvals

In [None]:
local_masked_pvals.shape

In [None]:
from scipy.stats import spearmanr,  pearsonr

In [None]:
pearsonr(local_masked_pvals[0,:], local_pvals[0,:])

Plot to check the distribution of the local scores:

In [None]:
from liana.method.sp._spatialdm import spatialdm

In [None]:
spatialdm(adata=adata, resource_name="MouseConsensus", pvalue_method='analytical', verbose=True, use_raw=False)

In [None]:
local_pvals = li.ut.obsm_to_adata(adata, obsm_key='local_pvals')

In [None]:
local_r = li.ut.obsm_to_adata(adata, obsm_key='local_r')

In [None]:
sc.pl.spatial(local_pvals, color=['Sema4d&Plxnb3'])

In [None]:
sc.pl.spatial(local_r, color=['Sema4d&Plxnb3'])

In [None]:
adata.obsm['local_r']

In [None]:
def embed_score_to_adata(adata, score, score_name):
    score =  pd.DataFrame(score,
                          columns=adata.obsm['local_r'].columns,
                          index=adata.obsm['local_r'].index)
    adata.obsm[score_name] = score

In [None]:
# Masked Pearson correlation
embed_score_to_adata(adata, masked_pc, 'masked_pc')

# Vectorized Pearson correlation
embed_score_to_adata(adata, local_pc.T, 'local_pc')

In [None]:
# Masked Spearman correlation
embed_score_to_adata(adata, masked_sp, 'masked_sp')
# Vectorized Spearman correlation
embed_score_to_adata(adata, local_sp.T, 'local_sp')

In [None]:
# Masked Jaccard index
embed_score_to_adata(adata, masked_jc, 'masked_jc')
# Vectorized Jaccard index
embed_score_to_adata(adata, local_jc.T, 'local_jc')

In [None]:
# Masked Cosine similarity
embed_score_to_adata(adata, masked_cos, 'masked_cos')
# Vectorized Cosine similarity
embed_score_to_adata(adata, local_cos.T, 'local_cos')

Plot to check the distribution of the local scores:

In [None]:
sc.pl.spatial(adata, color=['Sema4d', 'Plxnb3'], use_raw=False)

In [None]:
local_r = li.ut.obsm_to_adata(adata, obsm_key='local_r')
sc.pl.spatial(local_r, color=['Sema4d&Plxnb3'])

In [None]:
sc.pl.spatial(local_pvals, color=['Sema4d&Plxnb3'])

Pearson

In [None]:
masked_pc = li.ut.obsm_to_adata(adata, obsm_key='masked_pc')
sc.pl.spatial(masked_pc, color=['Sema4d&Plxnb3'])

In [None]:
local_pc = li.ut.obsm_to_adata(adata, obsm_key='local_pc')
sc.pl.spatial(local_pc, color=['Sema4d&Plxnb3'])

In [None]:
local_sp = li.ut.obsm_to_adata(adata, obsm_key='local_sp')
sc.pl.spatial(local_sp, color=['Sema4d&Plxnb3'])

In [None]:
masked_sp = li.ut.obsm_to_adata(adata, obsm_key='masked_sp')
sc.pl.spatial(masked_sp, color=['Sema4d&Plxnb3'])

In [None]:
masked_jc = li.ut.obsm_to_adata(adata, obsm_key='masked_jc')
sc.pl.spatial(masked_jc, color=['Sema4d&Plxnb3'])

In [None]:
local_jc = li.ut.obsm_to_adata(adata, obsm_key='local_jc')
sc.pl.spatial(local_jc, color=['Sema4d&Plxnb3'])

Cosine

In [None]:
masked_cos = li.ut.obsm_to_adata(adata, obsm_key='masked_cos')
sc.pl.spatial(masked_cos, color=['Sema4d&Plxnb3'])

In [None]:
local_cos = li.ut.obsm_to_adata(adata, obsm_key='local_cos')
sc.pl.spatial(local_cos, color=['Sema4d&Plxnb3'])

In [None]:
embed_score_to_adata(adata, local_sp_pvals.T, 'local_sp_pvals')
local_sp_pvals = li.ut.obsm_to_adata(adata, obsm_key='local_sp_pvals')

In [None]:
sc.pl.spatial(local_sp_pvals, color=['Sema4d&Plxnb3'], cmap='viridis_r')