In [60]:
import numba as nb

In [61]:
import squidpy as sq
import scanpy as sc
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix, isspmatrix_csr

In [62]:
import liana as li

In [63]:
from matplotlib.pyplot import hist

In [64]:
from liana.method._global_lr_pipe import _global_lr_pipe
from liana.method.sp._spatialdm import _get_ordered_matrix, _standardize_matrix

In [6]:
# # scHOT data
# counts = pd.read_csv("data/counts_mat.csv")
# weights = pd.read_csv("data/weight_mat.csv")
# var = pd.DataFrame(counts[['Unnamed: 0']]).set_index('Unnamed: 0')
# var.index.name = None
# adata = sc.AnnData(X=csr_matrix(counts.drop(counts.columns[0], axis=1), dtype=np.float32).T, var=var)
# adata.obsm['proximity'] = csr_matrix(weights)

In [7]:
# scHOT data test
adata = sc.read_h5ad("test_spatial.h5ad")
resource = pd.DataFrame({"ligand":["Dnm1l", "Arrb1", "Igf2", "Dnm1l"], "receptor":["Gucy1b3", "Mtor", "Tuba1a", "Fam63b"]})
dist = adata.obsm['proximity']

In [8]:
n_perm = 100
seed = 0

In [9]:
# full visium slide
# load the pre-processed dataset
img = sq.datasets.visium_hne_image()
adata = sq.datasets.visium_hne_adata()

li.mt.get_spatial_proximity(adata=adata, parameter=200, bypass_diagonal=False, cutoff=0.1)
dist = adata.obsm['proximity']

my_p = li.pl.proximity_plot(adata, idx=100)
resource = li.resource.select_resource("mouseconsensus")

In [10]:
temp, lr_res, ligand_pos, receptor_pos = _global_lr_pipe(adata=adata,
                                                         resource=resource,
                                                         expr_prop=0.05,
                                                         use_raw=False,
                                                         verbose=True,
                                                         layer=None,
                                                         _key_cols=['ligand_complex', 'receptor_complex'],
                                                         _complex_cols=['ligand_means', 'receptor_means'],
                                                         _obms_keys=['proximity'],
                                                         resource_name=None
                                                         )

Using `.X`!


In [11]:
# lr_res = lr_res.head(50)

In [189]:
x_mat = _get_ordered_matrix(temp.X, ligand_pos, lr_res.ligand).A.astype(np.float32).T
y_mat = _get_ordered_matrix(temp.X, receptor_pos, lr_res.receptor).A.astype(np.float32).T

In [190]:
lr_res.head()

Unnamed: 0,interaction,ligand,receptor,ligand_complex,receptor_complex,ligand_means,ligand_props,receptor_means,receptor_props,prop_min
0,Dll1&Notch1,Dll1,Notch1,Dll1,Notch1,0.037283,0.057292,0.216689,0.276786,0.057292
1,Adam10&Notch1,Adam10,Notch1,Adam10,Notch1,0.623617,0.651786,0.216689,0.276786,0.276786
2,Jag1&Notch1,Jag1,Notch1,Jag1,Notch1,0.051299,0.075893,0.216689,0.276786,0.075893
3,Adam17&Notch1,Adam17,Notch1,Adam17,Notch1,0.121026,0.167783,0.216689,0.276786,0.167783
4,Dlk1&Notch1,Dlk1,Notch1,Dlk1,Notch1,0.236641,0.213542,0.216689,0.276786,0.213542


In [14]:
@nb.njit(nb.f4(nb.f4[:], nb.f4[:], nb.f4[:], nb.f4, nb.boolean), cache=True)
def wcor(x, y, w, wsum, rank):
    
    if rank:
        x = np.argsort(x).argsort().astype(nb.f4)
        y = np.argsort(y).argsort().astype(nb.f4)
    
    wx = w * x
    wy = w * y
    
    numerator = wsum * np.sum(wx * y) - np.sum(wx) * np.sum(wy)
    
    denominator_x = wsum * np.sum(w * (x**2)) - np.sum(wx)**2
    denominator_y = wsum * np.sum(w * (y**2)) - np.sum(wy)**2
    denominator = np.sqrt(denominator_x * denominator_y + 1.0e-20) # avoid division by zero
    
    return numerator / denominator

In [15]:
@nb.njit(nb.f4(nb.f4[:], nb.f4[:], nb.f4[:]), cache=True)
def _wjaccard(x, y , w):
    x = (x > 0).astype(np.int8)
    y = (y > 0).astype(np.int8)
    
    # intersect and union
    numerator = np.sum(np.minimum(x, y) * w)
    denominator = np.sum(np.maximum(x, y) * w)
    
    if denominator == 0:
        return 0.0
    
    return numerator / denominator

In [16]:
@nb.njit(nb.f4(nb.f4[:], nb.f4[:], nb.f4[:]), cache=True)
def _wcossim(x, y , w):
    dot = np.dot(x * w, y)
    x_dot = np.dot(x * w, x)
    y_dot = np.dot(y * w, y)
    denominator = (x_dot * y_dot)
    
    if denominator == 0:
        return 0.0
    
    return dot / (denominator**0.5)

In [17]:
@nb.njit(nb.f4(nb.f4[:], nb.f4[:], nb.f4[:], nb.f4, nb.int8), cache=True)
def _wcoex(x, y, w, wsum, method):
        if method == 0: # pearson
            c = wcor(x, y, w, wsum, False)
        elif method == 1: # spearman
            c = wcor(x, y, w, wsum, True)
        elif method == 2: # jaccard
            c = _wjaccard(x, y, w)
        elif method == 3: # cosine
            c = _wcossim(x, y, w)
        else: 
            raise ValueError("method not supported")
        return c

In [18]:
# 0 = pearson, 1 = spearman
@nb.njit(nb.f4[:,:](nb.f4[:,:], nb.f4[:,:], nb.f4[:,:], nb.f4, nb.int8, nb.f4), cache=True, parallel=True)
def masked_coexpression(x_mat, y_mat, weight, wsum, method, weight_thr):
    spot_n = x_mat.shape[0]
    xy_n = x_mat.shape[1]
    
    local_correlations = np.zeros((spot_n, xy_n), dtype=nb.f4) # nb.f4
    
    for i in nb.prange(spot_n):
        w = weight[i, :]
        msk = w > weight_thr
        
        for j in range(xy_n):
            x = x_mat[:, j][msk]
            y = y_mat[:, j][msk]
            
            local_correlations[i, j] = _wcoex(x, y, w[msk], wsum, method)
    
    return local_correlations

In [19]:
dist = adata.obsm['proximity']
weight = dist.A.astype(np.float32)
wsum = np.sum(weight[0,:])

In [20]:
%%time
local_pc = masked_coexpression(x_mat, y_mat, weight, wsum, method=0, weight_thr=0)

CPU times: user 57.1 s, sys: 664 ms, total: 57.8 s
Wall time: 5.78 s


In [21]:
local_pc

array([[ 0.117755  , -0.19442835, -0.14684229, ..., -0.11232399,
         0.07182601,  0.        ],
       [ 0.        ,  0.080853  , -0.05427799, ..., -0.23333478,
        -0.13238111,  0.23010302],
       [ 0.02571407, -0.31918997, -0.3443697 , ..., -0.46065855,
         0.03413666,  0.04124185],
       ...,
       [ 0.3057408 ,  0.21924528, -0.2982757 , ...,  0.15543087,
        -0.20192774,  0.13837363],
       [ 0.06430519, -0.54283893,  0.0998208 , ..., -0.02045712,
         0.6465301 ,  0.        ],
       [ 0.        , -0.29874232,  0.46295607, ...,  0.14961113,
        -0.05767659,  0.        ]], dtype=float32)

In [22]:
%%time
local_sp = masked_coexpression(x_mat, y_mat, weight, wsum, method=1, weight_thr=0)

CPU times: user 2min 44s, sys: 229 ms, total: 2min 44s
Wall time: 15.1 s


In [23]:
local_sp

array([[ 0.24194688, -0.07148522,  0.1429437 , ...,  0.36770657,
         0.26095694,  0.15096213],
       [ 0.5237845 ,  0.06520993,  0.51952165, ...,  0.63994974,
         0.5838346 ,  0.24281792],
       [-0.44582912, -1.5251493 , -1.0786409 , ..., -2.3390338 ,
        -0.6437703 , -1.3178389 ],
       ...,
       [-0.448252  , -0.61211634, -1.0975016 , ..., -0.67011935,
        -2.4174755 ,  0.2697524 ],
       [-0.05192441, -1.1924374 ,  0.09808166, ..., -1.310565  ,
         0.08902387, -1.0331533 ],
       [-0.03718835, -0.94827324,  0.23098512, ..., -0.3824148 ,
        -0.2658236 ,  0.00775319]], dtype=float32)

In [24]:
%%time
local_jc = masked_coexpression(x_mat, y_mat, weight, wsum, method=2, weight_thr=0)

CPU times: user 1min 10s, sys: 127 ms, total: 1min 11s
Wall time: 6.53 s


In [25]:
local_jc

array([[0.16999356, 0.32563385, 0.        , ..., 0.        , 0.03914943,
        0.        ],
       [0.        , 0.14585169, 0.        , ..., 0.        , 0.        ,
        0.12763652],
       [0.14640486, 0.45679104, 0.        , ..., 0.05793051, 0.4916278 ,
        0.10502105],
       ...,
       [0.09550774, 0.40108907, 0.        , ..., 0.21680813, 0.04271584,
        0.21501966],
       [0.06584294, 0.30835396, 0.05195678, ..., 0.12773621, 0.4549307 ,
        0.        ],
       [0.        , 0.26405674, 0.35999528, ..., 0.18800312, 0.14165509,
        0.        ]], dtype=float32)

In [26]:
%%time
local_cos = masked_coexpression(x_mat, y_mat, weight, wsum, method=3, weight_thr=0)

CPU times: user 1min 10s, sys: 179 ms, total: 1min 10s
Wall time: 6.99 s


In [27]:
local_cos

array([[0.2757257 , 0.31930378, 0.        , ..., 0.        , 0.1663052 ,
        0.        ],
       [0.        , 0.32796076, 0.        , ..., 0.        , 0.        ,
        0.30168733],
       [0.2556982 , 0.42359215, 0.        , ..., 0.06526156, 0.58937234,
        0.21915889],
       ...,
       [0.36441803, 0.5661081 , 0.        , ..., 0.384288  , 0.09428672,
        0.27216113],
       [0.17220944, 0.42217103, 0.1826283 , ..., 0.33179152, 0.72309417,
        0.        ],
       [0.        , 0.42969874, 0.59261453, ..., 0.3744975 , 0.21072412,
        0.        ]], dtype=float32)

In [28]:
from scipy.spatial.distance import jaccard, cosine

In [29]:
# binary 0 or 1 x, y arrays
x = np.array

In [30]:
x = np.random.randint(2, size=10).astype(np.float32)
y = np.random.randint(2, size=10).astype(np.float32)
w = np.ones(10).astype(np.float32)

In [31]:
_wjaccard(x, y, w)

0.3333333432674408

In [32]:
1 - jaccard(x, y, w)

0.33333333333333337

In [33]:
_wcossim(x, y, w)

0.5

In [34]:
1 - cosine(x, y, w)

0.5

In [35]:
y

array([1., 1., 0., 1., 1., 1., 0., 1., 0., 0.], dtype=float32)

In [188]:
w = weight[0, :]
msk = w > 0
x = x_mat[:, 0][msk]
y = y_mat[:, 0][msk]

IndexError: boolean index did not match indexed array along dimension 0; dimension is 1409 but corresponding boolean dimension is 2688

In [172]:
x.shape

(2688,)

In [187]:
1 - jaccard((x > 0).astype(np.int8),(y > 0).astype(np.int8), w[msk])

ValueError: operands could not be broadcast together with shapes (28,) (2688,) 

In [77]:
_wjaccard(x, y, w[msk])

0.1699935644865036

In [78]:
1 - cosine(x, y, w[msk])

0.275725681422597

In [79]:
_wcossim(x, y, w[msk])

0.27572569251060486

Fully-vectorized

In [43]:
import squidpy as sq
import scanpy as sc
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix, isspmatrix_csr

In [44]:
import liana as li

In [45]:
from matplotlib.pyplot import hist
from scipy.stats import spearmanr, pearsonr

In [46]:
from liana.method._global_lr_pipe import _global_lr_pipe
from liana.method.sp._spatialdm import _get_ordered_matrix, _standardize_matrix

In [47]:
from scipy.stats import rankdata

In [48]:
# ligand-receptor mats
ligand_mat = _get_ordered_matrix(temp.X, ligand_pos, lr_res.ligand).T
receptor_mat = _get_ordered_matrix(temp.X, receptor_pos, lr_res.receptor).T

In [55]:
def _vectorized_correlations(x_mat, y_mat, dist, method="pearson"):
    if method not in ["pearson", "spearman"]:
        raise ValueError("method must be one of 'pearson', 'spearman'")
    
    # transpose
    x_mat, y_mat = x_mat.T, y_mat.T
    
    weight = dist.A.T
    weight_sums = np.sum(weight, axis = 0).flatten()
        
    if method=="spearman":
        x_mat = rankdata(x_mat, axis=1)
        y_mat = rankdata(y_mat, axis=1)
    
    # standard pearson
    n1 = (((x_mat * y_mat).dot(weight)) * weight_sums)
    n2 = (x_mat.dot(weight)) * (y_mat.dot(weight))
    numerator = n1 - n2
    
    denominator_x = (weight_sums * (x_mat ** 2).dot(weight)) - (x_mat.dot(weight))**2
    denominator_y = (weight_sums * (y_mat ** 2).dot(weight)) - (y_mat.dot(weight))**2
    denominator = np.sqrt(denominator_x * denominator_y)
    denominator[denominator == 0] = np.finfo(np.float32).eps # add noise to avoid division by zero
    
    local_corrs = (numerator / denominator)
    
    return local_corrs

In [56]:
dist = adata.obsm['proximity']

In [57]:
local_pc = _vectorized_correlations(x_mat = ligand_mat.A, y_mat=receptor_mat.A, dist=dist, method="pearson")

In [58]:
local_sp = _vectorized_correlations(x_mat = ligand_mat.A, y_mat=receptor_mat.A, dist=dist, method="spearman")

In [59]:
local_pc

array([[ 0.11775495,  0.        ,  0.07320522, ...,  0.31679758,
         0.08614511,  0.        ],
       [-0.19442862, -0.00572051, -0.10251832, ...,  0.31816354,
        -0.19816568, -0.01154072],
       [-0.14684229, -0.06664198, -0.2709753 , ..., -0.23486906,
         0.11611345,  0.49061623],
       ...,
       [-0.11232401, -0.2953236 , -0.34604046, ...,  0.20728923,
         0.06490986,  0.20051171],
       [ 0.071826  , -0.1637877 ,  0.19777067, ..., -0.11512091,
         0.6614117 ,  0.0007559 ],
       [ 0.        ,  0.21222384,  0.0748205 , ...,  0.16439405,
         0.        ,  0.        ]], dtype=float32)

In [214]:
local_sp.shape

(1409, 2688)

In [182]:
def _vectorized_wcosine(x_mat, y_mat, dist):
    
    # transpose
    x_mat, y_mat = x_mat.T, y_mat.T    
    weight = dist.A.T
    
    xy_dot = (x_mat * y_mat).dot(weight)
    x_dot = (x_mat ** 2).dot(weight.T)
    y_dot = (y_mat ** 2).dot(weight.T)
    denominator = (x_dot * y_dot) + np.finfo(np.float32).eps
    
    return xy_dot / (denominator**0.5)

In [183]:
local_cos = _vectorized_wcosine(x_mat = ligand_mat.A, y_mat=receptor_mat.A, dist=dist)

In [213]:
local_cos.shape

(1409, 2688)

In [203]:
_wjaccard(x_mat[:, 0], y_mat[:, 0], weight[:, 0])

0.1699935644865036

In [219]:
def _vectorized_jaccard(x_mat, y_mat, dist):
    # binarize
    x_mat, y_mat = x_mat > 0, y_mat > 0
    # transpose
    x_mat, y_mat = x_mat.T, y_mat.T    
    weight = dist.A.T
    
    # intersect and union
    numerator = np.dot(np.minimum(x_mat, y_mat), weight)
    denominator = np.dot(np.maximum(x_mat, y_mat), weight)
    
    return numerator / denominator

In [220]:
_vectorized_jaccard(x_mat = ligand_mat.A, y_mat=receptor_mat.A, dist=dist)



array([[0.16999356, 0.        , 0.14640486, ..., 0.09550774, 0.06584294,
        0.        ],
       [0.32563385, 0.14585169, 0.45679104, ..., 0.40108907, 0.30835396,
        0.26405674],
       [0.        , 0.        , 0.        , ..., 0.        , 0.05195678,
        0.35999528],
       ...,
       [0.        , 0.        , 0.05793051, ..., 0.21680813, 0.12773621,
        0.18800312],
       [0.03914943, 0.        , 0.4916278 , ..., 0.04271584, 0.4549307 ,
        0.14165509],
       [0.        , 0.12763652, 0.10502105, ..., 0.21501966, 0.        ,
        0.        ]], dtype=float32)

In [65]:
from liana.method.sp._spatial_utils import _local_permutation_pvals

In [66]:
local_pvals = _local_permutation_pvals(x_mat = ligand_mat.A, 
                                       y_mat = receptor_mat.A, 
                                       local_truth=local_pc,
                                       local_fun=_vectorized_correlations,
                                       dist=dist, 
                                       n_perm=n_perm, 
                                       positive_only=False,
                                       seed=seed)

100%|██████████| 100/100 [01:39<00:00,  1.00it/s]


In [67]:
local_pvals

array([[0.53, 1.  , 0.72, ..., 0.06, 0.69, 1.  ],
       [0.42, 0.96, 0.67, ..., 0.11, 0.3 , 0.93],
       [0.51, 0.69, 0.13, ..., 0.11, 0.52, 0.  ],
       ...,
       [0.66, 0.19, 0.1 , ..., 0.31, 0.85, 0.5 ],
       [0.71, 0.59, 0.33, ..., 0.32, 0.  , 1.  ],
       [1.  , 0.33, 0.69, ..., 0.14, 1.  , 1.  ]])

In [68]:
local_sp = _vectorized_correlations(x_mat = ligand_mat.A, y_mat=receptor_mat.A, dist=dist, method="spearman")

In [69]:
local_sp_pvals = _local_permutation_pvals(x_mat = ligand_mat.A, 
                                          y_mat = receptor_mat.A, 
                                          local_truth=local_sp,
                                          local_fun=_vectorized_correlations,
                                          dist=dist, 
                                          n_perm=n_perm, 
                                          positive_only=False,
                                          seed=seed,
                                          method="spearman"
                                          )

100%|██████████| 100/100 [04:26<00:00,  2.66s/it]


In [70]:
local_pvals.shape

(1409, 2688)

In [71]:
local_sp_pvals.shape

(1409, 2688)

In [72]:
spearmanr(local_sp_pvals[1,:], local_pvals[1,:])

SignificanceResult(statistic=0.9086609733687256, pvalue=0.0)

Global summary of the local scores:

In [None]:
lr_res.loc[:,['pearson_mean','pearson_sd']] = np.vstack([np.mean(local_pc, axis=1), np.std(local_pc, axis=1)]).T

In [None]:
lr_res.sort_values(by='pearson_mean', ascending=False)

local p-values

In [None]:
from numpy import random
from tqdm import tqdm

In [None]:
rng = random.default_rng(0)
n_perm = 100
positive_only = True # remove this option?

In [None]:
dist = adata.obsm['proximity']

In [None]:
local_pc.shape

In [None]:
def _get_local_permutation_pvals(x_mat, y_mat, local_truth, local_fun, dist, n_perm, positive_only=True, **kwargs):
    xy_n = local_truth.shape[0]
    spot_n = local_truth.shape[1]
    
    print(spot_n)
    
    # permutation cubes to be populated
    local_pvals = np.zeros((xy_n, spot_n))
    
    # shuffle the matrix
    for i in tqdm(range(n_perm)):
        _idx = rng.permutation(spot_n)
        perm_score = local_fun(x_mat = x_mat[_idx, :], y_mat=y_mat, dist=dist, **kwargs) ## TODO switch to shuffle rows, not columns
        if positive_only:
            local_pvals += np.array(perm_score >= local_truth, dtype=int)
        else:
            local_pvals += (np.array(np.abs(perm_score) >= np.abs(local_truth), dtype=int))

    local_pvals = local_pvals / n_perm
    
    return local_pvals
    

In [None]:
from liana.method.sp._spatial_utils import _local_permutation_pvals

In [None]:
local_pvals = _local_permutation_pvals(x_mat = ligand_mat.A, 
                                       y_mat = receptor_mat.A, 
                                       local_truth=local_pc,
                                       local_fun=calculate_local_correlations,
                                       dist=dist, 
                                       n_perm=n_perm, 
                                       positive_only=False,
                                       seed=0)

In [None]:
local_pvals

In [None]:
local_pvals

In [None]:
from  scipy.sparse import csr_matrix

In [None]:
local_pvals.shape

In [None]:
local_pvals

In [None]:
local_pc.shape

In [None]:
local_pvals.shape

In [None]:
local_masked_pvals = _get_local_permutation_pvals(x_mat = ligand_mat.A,
                                                  y_mat = receptor_mat.A,
                                                  local_truth = masked_sp,
                                                  local_fun=calculate_masked_correlations,
                                                  dist=dist,
                                                  n_perm=n_perm,
                                                  positive_only=False)

In [None]:
local_masked_pvals

In [None]:
local_masked_pvals.shape

In [None]:
from scipy.stats import spearmanr,  pearsonr

In [None]:
pearsonr(local_masked_pvals[0,:], local_pvals[0,:])