In [65]:
import squidpy as sq
import scanpy as sc
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix, isspmatrix_csr

In [3]:
import liana as li

In [4]:
from matplotlib.pyplot import hist

In [5]:
from liana.method._global_lr_pipe import _global_lr_pipe
from liana.method.sp._spatialdm import _get_ordered_matrix, _standardize_matrix

In [6]:
import numba as nb

In [None]:
counts = pd.read_csv("data/counts_mat.csv")
weights = pd.read_csv("data/weight_mat.csv")

In [None]:
# Build the `var` table passed to AnnData below: a frame with no data columns whose
# index is the first CSV column ('Unnamed: 0'; presumably feature names - confirm).
var = pd.DataFrame(counts[['Unnamed: 0']]).set_index('Unnamed: 0')
# Drop the placeholder index name inherited from the CSV header.
var.index.name = None

In [None]:
# Drop the first (label) column, store the remaining values as a float32 CSR matrix
# and transpose it, so CSV columns become AnnData rows (obs) and CSV rows line up
# with the entries of `var`.
adata = sc.AnnData(X=csr_matrix(counts.drop(counts.columns[0], axis=1), dtype=np.float32).T, var=var)

In [None]:
adata.obsm['proximity'] = csr_matrix(weights)

In [11]:
# scHOT data test
adata = sc.read_h5ad("test_spatial.h5ad")
resource = pd.DataFrame({"ligand":["Dnm1l", "Arrb1", "Igf2", "Dnm1l"], "receptor":["Gucy1b3", "Mtor", "Tuba1a", "Fam63b"]})
dist = adata.obsm['proximity']

In [12]:
n_perm = 100
seed = 0

In [30]:
# full visium slide
# load the pre-processed dataset
# NOTE(review): this rebinds `adata`, `dist` and `resource`, replacing the values
# set by the scHOT test cell above.
img = sq.datasets.visium_hne_image()
adata = sq.datasets.visium_hne_adata()

# Presumably stores the spot-by-spot proximity weights in adata.obsm['proximity']
# (the key read on the next line) - confirm against the liana documentation.
li.mt.get_spatial_proximity(adata=adata, parameter=200, bypass_diagonal=False, cutoff=0.1)
dist = adata.obsm['proximity']

# Plot for the spot at index 100, and the ligand-receptor resource used downstream.
my_p = li.pl.proximity_plot(adata, idx=100)
resource = li.resource.select_resource("mouseconsensus")

In [66]:
temp, lr_res, ligand_pos, receptor_pos = _global_lr_pipe(adata=adata,
                                                         resource=resource,
                                                         expr_prop=0.05,
                                                         use_raw=False,
                                                         verbose=True,
                                                         layer=None,
                                                         _key_cols=['ligand_complex', 'receptor_complex'],
                                                         _complex_cols=['ligand_means', 'receptor_means'],
                                                         _obms_keys=['proximity'],
                                                         resource_name=None
                                                         )

Using `.X`!


In [33]:
lr_res = lr_res.head(50)

In [67]:
# Dense float32 expression matrices for the ligands (x) and receptors (y) listed in
# lr_res. After the transpose the notebook reports shape (2688, 50) for x_mat, so
# columns follow the rows of lr_res (presumably rows are spots - confirm).
x_mat = _get_ordered_matrix(temp.X, ligand_pos, lr_res.ligand).A.astype(np.float32).T
y_mat = _get_ordered_matrix(temp.X, receptor_pos, lr_res.receptor).A.astype(np.float32).T

In [35]:
@nb.njit(nb.f4(nb.f4[:], nb.f4[:], nb.f4[:], nb.f4), cache=True)
def wcor(x, y, w, wsum):
    """Weighted Pearson correlation of ``x`` and ``y`` under the weights ``w``.

    Parameters
    ----------
    x, y : float32 1-D arrays
        The two variables to correlate.
    w : float32 1-D array
        One weight per element of ``x`` / ``y``.
    wsum : float32
        Total weight used in the formula; the caller supplies it (it is not
        recomputed from ``w`` here).

    Returns
    -------
    float32
        ``(wsum*sum(w*x*y) - sum(w*x)*sum(w*y))`` divided by the square root of the
        product of the two matching variance terms, plus ``1.0e-20``.
    """
    weighted_x = w * x
    weighted_y = w * y
    sum_wx = np.sum(weighted_x)
    sum_wy = np.sum(weighted_y)

    cross_term = wsum * np.sum(weighted_x * y) - sum_wx * sum_wy

    spread_x = wsum * np.sum(w * (x**2)) - sum_wx**2
    spread_y = wsum * np.sum(w * (y**2)) - sum_wy**2

    # The 1.0e-20 offset keeps the division defined when the spread product is zero
    # (e.g. all weights are zero), in which case the numerator is zero as well.
    return cross_term / (np.sqrt(spread_x * spread_y) + 1.0e-20)

In [None]:
i = 0
dist = adata.obsm['proximity']
weight = dist.A.astype(np.float32)

In [None]:
# Total weight of row 0 only.
# NOTE(review): this single value is later passed to calculate_wcor, which applies
# it to every row of `weight`; that is only exact if all rows have the same sum.
wsum = np.sum(weight[0,:])

In [None]:
x, y = x_mat[:,i].astype(np.float32), y_mat[:,i].astype(np.float32)

In [None]:
w = weight[i,:].astype(np.float32)

In [None]:
wx = w * x
yw = w * y

In [None]:
numerator = wsum * np.sum(wx * y) - np.sum(wx) * np.sum(yw)

In [None]:
wsum * np.sum(wx * y)

In [None]:
np.sum(wx) * np.sum(yw)

In [None]:
numerator

In [None]:
# Weighted variance terms of x and y (each scaled by wsum**2), mirroring `wcor`.
denominator_x = wsum * np.sum(w * (x**2)) - np.sum(wx)**2
denominator_y = wsum * np.sum(w * (y**2)) - np.sum(yw)**2
denominator = np.sqrt(denominator_x * denominator_y) + 1.0e-20  # avoid division by zero; TODO: if the weights are all zero, return 0 instead? (alternative offset: np.finfo(np.float64).eps)


In [None]:
wcor = numerator / denominator

In [None]:
wcor

In [None]:
# all weights = 1 (i.e. pearsonr)
wcor(x=x, y=y, w=weight[i,:], wsum=wsum)

In [None]:
@nb.njit(nb.f4[:,:](nb.f4[:,:], nb.f4[:,:], nb.f4[:,:], nb.f4), cache=True, parallel=True)
def calculate_wcor(x_mat, y_mat, weight, wsum):
    """Fill a matrix with ``wcor`` scores for every (weight row, column) pair.

    Parameters
    ----------
    x_mat, y_mat : float32 2-D arrays
        Paired variables stored column-wise; column ``j`` of each is correlated.
    weight : float32 2-D array
        Row ``i`` supplies the weights for output row ``i``.
    wsum : float32
        Weight total forwarded unchanged to ``wcor`` for every row.
        NOTE(review): this is only exact when all rows of ``weight`` share that sum.

    Returns
    -------
    float32 2-D array of shape ``x_mat.shape``.
    """
    n_rows, n_cols = x_mat.shape
    scores = np.zeros((n_rows, n_cols), dtype=nb.f4)

    # Rows are independent, so the outer loop is the parallel one.
    for row in nb.prange(n_rows):
        row_weights = weight[row, :]
        # TODO (from the original notebook): return 0 when the row's weights sum to zero
        for col in range(n_cols):
            scores[row, col] = wcor(x_mat[:, col], y_mat[:, col], row_weights, wsum)

    return scores

In [None]:
%%time
calculate_wcor(x_mat, y_mat, weight, wsum)

NameError: name 'np' is not defined

Semi-vectorized

In [36]:
x_mat.shape

(2688, 50)

In [68]:
# @nb.njit(nb.f4[::1](nb.f4[:,::1], nb.f4[:,::1], nb.f4[::1], nb.f4), cache=True)
# def wcov_vec(x_mat, y_mat, w, wsum):
    
#     n1 = wsum * (x_mat * y_mat).T.dot(w)
#     n2 = x_mat.T.dot(w) * y_mat.T.dot(w)
#     numerator = n1 - n2
    
#     denominator_x = wsum * w.dot(x_mat**2) - x_mat.T.dot(w)**2
#     denominator_y = wsum * w.dot(y_mat**2) - y_mat.T.dot(w)**2
#     denominator = np.sqrt(denominator_x * denominator_y + np.finfo(nb.f4).eps)
    
#     wcov = numerator / (denominator)
    
#     return wcov

In [80]:
@nb.njit(nb.float32[:,:](nb.float32[:,::1], nb.float32[:,::1], nb.float32[:,::1], nb.float32), cache=True)
def calc_local_correlations(x_mat, y_mat, weight, wsum):
    """Weighted correlation of every column pair, once per row of ``weight``.

    Parameters
    ----------
    x_mat, y_mat : C-contiguous float32 2-D arrays
        Paired variables stored column-wise.
    weight : C-contiguous float32 2-D array
        Row ``i`` holds the weights used for output row ``i``.
    wsum : float32
        Weight total applied to every row.
        NOTE(review): this is only exact when all rows of ``weight`` share that sum.

    Returns
    -------
    float32 2-D array of shape ``x_mat.shape``.
    """

    def row_scores(xs, ys, w_row, w_total):
        # All columns at once for a single weight vector.
        sum_wx = xs.T.dot(w_row)
        sum_wy = ys.T.dot(w_row)

        cross_term = w_total * (xs * ys).T.dot(w_row) - sum_wx * sum_wy

        spread_x = w_total * w_row.dot(xs**2) - sum_wx**2
        spread_y = w_total * w_row.dot(ys**2) - sum_wy**2

        # float32 machine epsilon is added inside the square root so the
        # denominator is never exactly zero.
        return cross_term / (np.sqrt(spread_x * spread_y + np.finfo(nb.float32).eps))

    scores = np.zeros((x_mat.shape[0], x_mat.shape[1]), dtype=nb.float32)

    for spot in range(weight.shape[0]):
        scores[spot, :] = row_scores(x_mat, y_mat, weight[spot, :], wsum)

    return scores

In [70]:
x_mat = _get_ordered_matrix(temp.X, ligand_pos, lr_res.ligand).A.astype(np.float32).T
y_mat = _get_ordered_matrix(temp.X, receptor_pos, lr_res.receptor).A.astype(np.float32).T

In [71]:
i = 0

In [72]:
# Densify the proximity matrix and cast it to float32 for the numba kernels.
dist = adata.obsm['proximity']
weight = dist.A.astype(np.float32)
# NOTE(review): looks redundant - `weight` was already cast to float32 on the previous line.
weight = np.asarray(weight, dtype=np.float32)
# Weights of row `i` and their total; `wsum` therefore describes row `i` only.
w = weight[i,:].astype(np.float32)
wsum = np.sum(w)

In [73]:
# calc_local_correlations declares C-contiguous inputs (`float32[:, ::1]`), while
# x_mat / y_mat come from a transpose, so copy them into C order first.
x_mat = np.ascontiguousarray(x_mat)
y_mat = np.ascontiguousarray(y_mat)
weight = np.ascontiguousarray(weight)

In [74]:
x_mat.shape

(2688, 1409)

In [81]:
wcov_vec(x_mat, y_mat, w, wsum)

ValueError: data type <class 'numpy.object_'> not inexact

In [None]:
%%time
calc_local_correlations(x_mat=x_mat, y_mat=y_mat, weight=weight, wsum=wsum)

In [None]:
def convert2(x, dtype=np.float64):
    """Convert ``x`` to a NumPy array, or to nested Numba typed lists if that fails.

    Parameters
    ----------
    x : array-like or (possibly ragged) nested sequence
        Data to convert.
    dtype : NumPy dtype, default ``np.float64``
        Element type requested for every array that is produced.

    Returns
    -------
    numpy.ndarray or numba.typed.List
        An array when ``x`` converts directly; otherwise a typed list whose items
        are the recursively converted elements of ``x``.

    Raises
    ------
    ValueError, TypeError
        If ``x`` is a string/bytes value that cannot be converted (iterating it
        would recurse forever), or if an element can neither be converted nor
        iterated.
    """
    try:
        # Succeeds once the nesting is regular, which ends the recursion.
        return np.asarray(x, dtype=dtype)
    except (ValueError, TypeError):
        # The conversion fails both for ragged nesting and for elements that are
        # incompatible with `dtype`; the two cases cannot be told apart here.
        if isinstance(x, (str, bytes)):
            # A string iterates into one-character strings indefinitely.
            raise

        # Convert every element separately, then wrap the results in a Numba list.
        converted = [convert2(item, dtype=dtype) for item in x]
        return nb.typed.List(converted)

In [None]:
# inspect_types() writes its report to stdout itself and returns None, so it is
# called directly instead of being wrapped in print().
calc_local_correlations.inspect_types()

In [None]:
convert2(y_mat)

In [None]:
weight

In [None]:
wsum

In [None]:
# @nb.njit(nb.f4[:,:](nb.f4[:,:], nb.f4[:,:], nb.f4[:,:], nb.f4), cache=True, parallel=True)
def calculate_wcor(x_mat, y_mat, weight, wsum):
    """Un-jitted variant: one ``wcov_vec`` call per row of ``weight``.

    Each call receives the full ``x_mat`` / ``y_mat``, the row's weights and the
    shared ``wsum``; its result is written to the matching output row.

    Returns a float32 array of shape ``x_mat.shape``.
    """
    n_rows, n_cols = x_mat.shape[0], x_mat.shape[1]
    scores = np.zeros((n_rows, n_cols), dtype=np.float32)

    # nb.prange is kept so the njit decorator can be re-enabled unchanged.
    for row in nb.prange(n_rows):
        # TODO (from the original notebook): return 0 when the row's weights sum to zero
        scores[row, :] = wcov_vec(x_mat, y_mat, weight[row, :], wsum)

    return scores

In [None]:
numerator = np.zeros((x_mat.shape[0], x_mat.shape[1]), dtype=np.float32)
for i in range(weight.shape[0]):
    w = weight[i, :]
    # `i` runs over the rows of `weight`, so it selects a ROW of the output;
    # indexing a column with it runs out of bounds once i >= x_mat.shape[1].
    numerator[i, :] = wcov_vec(x_mat, y_mat, w, wsum)
    

In [None]:
def wcov_vec(x, y, w, wsum):
    """Weighted Pearson correlation of ``x`` and ``y`` (plain NumPy).

    Parameters
    ----------
    x, y : array-like
        The two variables to correlate.
    w : array-like
        Weights, multiplied element-wise with ``x`` and ``y``.
    wsum : scalar
        Total weight used in the formula; supplied by the caller.

    Returns
    -------
    scalar
        Weighted covariance term divided by the square root of the product of the
        two weighted variance terms, plus ``1.0e-20``.
    """
    weighted_x, weighted_y = w * x, w * y
    sum_wx, sum_wy = np.sum(weighted_x), np.sum(weighted_y)

    cross_term = wsum * np.sum(weighted_x * y) - sum_wx * sum_wy

    spread_x = wsum * np.sum(w * (x**2)) - sum_wx**2
    spread_y = wsum * np.sum(w * (y**2)) - sum_wy**2

    # The tiny offset keeps the ratio defined (and equal to 0) when the spread
    # product is zero, e.g. when all weights are zero.
    return cross_term / (np.sqrt(spread_x * spread_y) + 1.0e-20)

Correlations

Fully-vectorized

In [None]:
import squidpy as sq
import scanpy as sc
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix, isspmatrix_csr

In [None]:
import liana as li

In [None]:
from matplotlib.pyplot import hist
from scipy.stats import spearmanr, pearsonr

In [None]:
from liana.method._global_lr_pipe import _global_lr_pipe
from liana.method.sp._spatialdm import _get_ordered_matrix, _standardize_matrix

In [None]:
from scipy.stats import rankdata

In [None]:
# ligand-receptor mats
ligand_mat = _get_ordered_matrix(temp.X, ligand_pos, lr_res.ligand).T
receptor_mat = _get_ordered_matrix(temp.X, receptor_pos, lr_res.receptor).T

In [None]:
def calculate_local_correlations(x_mat, y_mat, dist, method="pearson"):
    """Weighted local correlation of every column pair around every row of ``dist``.

    Parameters
    ----------
    x_mat, y_mat : 2-D arrays of shape (n, k)
        Paired variables stored column-wise; the ``n`` rows must match ``dist``.
    dist : (n, n) sparse matrix or dense array
        Row ``i`` holds the weights used for location ``i``.
    method : {"pearson", "spearman"}
        "spearman" ranks each variable across all ``n`` rows before the weighted
        Pearson formula is applied.

    Returns
    -------
    numpy.ndarray of shape (k, n)
        Entry ``[p, i]`` is the weighted correlation of column ``p`` under the
        weights ``dist[i, :]``. It is 0 where the correlation is undefined (zero
        total weight or zero weighted variance).

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    if method not in ["pearson", "spearman"]:
        raise ValueError("method must be one of 'pearson', 'spearman'")

    # Work on (k, n) matrices so one matrix product covers every location.
    x_mat, y_mat = x_mat.T, y_mat.T

    # Accept sparse matrices/arrays as well as dense input.
    dense_dist = dist.toarray() if hasattr(dist, "toarray") else np.asarray(dist)
    weight = dense_dist.T
    weight_sums = np.sum(weight, axis=0).flatten()

    if method == "spearman":
        x_mat = rankdata(x_mat, axis=1)
        y_mat = rankdata(y_mat, axis=1)

    # Weighted sums, computed once and shared by numerator and denominators.
    x_wsum = x_mat.dot(weight)
    y_wsum = y_mat.dot(weight)

    numerator = (x_mat * y_mat).dot(weight) * weight_sums - x_wsum * y_wsum

    denominator_x = weight_sums * (x_mat ** 2).dot(weight) - x_wsum ** 2
    denominator_y = weight_sums * (y_mat ** 2).dot(weight) - y_wsum ** 2
    # Both terms are non-negative in exact arithmetic; clip rounding noise so the
    # square root cannot produce NaN.
    denominator = np.sqrt(np.clip(denominator_x, 0, None) * np.clip(denominator_y, 0, None))

    # Leave undefined entries at 0 rather than dividing by a tiny epsilon, which
    # would turn numerator rounding noise into arbitrary scores. NaN denominators
    # (NaN input) still propagate because NaN != 0.
    local_corrs = np.zeros(numerator.shape, dtype=denominator.dtype)
    np.divide(numerator, denominator, out=local_corrs, where=denominator != 0)

    return local_corrs

In [None]:
dist = adata.obsm['proximity']

In [None]:
local_pc = calculate_local_correlations(x_mat = ligand_mat.A, y_mat=receptor_mat.A, dist=dist, method="pearson")

In [None]:
local_pc.shape

In [None]:
local_pc

In [None]:
from liana.method.sp._spatial_utils import _local_permutation_pvals

In [None]:
local_pvals = _local_permutation_pvals(x_mat = ligand_mat.A, 
                                       y_mat = receptor_mat.A, 
                                       local_truth=local_pc,
                                       local_fun=calculate_local_correlations,
                                       dist=dist, 
                                       n_perm=n_perm, 
                                       positive_only=False,
                                       seed=seed)

In [None]:
local_pvals

In [None]:
local_sp = calculate_local_correlations(x_mat = ligand_mat.A, y_mat=receptor_mat.A, dist=dist, method="spearman")

In [None]:
local_sp_pvals = _local_permutation_pvals(x_mat = ligand_mat.A, 
                                          y_mat = receptor_mat.A, 
                                          local_truth=local_sp,
                                          local_fun=calculate_local_correlations,
                                          dist=dist, 
                                          n_perm=n_perm, 
                                          positive_only=False,
                                          seed=seed,
                                          method="spearman"
                                          )

In [None]:
local_pvals.shape

In [None]:
local_sp_pvals.shape

In [None]:
spearmanr(local_sp_pvals[1,:], local_pvals[1,:])

Global summary of the local scores:

In [None]:
lr_res.loc[:,['pearson_mean','pearson_sd']] = np.vstack([np.mean(local_pc, axis=1), np.std(local_pc, axis=1)]).T

In [None]:
lr_res.sort_values(by='pearson_mean', ascending=False)

masked

In [None]:
# ligand-receptor mats
ligand_mat = _get_ordered_matrix(temp.X, ligand_pos, lr_res.ligand)
receptor_mat = _get_ordered_matrix(temp.X, receptor_pos, lr_res.receptor)

In [None]:
import scipy.stats as stats

In [None]:
def masked_wcor(x, y, weight, method='spearman_nzw'):
    """Weighted correlation of ``x`` and ``y`` per row of ``weight``, unmasked entries only.

    Parameters
    ----------
    x, y : 1-D arrays of length n
        The two variables, one value per location.
    weight : (n, n) masked array (or plain array)
        Row ``i`` holds the weights for location ``i``; masked entries are left
        out of that row's correlation.
    method : str
        With 'spearman_nzw' the values are ranked within each row (among the
        unmasked entries) first; any other value uses the raw values.

    Returns
    -------
    numpy.ndarray of length n
        One correlation per row of ``weight``.
        NOTE(review): the mask of the result is discarded, so rows whose
        denominator is zero are not flagged - consider ``.filled(np.nan)``.
    """
    spot_n = x.shape[0]

    # Give every row of `weight` its own copy of the full x / y vector.
    # (np.tile replaces repeat + reshape(newshape=...), whose keyword is deprecated.)
    x = np.tile(np.ravel(x), (spot_n, 1))
    y = np.tile(np.ravel(y), (spot_n, 1))

    # Mask x and y exactly where the weights are masked.
    weight_mask = np.ma.getmaskarray(weight)
    x_masked = np.ma.array(x, mask=weight_mask, fill_value=np.nan)
    y_masked = np.ma.array(y, mask=weight_mask, fill_value=np.nan)

    if method == 'spearman_nzw':
        # Masked entries receive rank 0 and are dropped again by the masked weights.
        x_masked = stats.mstats.rankdata(x_masked, axis=1)
        y_masked = stats.mstats.rankdata(y_masked, axis=1)

    # Weighted Pearson correlation, row by row.
    wsum = np.ma.sum(weight, axis=1)
    xws = np.ma.sum(weight * x_masked, axis=1)
    yws = np.ma.sum(weight * y_masked, axis=1)

    n1 = wsum * np.ma.sum(weight * x_masked * y_masked, axis=1)
    n2 = xws * yws
    numerator = n1 - n2

    denominator_x = wsum * np.ma.sum(weight * (x_masked**2), axis=1) - xws**2
    denominator_y = wsum * np.ma.sum(weight * (y_masked**2), axis=1) - yws**2
    wcor = numerator / np.ma.sqrt(denominator_x * denominator_y)

    return np.ma.getdata(wcor)

In [None]:
def calculate_masked_correlations(x_mat, y_mat, dist, method='spearman_nzw'):
    """Run ``masked_wcor`` for every row pair of ``x_mat`` / ``y_mat``.

    Parameters
    ----------
    x_mat, y_mat : 2-D arrays
        Row ``i`` of each is passed to ``masked_wcor`` as one x / y vector.
    dist : sparse matrix or dense array
        Weights; every entry that is not strictly positive is masked out.
    method : str
        Forwarded to ``masked_wcor``.

    Returns
    -------
    numpy.ndarray
        The stacked ``masked_wcor`` results, one row per row of ``x_mat``.
    """
    weight = dist.toarray() if hasattr(dist, "toarray") else np.asarray(dist)
    # Only strictly positive weights take part in the correlations.
    weight = np.ma.masked_array(weight, mask=np.logical_not(weight > 0))

    # `method` is passed on explicitly; otherwise masked_wcor would always fall
    # back to its own default regardless of what the caller asked for.
    local_correlations = [
        masked_wcor(x_mat[i, :], y_mat[i, :], weight=weight, method=method)
        for i in range(x_mat.shape[0])
    ]

    return np.array(local_correlations)


In [None]:
ligand_mat.shape

In [None]:
%%time

dist = adata.obsm['proximity']

masked_sp = calculate_masked_correlations(ligand_mat[0:10,:].A, receptor_mat[0:10,:].A, dist=dist, method='spearman_nzw')

In [None]:
masked_sp[0:10,0:10]

In [None]:
hist(masked_sp[0,:])

In [None]:
# Taken from ligand_mat directly: `x` is only assigned (as ligand_mat[0, :].A) in a
# later cell, so reading x.shape[1] here depends on out-of-order execution.
spot_n = ligand_mat.shape[1]

In [None]:
weight = dist.A
msk = np.logical_not(weight>0).astype(np.int16)
weight = np.ma.masked_array(weight, mask=msk)

In [None]:
x, y = ligand_mat[0,:].A, receptor_mat[0,:].A

In [None]:
spot_n


In [None]:
# reshape x and y to be the same shape as weight
x = np.reshape(np.repeat(x, spot_n), newshape=(spot_n, spot_n)).T
y = np.reshape(np.repeat(y, spot_n), newshape=(spot_n, spot_n)).T

In [None]:
# mask x and y with the same mask as weight
x_masked = np.ma.array(x, mask = weight.mask, fill_value=np.nan)
y_masked = np.ma.array(y, mask = weight.mask, fill_value=np.nan)

In [None]:
x_masked = stats.mstats.rankdata(x_masked, axis=1)
y_masked = stats.mstats.rankdata(y_masked, axis=1)

local p-values

In [None]:
from numpy import random
from tqdm import tqdm

In [None]:
rng = random.default_rng(0)
n_perm = 100
positive_only = True # remove this option?

In [None]:
dist = adata.obsm['proximity']

In [None]:
local_pc.shape

In [None]:
def _get_local_permutation_pvals(x_mat, y_mat, local_truth, local_fun, dist, n_perm, positive_only=True, seed=None, **kwargs):
    """Permutation p-values for a matrix of local scores.

    Parameters
    ----------
    x_mat, y_mat : 2-D arrays
        Inputs handed to ``local_fun``; the rows of ``x_mat`` are shuffled in every
        permutation.
    local_truth : 2-D array
        Observed local scores; its shape defines the shape of the result.
    local_fun : callable
        Called as ``local_fun(x_mat=..., y_mat=..., dist=..., **kwargs)`` and
        expected to return an array comparable with ``local_truth``.
    dist : object
        Passed through to ``local_fun`` unchanged.
    n_perm : int
        Number of permutations (at least 1).
    positive_only : bool
        If True, count permutations with ``score >= truth``; otherwise compare
        absolute values.
    seed : int or None
        If given, a fresh ``np.random.default_rng(seed)`` is used so the call is
        reproducible on its own; if None, the notebook-level ``rng`` is used.
    **kwargs
        Extra keyword arguments for ``local_fun``.

    Returns
    -------
    numpy.ndarray
        Fraction of permutations whose score reached the observed score.

    Raises
    ------
    ValueError
        If ``n_perm`` is smaller than 1.
    """
    if n_perm < 1:
        raise ValueError("n_perm must be a positive integer")

    xy_n = local_truth.shape[0]
    spot_n = local_truth.shape[1]

    generator = rng if seed is None else np.random.default_rng(seed)

    # Running count of permutations that match or beat the observed score.
    exceed_counts = np.zeros((xy_n, spot_n))

    for _ in tqdm(range(n_perm)):
        _idx = generator.permutation(spot_n)
        # NOTE(review): the row shuffle assumes x_mat has `spot_n` rows - confirm
        # the orientation of x_mat for every local_fun this is used with.
        perm_score = local_fun(x_mat=x_mat[_idx, :], y_mat=y_mat, dist=dist, **kwargs)  ## TODO switch to shuffle rows, not columns
        if positive_only:
            exceed_counts += np.array(perm_score >= local_truth, dtype=int)
        else:
            exceed_counts += np.array(np.abs(perm_score) >= np.abs(local_truth), dtype=int)

    return exceed_counts / n_perm
    

In [None]:
from liana.method.sp._spatial_utils import _local_permutation_pvals

In [None]:
local_pvals = _local_permutation_pvals(x_mat = ligand_mat.A, 
                                       y_mat = receptor_mat.A, 
                                       local_truth=local_pc,
                                       local_fun=calculate_local_correlations,
                                       dist=dist, 
                                       n_perm=n_perm, 
                                       positive_only=False,
                                       seed=0)

In [None]:
local_pvals

In [None]:
local_pvals

In [None]:
from  scipy.sparse import csr_matrix

In [None]:
local_pvals.shape

In [None]:
local_pvals

In [None]:
local_pc.shape

In [None]:
local_pvals.shape

In [None]:
local_masked_pvals = _get_local_permutation_pvals(x_mat = ligand_mat.A,
                                                  y_mat = receptor_mat.A,
                                                  local_truth = masked_sp,
                                                  local_fun=calculate_masked_correlations,
                                                  dist=dist,
                                                  n_perm=n_perm,
                                                  positive_only=False)

In [None]:
local_masked_pvals

In [None]:
local_masked_pvals.shape

In [None]:
from scipy.stats import spearmanr,  pearsonr

In [None]:
pearsonr(local_masked_pvals[0,:], local_pvals[0,:])