In [None]:
import squidpy as sq
import scanpy as sc
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix, isspmatrix_csr

In [None]:
import liana as li

In [None]:
from matplotlib.pyplot import hist

In [None]:
from liana.method._global_lr_pipe import _global_lr_pipe
from liana.method.sp._spatialdm import _get_ordered_matrix, _standardize_matrix

In [None]:
counts = pd.read_csv("data/counts_mat.csv")
weights = pd.read_csv("data/weight_mat.csv")

In [None]:
var = pd.DataFrame(counts[['Unnamed: 0']]).set_index('Unnamed: 0')
var.index.name = None

In [None]:
# Drop the first (label) column and transpose: every row of the CSV becomes
# one column of X, which is what the row labels stored in `var` describe.
adata = sc.AnnData(X=csr_matrix(counts.drop(counts.columns[0], axis=1), dtype=np.float32).T, var=var)

In [None]:
adata.obsm['proximity'] = csr_matrix(weights)

In [None]:
resource = pd.DataFrame({"ligand":["Dnm1l", "Arrb1", "Igf2", "Dnm1l"], "receptor":["Gucy1b3", "Mtor", "Tuba1a", "Fam63b"]})

In [None]:
# Run liana's private ligand-receptor pipeline on the toy data. The four
# returned objects are used in the following cells to build the ligand and
# receptor matrices.
# NOTE(review): `_obms_keys` looks like a transposition of "obsm"; presumably
# it is the callee's actual keyword name -- confirm against liana.
temp, lr_res, ligand_pos, receptor_pos = _global_lr_pipe(adata=adata,
                                                         resource=resource,
                                                         expr_prop=0.05,
                                                         use_raw=False,
                                                         verbose=True,
                                                         layer=None,
                                                         _key_cols=['ligand_complex', 'receptor_complex'],
                                                         _complex_cols=['ligand_means', 'receptor_means'],
                                                         _obms_keys=['proximity'],
                                                         resource_name=None
                                                         )

In [None]:
lr_res

Correlations

In [None]:
ligand_mat = _get_ordered_matrix(temp.X, ligand_pos, lr_res.ligand)
receptor_mat = _get_ordered_matrix(temp.X, receptor_pos, lr_res.receptor)

In [None]:
x, y = ligand_mat[0, :].A.flatten(), receptor_mat[0, :].A.flatten()

In [None]:
from scipy.stats import spearmanr, pearsonr, pointbiserialr, kendalltau

In [None]:
pearsonr(x, y)

One by One

In [None]:
def wcor(x, y, weight):
    """Weighted Pearson correlation between ``x`` and ``y``.

    Every sum runs over all elements of the broadcast product, so a single
    value is returned.

    Parameters
    ----------
    x, y
        Arrays of observations; they must broadcast against ``weight``.
    weight
        Per-observation weights.

    Returns
    -------
    The weighted correlation coefficient.
    """
    total_w = np.sum(weight)
    wx = weight * x
    wy = weight * y
    sum_wx = np.sum(wx)
    sum_wy = np.sum(wy)

    covariance_term = total_w * np.sum(wx * y) - sum_wx * sum_wy
    variance_x = total_w * np.sum(weight * (x**2)) - sum_wx**2
    variance_y = total_w * np.sum(weight * (y**2)) - sum_wy**2

    return covariance_term / np.sqrt(variance_x * variance_y)

In [None]:
adata.obsm['proximity']

In [None]:
i = 0

In [None]:
# all weights = 1 (i.e. pearsonr)
wcor(x, y, np.repeat(1, x.shape[0]))

In [None]:
weight = adata.obsm['proximity']

In [None]:
# Local correlations, one per spot.
# For spot i, row i of the proximity matrix holds the spatial weights from i
# to every other spot j; the weighted Pearson correlation of x and y
# (e.g. two genes) is recomputed with exactly those weights.
local_corrs = np.array(
    [wcor(x, y, weight[i, :].A) for i in range(weight.shape[0])]
)

In [None]:
hist(local_corrs)

In [None]:
local_corrs[0:5]

masked

In [None]:
import scipy.stats as stats

In [None]:
ligand_mat = _get_ordered_matrix(temp.X, ligand_pos, lr_res.ligand)
receptor_mat = _get_ordered_matrix(temp.X, receptor_pos, lr_res.receptor)
x, y = ligand_mat[0, :].A.flatten(), receptor_mat[0, :].A.flatten()

In [None]:
def wcor_ma(x, y, weight):
    """Weighted Pearson correlation built from dot products with ``weight``.

    Parameters
    ----------
    x, y
        Observation vectors.
    weight
        Weight vector; every weighted sum is taken as ``np.dot(weight, ...)``.

    Returns
    -------
    The weighted correlation coefficient.
    """
    total_w = np.sum(weight)
    dot_wx = np.dot(weight, x)
    dot_wy = np.dot(weight, y)

    cross_term = total_w * np.dot(weight, x * y) - dot_wx * dot_wy
    spread_x = total_w * np.dot(weight, x**2) - dot_wx**2
    spread_y = total_w * np.dot(weight, y**2) - dot_wy**2

    return cross_term / np.sqrt(spread_x * spread_y)

In [None]:
weight = adata.obsm['proximity'].A

In [None]:
# msk = np.isnan(weight)
msk = np.logical_not(weight>0).astype(np.int16)

In [None]:
weight = np.ma.masked_array(weight, mask=msk)

In [None]:
xy_n = len(x)

In [None]:
# Stack xy_n copies of each vector as rows, so that row i holds the complete
# vector as seen from spot i. np.tile is used because the `newshape` keyword
# of np.reshape is deprecated since NumPy 2.1.
x_ma = np.tile(x, (xy_n, 1))
y_ma = np.tile(y, (xy_n, 1))

# x_ma = np.ma.masked_array(x_ma, mask=msk.T, fill_value=np.nan).T
# y_ma = np.ma.masked_array(y_ma, mask=msk.T, fill_value=np.nan).T

In [None]:
# x_ma = np.ma.masked_array(stats.mstats.rankdata(x_ma, axis=1), msk)
# y_ma = np.ma.masked_array(stats.mstats.rankdata(y_ma, axis=1), msk)

In [None]:
# One weighted correlation per spot, using that spot's row of the masked weights.
local_corrs = np.array(
    [
        wcor_ma(x_ma[i, :], y_ma[i, :], weight[i, :])
        for i in range(weight.shape[0])
    ]
)

In [None]:
local_corrs[0:5]

In [None]:
def masked_wcor(x, y, weight, method='spearman_nzw'):
    """Local weighted correlation of ``x`` and ``y`` for every row of ``weight``.

    For row ``i`` only the entries that are not masked in ``weight[i, :]``
    contribute. With ``method='spearman_nzw'`` the values are first ranked
    within those unmasked entries; any other ``method`` keeps the raw values,
    which gives a weighted Pearson correlation.

    Parameters
    ----------
    x, y : 1-D array-like of length n
        Values of the two variables, one per spot.
    weight : array of shape (rows, n)
        Weights, normally a ``numpy.ma.MaskedArray`` whose mask marks the
        entries to ignore. An unmasked array is treated as "nothing masked".
    method : str
        ``'spearman_nzw'`` ranks within the unmasked entries of each row.

    Returns
    -------
    numpy.ndarray
        One correlation per row of ``weight``. The plain data of the masked
        result is returned, so any masking of the result itself is dropped.
    """
    # Floats are required because the masked copies use NaN as fill value.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    # getmaskarray always yields a full boolean array, also when `weight`
    # carries no mask at all.
    mask = np.ma.getmaskarray(weight)
    n_rows = mask.shape[0]

    # One copy of x / y per row of `weight`, sharing the mask of the weights.
    # The shape comes from the inputs only, not from notebook globals.
    x_masked = np.ma.array(np.tile(x, (n_rows, 1)), mask=mask, fill_value=np.nan)
    y_masked = np.ma.array(np.tile(y, (n_rows, 1)), mask=mask, fill_value=np.nan)

    if method == 'spearman_nzw':
        x_masked = stats.mstats.rankdata(x_masked, axis=1)
        y_masked = stats.mstats.rankdata(y_masked, axis=1)

    # Weighted Pearson correlation, row by row.
    w_sum = np.ma.sum(weight, axis=1)
    wx_sum = np.ma.sum(weight * x_masked, axis=1)
    wy_sum = np.ma.sum(weight * y_masked, axis=1)

    numerator = w_sum * np.ma.sum(weight * x_masked * y_masked, axis=1) - wx_sum * wy_sum
    denominator_x = w_sum * np.ma.sum(weight * x_masked**2, axis=1) - wx_sum**2
    denominator_y = w_sum * np.ma.sum(weight * y_masked**2, axis=1) - wy_sum**2

    corr = numerator / np.ma.sqrt(denominator_x * denominator_y)

    return np.ma.getdata(corr)

In [None]:
masked_wcor(x, y, weight, method='spearman_nzw')[0:5]

In [None]:
# Densify once: calling `.A` inside the loop would rebuild the full dense
# matrix on every iteration.
ligand_dense = ligand_mat.A
receptor_dense = receptor_mat.A

# One row of local correlations per ligand-receptor pair.
local_correlations = np.array(
    [
        masked_wcor(ligand_dense[i, :], receptor_dense[i, :], weight)
        for i in range(ligand_dense.shape[0])
    ]
)

In [None]:
local_correlations

Clean

In [None]:
import squidpy as sq
import scanpy as sc
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix, isspmatrix_csr

In [None]:
import liana as li

In [None]:
from matplotlib.pyplot import hist

In [None]:
from liana.method._global_lr_pipe import _global_lr_pipe
from liana.method.sp._spatialdm import _get_ordered_matrix, _standardize_matrix

In [None]:
from scipy.stats import rankdata

In [None]:
# toy data and toy resource
adata = sc.read_h5ad("test_spatial.h5ad")
resource = pd.DataFrame({"ligand":["Dnm1l", "Arrb1", "Igf2"],
                         "receptor":["Gucy1b3", "Mtor", "Tuba1a"]}
                        )

In [None]:
temp, lr_res, ligand_pos, receptor_pos = _global_lr_pipe(adata=adata,
                                                         resource=resource,
                                                         expr_prop=0.05,
                                                         use_raw=False,
                                                         verbose=True,
                                                         layer=None,
                                                         _key_cols=['ligand_complex', 'receptor_complex'],
                                                         _complex_cols=['ligand_means', 'receptor_means'],
                                                         _obms_keys=['proximity'],
                                                         resource_name=None
                                                         )

In [None]:
# ligand-receptor mats
ligand_mat = _get_ordered_matrix(temp.X, ligand_pos, lr_res.ligand)
receptor_mat = _get_ordered_matrix(temp.X, receptor_pos, lr_res.receptor)

In [None]:
x_mat = ligand_mat.A
y_mat = receptor_mat.A

In [None]:
dist = adata.obsm['proximity']

In [None]:
def _weighted_pearson(x_mat, y_mat, weight):
    """Weighted Pearson correlation of matching rows of x_mat and y_mat, one value per column of weight."""
    weight_sums = np.sum(weight, axis=0).flatten()
    x_weighted = x_mat.dot(weight)
    y_weighted = y_mat.dot(weight)

    numerator = ((x_mat * y_mat).dot(weight)) * weight_sums - x_weighted * y_weighted
    denominator_x = (weight_sums * (x_mat ** 2).dot(weight)) - x_weighted ** 2
    denominator_y = (weight_sums * (y_mat ** 2).dot(weight)) - y_weighted ** 2

    return numerator / np.sqrt(denominator_x * denominator_y)


def _local_spearman_nonzero(x_mat, y_mat, weight):
    """Per spot, rank x and y among the spots with non-zero weight only, then correlate the ranks."""
    local_corrs = np.full((x_mat.shape[0], weight.shape[1]), np.nan)
    for spot in range(weight.shape[1]):
        neighbours = np.flatnonzero(weight[:, spot])
        if neighbours.size == 0:
            # no weighted neighbour at all: the correlation stays NaN
            continue
        x_rank = rankdata(x_mat[:, neighbours], axis=1)
        y_rank = rankdata(y_mat[:, neighbours], axis=1)
        spot_weight = weight[neighbours, spot][:, np.newaxis]
        local_corrs[:, spot] = _weighted_pearson(x_rank, y_rank, spot_weight)[:, 0]
    return local_corrs


def calculate_local_correlations(x_mat, y_mat, dist, method="pearson"):
    """Spatially weighted local correlation of every x-y pair at every spot.

    Parameters
    ----------
    x_mat, y_mat : array-like of shape (n_pairs, n_spots)
        Row ``k`` of ``x_mat`` is correlated with row ``k`` of ``y_mat``.
    dist : scipy sparse matrix or dense array of shape (n_spots, n_spots)
        Spatial weights; row ``i`` holds the weights used for the local
        correlation at spot ``i``.
    method : {"pearson", "spearman", "spearman_nz"}
        ``"pearson"`` uses the raw values, ``"spearman"`` ranks each row over
        all spots first, and ``"spearman_nz"`` ranks, separately for every
        spot, only among the spots that have a non-zero weight for it.

    Returns
    -------
    numpy.ndarray of shape (n_pairs, n_spots)
        Local correlations. Entries with a zero denominator are NaN.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    if method not in ["pearson", "spearman", "spearman_nz"]:
        raise ValueError("method must be one of 'pearson', 'spearman', 'spearman_nz'")

    # Accept sparse containers (anything offering `toarray`) as well as dense
    # input; after the transpose, column i of `weight` is row i of `dist`.
    dense_dist = dist.toarray() if hasattr(dist, "toarray") else np.asarray(dist)
    weight = np.asarray(dense_dist).T

    x_mat = np.asarray(x_mat)
    y_mat = np.asarray(y_mat)

    if method == "spearman_nz":
        # Previously this option passed validation but fell through to Pearson.
        return _local_spearman_nonzero(x_mat, y_mat, weight)

    if method == "spearman":
        x_mat = rankdata(x_mat, axis=1)
        y_mat = rankdata(y_mat, axis=1)

    return _weighted_pearson(x_mat, y_mat, weight)

In [None]:
local_pc = calculate_local_correlations(x_mat, y_mat, dist, method="pearson")
local_sp = calculate_local_correlations(x_mat, y_mat, dist, method="spearman")
# `local_spnz` is plotted and compared with `local_sp` further down, so it has
# to be computed here for the notebook to run top to bottom.
local_spnz = calculate_local_correlations(x_mat, y_mat, dist, method="spearman_nz")

In [None]:
hist(local_pc[0,:])

In [None]:
weight = dist.A.T ## does it really need to be A?

In [None]:
np.mean(x_mat.dot(weight))

In [None]:
msk = np.int16(weight==0)

In [None]:
weight2 = np.ma.masked_array(data = weight, mask=msk, fill_value=np.nan)

In [None]:
x_mat2 = np.ma.masked_array(x_mat, mask=np.isnan(x_mat))

In [None]:
x_mat.shape

In [None]:
xx = np.ma.masked_array.dot(x_mat2, weight2)

In [None]:
weight2 * 2

In [None]:
np.mean(xx)

In [None]:
hist(local_spnz[0,:])

In [None]:
from scipy.stats import spearmanr, pearsonr

In [None]:
pearsonr(local_spnz[0,:], local_sp[0,:])

local p-values

In [None]:
from numpy import random
from tqdm import tqdm

In [None]:
SEED = 0  # fixed so that the permutation p-values are reproducible across runs
rng = random.default_rng(SEED)
n_perm = 1000
positive_only = True # remove this option?

In [None]:
local_pc.shape

In [None]:
def _get_local_permutation_pvals(x_mat, y_mat, truth, function, dist, n_perm, positive_only=True, seed=None, **kwargs):
    """Permutation p-values for local (per-spot) scores.

    In every permutation the columns of ``x_mat`` are shuffled, ``function``
    is re-evaluated, and each entry counts whether the permuted score reaches
    the observed one.

    Parameters
    ----------
    x_mat, y_mat
        Input matrices forwarded to ``function``; the columns of ``x_mat``
        are shuffled, so it needs ``truth.shape[1]`` columns.
    truth : 2-D array
        Observed scores to test against.
    function : callable
        Called as ``function(x_mat=..., y_mat=..., dist=..., **kwargs)``; its
        result must be comparable element-wise with ``truth``.
    dist
        Passed through to ``function`` unchanged.
    n_perm : int
        Number of permutations (at least 1).
    positive_only : bool
        If True count ``perm >= truth``, otherwise ``|perm| >= |truth|``.
    seed : None, int or numpy.random.Generator
        Handed to ``numpy.random.default_rng``, so the result no longer
        depends on a module-level generator; pass a value for reproducible
        p-values.
    **kwargs
        Extra keyword arguments for ``function``.

    Returns
    -------
    numpy.ndarray with the shape of ``truth``
        Fraction of permutations whose score reached the observed score.

    Raises
    ------
    ValueError
        If ``n_perm`` is smaller than 1.
    """
    if n_perm < 1:
        raise ValueError("n_perm must be at least 1")

    generator = np.random.default_rng(seed)
    xy_n = truth.shape[0]
    spot_n = truth.shape[1]

    # per-pair, per-spot count of permutations that reach the observed score
    exceed_counts = np.zeros((xy_n, spot_n))

    for _ in tqdm(range(n_perm)):
        shuffled = generator.permutation(spot_n)
        perm_r = function(x_mat=x_mat[:, shuffled], y_mat=y_mat, dist=dist, **kwargs)
        if positive_only:
            exceed_counts += np.array(perm_r >= truth, dtype=int)
        else:
            exceed_counts += np.array(np.abs(perm_r) >= np.abs(truth), dtype=int)

    return exceed_counts / n_perm
    

In [None]:
local_pvals = _get_local_permutation_pvals(x_mat, y_mat, local_pc, calculate_local_correlations, dist, n_perm, positive_only=True)

In [None]:
local_pc.shape[1]

In [None]:
local_pvals.T

In [None]:
# `local_pc` holds the observed local correlations; the name `pc` is not
# defined anywhere in the notebook.
spot_n = local_pc.shape[1]  # n of 1:1 edges
xy_n = local_pc.shape[0] # n of x-y pairs (e.g. lrs)

# permutation counts to be populated (one entry per x-y pair and spot)
local_pvals = np.zeros((xy_n, spot_n))

In [None]:

# Start from zero on every run, so that re-executing this cell never adds
# counts on top of p-values that were already normalised.
perm_counts = np.zeros(local_pc.shape)

for i in tqdm(range(n_perm)):
    # Shuffle the spots (columns of x_mat), the same null model that
    # `_get_local_permutation_pvals` uses.
    _idx = rng.permutation(x_mat.shape[1])
    perm_r = calculate_local_correlations(x_mat[:, _idx], y_mat, dist)
    if positive_only:
        perm_counts += np.array(perm_r >= local_pc, dtype=int)
    else:
        perm_counts += (np.array(np.abs(perm_r) >= np.abs(local_pc), dtype=int))

local_pvals = perm_counts / n_perm

In [None]:
local_pvals.shape