In [None]:
import scanpy as sc
import decoupler as dc

# Only needed for processing and plotting
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
import liana as li

#### Pre-process

In [None]:
# Load the 10x Visium human lymph node sample through scanpy's dataset helper
adata = sc.datasets.visium_sge(sample_id="V1_Human_Lymph_Node")
# Make the gene names in `var_names` unique
adata.var_names_make_unique()
adata

In [None]:
# Basic filtering
sc.pp.filter_cells(adata, min_genes=200)
sc.pp.filter_genes(adata, min_cells=10)

# Annotate the group of mitochondrial genes as 'mt'
adata.var['mt'] = adata.var_names.str.startswith('MT-')
sc.pp.calculate_qc_metrics(adata, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)

# Filter spots following standard QC criteria.
# Subsetting an AnnData returns a view; take an explicit copy so the in-place
# normalisation below works on an independent object instead of a view.
adata = adata[adata.obs.pct_counts_mt < 20, :].copy()

# Normalize the data
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)

# Identify highly variable genes
sc.pp.highly_variable_genes(adata)

# Keep the full normalised matrix in `.raw`, then filter to highly variable genes
adata.raw = adata
# Copy again: `sc.pp.scale` modifies the matrix in place and should not act on a view
adata = adata[:, adata.var.highly_variable].copy()

# Scale the data
sc.pp.scale(adata, max_value=10)

In [None]:
# Generate PCA features
sc.tl.pca(adata, svd_solver='arpack')

# Compute distances in the PCA space, and find spot neighbors
sc.pp.neighbors(adata)

# Run leiden clustering algorithm
# (the resulting labels are plotted below through the 'leiden' key)
sc.tl.leiden(adata)

# Visualize: one panel per entry in `color`
# NOTE(review): the `None` entry presumably draws the tissue image without an overlay - confirm
sc.pl.spatial(adata, color=[None, 'leiden'], size=1.5, wspace=0)

In [None]:
# Persist the processed object; the second half of the notebook reads this file
# back with `sc.read_h5ad("processed_visium.h5ad")`
adata.write_h5ad("processed_visium.h5ad")

In [None]:
# PROGENy network for human, used as the `net` argument of `dc.run_mlm` below
# NOTE(review): `top=1000` presumably caps the number of targets kept per pathway - confirm in the decoupler docs
model = dc.get_progeny(organism='human', top=1000)
model

In [None]:
# Run decoupler's MLM method on the spots using the PROGENy network in `model`
dc.run_mlm(mat=adata, net=model, source='source', target='target', weight='weight', verbose=True)

# Store them in a different key
# The results are read from the generic 'mlm_estimate' / 'mlm_pvals' keys, which the
# DoRothEA run further down reads as well, so keep PROGENy-specific copies.
adata.obsm['progeny_mlm_estimate'] = adata.obsm['mlm_estimate'].copy()
adata.obsm['progeny_mlm_pvals'] = adata.obsm['mlm_pvals'].copy()

In [None]:
acts = dc.get_acts(adata, obsm_key='progeny_mlm_estimate')
acts

In [None]:
acts.write_h5ad('progeny_acts.h5ad')

In [None]:
# DoRothEA network for human; note that this rebinds `model`, which held the PROGENy network above
model = dc.get_dorothea(organism='human')

In [None]:
# Run decoupler's MLM method again, this time with the DoRothEA network in `model`
dc.run_mlm(mat=adata, net=model, source='source', target='target', weight='weight', verbose=True)

# Store them in a different key
# Same generic 'mlm_estimate' / 'mlm_pvals' keys as for the PROGENy run; keep DoRothEA-specific copies.
adata.obsm['dorothea_mlm_estimate'] = adata.obsm['mlm_estimate'].copy()
adata.obsm['dorothea_mlm_pvals'] = adata.obsm['mlm_pvals'].copy()

In [None]:
acts = dc.get_acts(adata, obsm_key='dorothea_mlm_estimate')
acts

In [None]:
acts.write_h5ad('dorothea_acts.h5ad')

In [None]:
acts

## Any X,Y combo

SpatialDM becomes SpatialLRMethod

SpatialMethod is any X,Y

In [1]:
import liana as li

In [2]:

import scanpy as sc
import pandas as pd
import numpy as np

In [3]:
from mudata import MuData

In [4]:
from liana.utils._utils import _get_props
from scipy.sparse import csr_matrix

In [5]:
adata = sc.read_h5ad("processed_visium.h5ad")

In [6]:
progeny = sc.read_h5ad('progeny_acts.h5ad')

In [7]:
dorothea = sc.read_h5ad('dorothea_acts.h5ad')

In [8]:
# Bundle the expression object and the two activity objects as modalities of one MuData;
# the modality names ('progeny', 'dorothea') are what `x_mod` / `y_mod` refer to below
mdata = MuData({"expr": adata, 'progeny': progeny, 'dorothea': dorothea})



In [9]:
# transfer coordinates
mdata.obsm["spatial"] = adata.obsm["spatial"]

In [10]:
# Compute spot-to-spot proximity weights for `mdata`.
# NOTE(review): presumably this is what fills `mdata.obsm['proximity']`, which is read below - confirm
li.mt.get_spatial_proximity(adata=mdata, parameter=100, bypass_diagonal=False, cutoff=0.1)

In [11]:
from liana.method.sp._spatialdm import _global_spatialdm, _local_to_dataframe, _local_spatialdm

In [12]:
from liana.method.sp._spatial_pipe import global_bivariate_pipe, _get_ordered_matrix

In [13]:
from liana.method.sp._bivariate_funs import _vectorized_spearman, _vectorized_pearson, _vectorized_wcosine, _vectorized_jaccard, _masked_pearson, _masked_spearman

In [14]:
from liana.method.sp._spatial_utils import _local_permutation_pvals

In [15]:
x_mod = 'progeny'
y_mod = 'dorothea'

In [16]:
xy_stats, x_pos, y_pos = global_bivariate_pipe(mdata, x_mod=x_mod, y_mod=y_mod, nz_threshold=0)

In [17]:
xy_stats

Unnamed: 0,interaction,x_entity,x_means,x_non_zero,y_entity,y_means,y_non_zero
0,Androgen&AHR,Androgen,1.055404,1.0,AHR,-0.134946,1.0
1,EGFR&AHR,EGFR,0.913506,1.0,AHR,-0.134946,1.0
2,Estrogen&AHR,Estrogen,-2.512781,1.0,AHR,-0.134946,1.0
3,Hypoxia&AHR,Hypoxia,1.044591,1.0,AHR,-0.134946,1.0
4,JAK-STAT&AHR,JAK-STAT,6.993168,1.0,AHR,-0.134946,1.0
...,...,...,...,...,...,...,...
4111,TNFa&ZNF740,TNFa,5.252833,1.0,ZNF740,0.856168,1.0
4112,Trail&ZNF740,Trail,-2.640625,1.0,ZNF740,0.856168,1.0
4113,VEGF&ZNF740,VEGF,0.928763,1.0,ZNF740,0.856168,1.0
4114,WNT&ZNF740,WNT,-0.208129,1.0,ZNF740,0.856168,1.0


To SpatialMethod--->

In [18]:
# Proximity weights stored on the MuData object
dist = mdata.obsm['proximity']
# Dense float32 version of the same weights.
# NOTE(review): in the visible cells `weight` is only inspected (`weight.dtype`); the
# calls further down are given `dist` instead - confirm which one is intended.
weight = dist.A.astype(np.float32)

In [19]:
# convert to spot_n x lr_n matrices
x_mat = _get_ordered_matrix(mat=mdata[x_mod].X,
                            pos=x_pos,
                            order=xy_stats['x_entity'])
y_mat = _get_ordered_matrix(mat=mdata[y_mod].X,
                            pos=y_pos,
                            order=xy_stats['y_entity'])

In [20]:
weight.dtype

dtype('float32')

In [21]:
x_mat

<4116x4032 sparse matrix of type '<class 'numpy.float32'>'
	with 16595712 stored elements in Compressed Sparse Column format>

In [22]:
import numba as nb

In [23]:
# Local Pearson scores for every x/y pair; both matrices are transposed and densified first.
# NOTE(review): the recorded output of this cell is a NameError about
# `_vectorized_correlations`, so `local_pc` was not produced in the saved run.
local_pc = _vectorized_pearson(x_mat.T.A, y_mat.T.A, dist)

NameError: name '_vectorized_correlations' is not defined

In [None]:
import numba as nb

In [None]:
# Permutation-based p-values for the local scores (20 permutations, fixed seed).
# NOTE(review): `masked_pc` and `_vectorized_correlations` are neither defined nor imported
# in the visible notebook, so this cell fails on a fresh kernel; the imported names are
# `_vectorized_pearson` / `_masked_pearson` and the variable computed above is `local_pc`.
# Confirm the intended inputs before re-running.
local_sp_pvals = _local_permutation_pvals(x_mat = x_mat.A.T, 
                                          y_mat = y_mat.A.T, 
                                          local_truth=masked_pc,
                                          local_fun=_vectorized_correlations,
                                          dist=dist, 
                                          n_perm=20, 
                                          positive_only=False,
                                          seed=0,
                                          method="pearson"
                                          )

In [None]:
from matplotlib.pyplot import hist

THEN LASSO Regularization on each spot

where we try to see which variables best explain the rest of the variables in that spot?

(considering each spot a center of signalling in the periphery)

Categorize

In [None]:
# create two matrices
x = np.array([[1, 0, -1, -1], [1, 0, 1, -1]])
y = np.array([[1, 0, 1, -1], [-1, -1, 0, -1]])

In [None]:
def _encode_as_char(a):
    a = np.where(a > 0, 'P', np.where(a<0, 'N', 'Z'))
    return a

In [None]:
def _categorize(x, y):
    cat = np.core.defchararray.add(x, y)
    return cat

In [None]:
def _simplify_cats(a):
    result = np.where(np.char.find(a, 'Z') >= 0, 'U',
              np.where(a == 'PP', 'P',
                       np.where(a == 'NN', 'P*',
                                np.where(np.char.find(a, 'N') >= 0, 'N', a))))
    return result

In [None]:
cats = _categorize(_encode_as_char(x), _encode_as_char(y))

In [None]:
cats

In [None]:
_simplify_cats(cats)

In [None]:
cats = _categorize(_encode_as_char(x_mat.A), _encode_as_char(y_mat.A))

In [None]:
_simplify_cats(cats)

In [None]:
x_mat

Test Analytical Solution

In [None]:
from liana.method.sp._spatialdm import _get_local_var, _standardize_matrix

In [None]:
from scipy.stats import norm

In [None]:
def _local_zscore_pvals(x_mat, y_mat, local_r, dist, positive_only):
    """
    Turn local scores into p-values through a z-score and the normal survival function.

    Parameters
    ----------
    x_mat, y_mat
        2D arrays; a normal distribution is fitted to every column (axis 0)
        and only the fitted scale is used.
        NOTE(review): presumably spots x interactions - confirm with the caller.
    local_r
        Local scores; divided element-wise by the value returned from
        ``_get_local_var``.
    dist
        Weight matrix; its first dimension is taken as the number of spots.
    positive_only
        If True, the one-sided survival function of the z-scores is used and
        entries where neither x nor y is positive are set to 1. Otherwise the
        survival function of the absolute z-scores is returned.

    Returns
    -------
    Array of p-values with the same shape as ``local_r / std``.
    """
    n_spots = dist.shape[0]

    # norm.fit yields (loc, scale) per column; row 1 of the stacked result is the scale
    x_fit = np.apply_along_axis(norm.fit, axis=0, arr=x_mat)
    y_fit = np.apply_along_axis(norm.fit, axis=0, arr=y_mat)
    x_scale = x_fit[1, :]
    y_scale = y_fit[1, :]

    # Rescale both scales by n / (n - 1)
    x_scale = x_scale * n_spots / (n_spots - 1)
    y_scale = y_scale * n_spots / (n_spots - 1)

    local_std = _get_local_var(x_scale, y_scale, dist, n_spots)
    zscores = local_r / local_std

    if not positive_only:
        return norm.sf(np.abs(zscores))

    pvals = norm.sf(zscores)
    # Transposed so the mask lines up with the layout of the p-value array
    any_positive = ((x_mat > 0) + (y_mat > 0)).T  # mask?
    pvals[~any_positive] = 1
    return pvals

In [None]:
x_norm, y_norm = _standardize_matrix(x_mat), _standardize_matrix(y_mat)

In [None]:
# Analytical p-values from the standardized matrices (positive_only=True).
# NOTE(review): `masked_pc` is not defined anywhere in the visible notebook - confirm
# which local score array is meant to be passed here.
local_zpvals = _local_zscore_pvals(x_norm.T, y_norm.T, masked_pc, dist, True)

In [None]:
np.min(local_zpvals)

In [None]:
hist(local_zpvals[0,:])

In [None]:
masked_pc

In [None]:
# n / sum(W) for Moran's I
norm_factor = mdata.obsm['proximity'].shape[0] / mdata.obsm['proximity'].sum()
dist = csr_matrix(norm_factor * mdata.obsm['proximity'])

In [None]:
# we use the same gene expression matrix for both x and y
xy_stats['global_r'], xy_stats['global_pvals'] = \
    _global_spatialdm(x_mat=progeny.X,
                      y_mat=dorothea.X,
                      x_pos=x_pos,
                      y_pos=y_pos,
                      xy_dataframe=xy_stats,
                      dist=dist,
                      seed=0,
                      n_perm=1000,
                      pvalue_method="analytical",
                      positive_only=False,
                      x_key = 'x_entity',
                      y_key = 'y_entity'
                      )
local_r, local_pvals = _local_spatialdm(x_mat=progeny.X,
                                        y_mat=dorothea.X,
                                        x_pos=x_pos,
                                        y_pos=y_pos,
                                        xy_dataframe=xy_stats,
                                        dist=dist,  # TODO msq?
                                        seed=0,
                                        n_perm=1000,
                                        pvalue_method="analytical",
                                        positive_only=False,
                                        x_key = 'x_entity',
                                        y_key = 'y_entity'
                                        )

In [None]:
# convert to dataframes
local_r = _local_to_dataframe(array=local_r,
                              idx=adata.obs.index,
                              columns=xy_res.interaction)
local_pvals = _local_to_dataframe(array=local_pvals,
                                  idx=adata.obs.index,
                                  columns=xy_res.interaction)

In [None]:
# Attach the results to `adata`: the global table (`xy_stats`, which carries the
# 'global_r' / 'global_pvals' columns) in `.uns`, the per-spot frames in `.obsm`.
# `xy_stats` replaces the undefined name `xy_res`.
adata.uns['global_res'] = xy_stats
adata.obsm['local_r'] = local_r
adata.obsm['local_pvals'] = local_pvals

In [None]:
# Ten pairs with the highest global score (`xy_stats` replaces the undefined `xy_res`)
xy_stats.sort_values(by='global_r', ascending=False).head(10)

In [None]:
# Turn the per-spot results stored in `adata.obsm` into standalone objects that
# `sc.pl.spatial` is called on below.
# NOTE(review): `local_pvals` is reused here for a different object than in the cells
# above; a distinct name (as done for `local_rs`) would avoid the ambiguity.
local_pvals = li.ut.obsm_to_adata(adata, obsm_key='local_pvals')
local_rs = li.ut.obsm_to_adata(adata, obsm_key='local_r')

In [None]:
sc.pl.spatial(local_rs, color=['JAK-STAT&STAT2'], cmap='coolwarm', vmax=5)

In [None]:
sc.pl.spatial(local_pvals, color=['JAK-STAT&STAT2'], cmap='coolwarm_r')

In [None]:
sc.pl.spatial(progeny, color=['JAK-STAT'], use_raw=False, cmap='coolwarm', vmax=5)

In [None]:
sc.pl.spatial(dorothea, color=['STAT2'], use_raw=False, cmap='coolwarm', vmax=5)

In [None]:
adata

Run SpatialDM

In [None]:
from liana.method.