# Deal with Multi-omics/Multi-view scCCC

In [1]:
import numpy as np
import pandas as pd
import scanpy as sc
import muon as mu
import liana as li

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from itertools import product

In [3]:
# Build a toy MuData object from liana's testing helpers.
from liana.testing._sample_anndata import generate_toy_mdata
mdata = generate_toy_mdata()
# Prefix the feature names of the second modality with 'scaled:' so they can be
# told apart from the features of `adata_x` (the recorded resource table pairs
# e.g. HES4 with scaled:HES4).
mdata.mod['adata_y'].var.index = 'scaled:' + mdata.mod['adata_y'].var.index
# Run scanpy's `pp.scale` on the second modality only, so the two modalities
# end up on different scales.
sc.pp.scale(mdata.mod['adata_y'])



In [4]:
# Every (adata_x feature, adata_y feature) pair, in the same order that
# itertools.product would produce them (adata_y varies fastest).
interactions = [
    (feature_x, feature_y)
    for feature_x in mdata.mod['adata_x'].var.index
    for feature_y in mdata.mod['adata_y'].var.index
]

In [5]:
# Turn the pairs into a two-column resource table: adata_x features go in the
# 'ligand' column, adata_y features in the 'receptor' column.
resource = pd.DataFrame(interactions, columns=['ligand', 'receptor'])

In [6]:
# Preview the ligand/receptor resource table built in the previous cell.
resource

Unnamed: 0,ligand,receptor
0,HES4,scaled:HES4
1,HES4,scaled:TNFRSF4
2,HES4,scaled:SSU72
3,HES4,scaled:PARK7
4,HES4,scaled:RBP7
...,...,...
95,EFHD2,scaled:SRM
96,EFHD2,scaled:MAD2L2
97,EFHD2,scaled:AGTRAP
98,EFHD2,scaled:TNFRSF1B


In [7]:
# Run the rank aggregate directly on the MuData: `mod_x` / `mod_y` name the two
# modalities and `resource` pairs every adata_x feature (ligand) with every
# adata_y feature (receptor). `transform=True` is passed explicitly here.
li.mt.rank_aggregate(
    mdata,
    groupby='bulk_labels',
    mod_x='adata_x',
    mod_y='adata_y',
    use_raw=False,
    resource=resource,
    verbose=True,
    transform=True,
)

Using `.X`!
20 cells are empty, they will be removed.
Generating ligand-receptor stats for 680 samples and 20 features
Assuming that counts were `natural` log-normalized!




Running CellPhoneDB


100%|██████████| 1000/1000 [00:04<00:00, 239.27it/s]


Running Connectome
Running log2FC
Running NATMI
Running SingleCellSignalR
Running CellChat


100%|██████████| 1000/1000 [00:03<00:00, 260.17it/s]


In [8]:
# Inspect the result table stored under `mdata.uns['liana_res']`
# (presumably written by the rank_aggregate call in the previous cell).
mdata.uns['liana_res']

Unnamed: 0,source,target,ligand_complex,receptor_complex,lr_means,cellphone_pvals,expr_prod,scaled_weight,lr_logfc,spec_weight,lrscore,lr_probs,cellchat_pvals,specificity_rank,magnitude_rank
1038,CD4+/CD45RO+ Memory,CD4+/CD45RO+ Memory,PARK7,scaled:PARK7,0.563684,0.015,0.317740,0.461206,0.165597,0.015466,0.783310,0.426254,0.005,0.013317,6.053657e-10
1086,CD8+ Cytotoxic T,CD4+/CD45RO+ Memory,PARK7,scaled:PARK7,0.534070,0.001,0.284354,0.343557,0.129845,0.013841,0.773740,0.411399,0.000,0.004927,9.455265e-09
1493,CD4+/CD45RO+ Memory,CD8+ Cytotoxic T,PARK7,scaled:PARK7,0.534070,0.001,0.284354,0.343557,0.129845,0.013841,0.773740,0.411399,0.000,0.004927,9.455265e-09
1541,CD8+ Cytotoxic T,CD8+ Cytotoxic T,PARK7,scaled:PARK7,0.504456,0.041,0.254476,0.225908,0.094094,0.012387,0.763876,0.396704,0.002,0.034786,9.897550e-08
3956,CD4+/CD45RO+ Memory,Dendritic,PARK7,scaled:PARK7,0.515404,0.003,0.263311,0.269403,0.101287,0.012817,0.766940,0.386869,0.000,0.017210,1.006036e-07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40,CD4+/CD25 T Reg,CD4+/CD25 T Reg,TNFRSF4,scaled:AGTRAP,0.085657,0.298,0.007067,0.133913,0.020839,0.019459,0.350280,0.000000,1.000,0.424793,1.000000e+00
48,CD4+/CD25 T Reg,CD4+/CD25 T Reg,TNFRSF4,scaled:TNFRSF1B,0.083979,0.340,0.006725,0.139734,0.017362,0.020854,0.344647,0.000000,1.000,0.486529,1.000000e+00
4259,CD56+ NK,Dendritic,SRM,scaled:SRM,0.145566,0.801,0.020507,-0.083496,-0.036343,0.007850,0.478717,0.000000,1.000,1.000000,1.000000e+00
4252,CD56+ NK,Dendritic,SRM,scaled:PARK7,0.293281,0.710,0.055792,-0.056540,-0.022115,0.007616,0.602348,0.000000,1.000,1.000000,1.000000e+00


In [9]:
# TODO: emit a warning when the data are transformed.
# Same call as the previous run, but without passing `transform`, i.e. relying
# on its default value.
li.mt.rank_aggregate(
    mdata,
    groupby='bulk_labels',
    mod_x='adata_x',
    mod_y='adata_y',
    use_raw=False,
    resource=resource,
    verbose=True,
)

Transforming data to zero-inflated min-max scale
Using `.X`!
20 cells are empty, they will be removed.
Generating ligand-receptor stats for 680 samples and 20 features
Assuming that counts were `natural` log-normalized!




Running CellPhoneDB


100%|██████████| 1000/1000 [00:04<00:00, 236.47it/s]


Running Connectome
Running log2FC
Running NATMI
Running SingleCellSignalR
Running CellChat


100%|██████████| 1000/1000 [00:03<00:00, 273.01it/s]


In [10]:
# TODO: check how well the aggregate ranks of the two runs agree.
# Inspect the result table stored under `mdata.uns['liana_res']` after the second run.
mdata.uns['liana_res']

Unnamed: 0,source,target,ligand_complex,receptor_complex,lr_means,cellphone_pvals,expr_prod,scaled_weight,lr_logfc,spec_weight,lrscore,lr_probs,cellchat_pvals,specificity_rank,magnitude_rank
1038,CD4+/CD45RO+ Memory,CD4+/CD45RO+ Memory,PARK7,scaled:PARK7,0.563684,0.015,0.317740,0.461206,0.165597,0.015466,0.783310,0.426254,0.005,0.013317,6.053657e-10
1086,CD8+ Cytotoxic T,CD4+/CD45RO+ Memory,PARK7,scaled:PARK7,0.534070,0.001,0.284354,0.343557,0.129845,0.013841,0.773740,0.411399,0.000,0.004927,9.455265e-09
1493,CD4+/CD45RO+ Memory,CD8+ Cytotoxic T,PARK7,scaled:PARK7,0.534070,0.001,0.284354,0.343557,0.129845,0.013841,0.773740,0.411399,0.000,0.004927,9.455265e-09
1541,CD8+ Cytotoxic T,CD8+ Cytotoxic T,PARK7,scaled:PARK7,0.504456,0.041,0.254476,0.225908,0.094094,0.012387,0.763876,0.396704,0.002,0.034786,9.897550e-08
3956,CD4+/CD45RO+ Memory,Dendritic,PARK7,scaled:PARK7,0.515404,0.003,0.263311,0.269403,0.101287,0.012817,0.766940,0.386869,0.000,0.017210,1.006036e-07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40,CD4+/CD25 T Reg,CD4+/CD25 T Reg,TNFRSF4,scaled:AGTRAP,0.085657,0.298,0.007067,0.133913,0.020839,0.019459,0.350280,0.000000,1.000,0.424793,1.000000e+00
48,CD4+/CD25 T Reg,CD4+/CD25 T Reg,TNFRSF4,scaled:TNFRSF1B,0.083979,0.340,0.006725,0.139734,0.017362,0.020854,0.344647,0.000000,1.000,0.486529,1.000000e+00
4259,CD56+ NK,Dendritic,SRM,scaled:SRM,0.145566,0.801,0.020507,-0.083496,-0.036343,0.007850,0.478717,0.000000,1.000,1.000000,1.000000e+00
4252,CD56+ NK,Dendritic,SRM,scaled:PARK7,0.293281,0.710,0.055792,-0.056540,-0.022115,0.007616,0.602348,0.000000,1.000,1.000000,1.000000e+00


In [11]:
from liana.funcomics.mdata_to_anndata import mdata_to_anndata

In [12]:
# Convert the two modalities into a single AnnData using liana's helper;
# `transform=False` is passed, so no transform is requested at this step.
adata = mdata_to_anndata(mdata, mod_x='adata_x', mod_y='adata_y', transform=False)

In [13]:
# Sanity check: is the converted object a view of another AnnData?
# (The recorded output is False.)
adata.is_view

False

In [14]:
# Run the rank aggregate on the already-combined AnnData, so no `mod_x` /
# `mod_y` arguments are needed; the same custom `resource` is used.
li.mt.rank_aggregate(
    adata,
    groupby='bulk_labels',
    use_raw=False,
    resource=resource,
    verbose=True,
)

Using `.X`!
20 cells are empty, they will be removed.
Generating ligand-receptor stats for 680 samples and 20 features
Assuming that counts were `natural` log-normalized!




Running CellPhoneDB


100%|██████████| 1000/1000 [00:04<00:00, 237.44it/s]


Running Connectome
Running log2FC
Running NATMI
Running SingleCellSignalR
Running CellChat


100%|██████████| 1000/1000 [00:03<00:00, 275.13it/s]


## li.mt.multiview(MuData, x_mod, y_mod, interactions, transform_fun=None, ...)

In [None]:
from anndata import concat

In [None]:
from scipy.sparse import csr_matrix, isspmatrix_csr

In [None]:
def zi_minmax(X, cutoff=0.1):
    """
    Zero-inflated min-max scaling of a matrix, applied column by column.

    Every non-zero entry ``x`` of a column is replaced by
    ``(x - col_min) / (col_max - col_min)``, where the column minimum and
    maximum are taken over the whole column (zeros included). Zero entries
    stay zero, and scaled values below ``cutoff`` are set to zero as well.

    Parameters
    ----------
    X : scipy sparse matrix or array-like, shape (n_rows, n_cols)
        Input matrix. It is copied, never modified in place.
    cutoff : float, default 0.1
        Scaled values strictly below this threshold are zeroed out.

    Returns
    -------
    scipy.sparse.csr_matrix
        Scaled matrix with the same shape as ``X``. Zeros (including values
        removed by the cutoff) are not stored explicitly.

    Notes
    -----
    Columns in which every entry has the same value carry no range to scale
    by; their entries are mapped to 0 instead of producing NaN from a 0/0
    division.
    """
    X = X.copy()
    # Ensure the matrix is in Compressed Sparse Row (CSR) format
    if not isspmatrix_csr(X):
        X = csr_matrix(X)

    # Bring the copy into canonical form: duplicates are merged and explicitly
    # stored zeros dropped, so that `X.data` lines up one-to-one with the
    # coordinates returned by `X.nonzero()` (which skips stored zeros).
    X.sum_duplicates()
    X.eliminate_zeros()

    # Nothing to scale (all-zero or empty matrix); this also avoids calling
    # min/max on a zero-sized matrix.
    if X.nnz == 0:
        return X

    # Column-wise extrema, implicit zeros included
    min_vals = X.min(axis=0).toarray().ravel()
    max_vals = X.max(axis=0).toarray().ravel()
    nonzero_rows, nonzero_cols = X.nonzero()

    # A zero range means the column is constant; divide by 1 instead so the
    # (already zero) numerator yields 0 rather than NaN.
    value_range = (max_vals - min_vals)[nonzero_cols]
    value_range[value_range == 0] = 1
    scaled_values = (X.data - min_vals[nonzero_cols]) / value_range

    # Apply cutoff to the scaled values
    scaled_values[scaled_values < cutoff] = 0

    # Create a new sparse matrix with scaled values and drop the entries that
    # were zeroed above so they are not kept as stored zeros.
    scaled = csr_matrix((scaled_values, (nonzero_rows, nonzero_cols)), shape=X.shape)
    scaled.eliminate_zeros()

    return scaled

In [None]:
## Slightly refactor the liana pipe to accept interactions (i.e. a list of tuples?)

In [None]:
# This then becomes a thin wrapper around the liana pipe, with the method of interest passed as an argument (?); zi_minmax is applied first to deal with the modalities being on different scales
# Then change select_resource to return tuples by default, and a dataframe when df=True

In [None]:
# We take the two modalities and stack their features into the same AnnData
# transform_fun defaults to None; when given, it is applied by columns (i.e. per variable), e.g. lambda x: zi_minmax(x, cutoff=0.1)

In [None]:
# Rebuild the full set of (adata_x feature, adata_y feature) pairs; the order
# matches itertools.product (adata_y varies fastest).
interactions = [
    (feature_x, feature_y)
    for feature_x in mdata.mod['adata_x'].var.index
    for feature_y in mdata.mod['adata_y'].var.index
]

In [None]:
# Two-column resource table: adata_x features as 'ligand', adata_y features as 'receptor'.
resource = pd.DataFrame(interactions, columns=['ligand', 'receptor'])

Actual function

In [None]:
def _mdata_to_anndata(mdata, mod_x, mod_y, transform=True):
    if mod_x == mod_y:
        raise ValueError('The two modalities must be different')
    if mod_x not in mdata.mod.keys():
        raise ValueError('The first modality is not in the mdata')
    if mod_y not in mdata.mod.keys():
        raise ValueError('The second modality is not in the mdata')
    
    md_x = mdata.mod[mod_x]
    md_y = mdata.mod[mod_y]
    
    adata = concat([md_x, md_y], join='outer', axis=1, merge='first', label='modality')
    
    if transform:
        adata.X = zi_minmax(adata.X)
    
    return adata
    

In [None]:
# Only convert when `mdata` really is a MuData; otherwise `adata` keeps
# whatever value it had from earlier cells.
if isinstance(mdata, mu.MuData):
    adata = _mdata_to_anndata(mdata, 'adata_x', 'adata_y', transform=True)

In [None]:
# Show the summary of the current `adata`.
adata

In [None]:
# Keys of the two modalities in `mdata.mod`, used by the cast in the next cell.
mod_x = 'adata_x'
mod_y = 'adata_y'

In [None]:
# Cast the expression matrix of each modality to float32.
# Each modality is cast from its *own* matrix; the second line previously read
# from `mod_x`, which overwrote the `mod_y` data with a copy of `mod_x`.
mdata.mod[mod_x].X = mdata.mod[mod_x].X.astype(np.float32)
mdata.mod[mod_y].X = mdata.mod[mod_y].X.astype(np.float32)

In [None]:
# Stack the two modalities into one AnnData (outer join along axis 1; for
# annotations that clash, `merge='first'` is requested).
adata = concat(
    [mdata.mod['adata_x'], mdata.mod['adata_y']],
    join='outer',
    axis=1,
    merge='first',
)

Apply minmax

In [None]:
# adata.X = zi_minmax(adata.X)
# sc.pp.filter_cells(adata, min_genes=1)

In [None]:
# Run a single method (CellPhoneDB) on the stacked AnnData with the custom resource.
li.mt.cellphonedb(
    adata,
    resource=resource,
    groupby='bulk_labels',
    use_raw=False,
    verbose=True,
)

In [None]:
# Switch to scanpy's bundled `pbmc68k_reduced` dataset. This rebinds `adata`,
# replacing the stacked multi-view object used in the cells above.
adata = sc.datasets.pbmc68k_reduced()

In [None]:
# Reference run: rank aggregate on the raw layer (`use_raw=True`) with no
# custom resource passed, keeping interactions with expr_prop=0.1.
li.mt.rank_aggregate(
    adata,
    groupby='bulk_labels',
    expr_prop=0.1,
    verbose=True,
    use_raw=True,
)

In [None]:
# Keep an independent copy of the reference results before `adata` is replaced
# and re-analysed in the following cells.
liana_res = adata.uns['liana_res'].copy()

In [None]:
# Replace `adata` with the AnnData built from its `.raw` attribute, so that
# `.X` now holds the raw-layer values.
adata = adata.raw.to_adata()

In [None]:
# Apply the zero-inflated min-max scaling (default cutoff) to the expression matrix.
adata.X = zi_minmax(adata.X)

In [None]:
# Run CellPhoneDB on the min-max scaled matrix (`use_raw=False`, so `.X` is
# used), with the same `groupby` and `expr_prop` as the reference run.
li.mt.cellphonedb(
    adata,
    groupby='bulk_labels',
    expr_prop=0.1,
    verbose=True,
    use_raw=False,
)

In [None]:
# Take an independent copy of the results (as is done for `liana_res`), so
# that renaming columns on `minmax_res` later does not also modify the table
# stored in `adata.uns['liana_res']`.
minmax_res = adata.uns['liana_res'].copy()

In [None]:
# Show the reference (untransformed) results saved earlier.
liana_res

In [None]:
# Show the CellPhoneDB p-values obtained on the min-max scaled data.
minmax_res['cellphone_pvals']

In [None]:
# Rename the two score columns in place so they do not clash with the
# identically named columns of `liana_res` in the merge below.
minmax_res.rename(
    columns=dict(
        cellphone_pvals='cellphone_pvals_minmax',
        lr_means='lr_minmax',
    ),
    inplace=True,
)

In [None]:
# Align the two result tables on the interaction key (ligand, receptor,
# source, target); only interactions present in both are kept.
merged = minmax_res.merge(
    liana_res,
    how='inner',
    on=['ligand_complex', 'receptor_complex', 'source', 'target'],
)

In [None]:
# `pearsonr` is not imported in any of the cells above, so import it here
# before first use.
from scipy.stats import pearsonr

# Correlation between the CellPhoneDB p-values of the reference run and of the
# min-max scaled run.
pearsonr(merged['cellphone_pvals'], merged['cellphone_pvals_minmax'])

In [None]:
# Correlation between the `lr_means` scores of the reference run and of the
# min-max scaled run (`lr_minmax`).
pearsonr(merged['lr_means'], merged['lr_minmax'])

## li.fun.estimate_metalinks(adata, est_fun ...) 
returns (metabolite_estimates:csr_matrix, receptors_masked:csr_matrix)

## li.mt.metalinks(adata, groupby, resource, ...)