# Deal with Multi-omics/Multi-view scCCC

In [1]:
import numpy as np
import pandas as pd
import scanpy as sc
import muon as mu
import liana as li

  from .autonotebook import tqdm as notebook_tqdm


## li.mt.multiview(MuData, x_mod, y_mod, interactions, transform_fun=None, ...)

In [2]:
from itertools import product

In [3]:
from anndata import concat

In [4]:
from scipy.sparse import csr_matrix, isspmatrix_csr

In [25]:
def zi_minmax(X, cutoff=0.1):
    X = X.copy()
    # Ensure the matrix is in Compressed Sparse Row (CSR) format
    if not isspmatrix_csr(X):
        X = csr_matrix(X)

    # Min-Max scaling on non-zero elements
    min_vals = np.array(X.min(axis=0).todense())[0]
    max_vals = np.array(X.max(axis=0).todense())[0]
    nonzero_rows, nonzero_cols = X.nonzero()
    scaled_values = (X.data - min_vals[nonzero_cols]) / (max_vals[nonzero_cols] - min_vals[nonzero_cols])
    
    # Apply cutoff to the matrix
    scaled_values[scaled_values < cutoff] = 0
    nonzero_rows, nonzero_cols = X.nonzero()

    # Create a new sparse matrix with scaled values
    X = csr_matrix((scaled_values, (nonzero_rows, nonzero_cols)), shape=X.shape)

    return X

In [26]:
## Refactor slightly liana pipe to accept interactions (i.e. list of tuples?)

In [27]:
# Then this just becomes a wrapper around the liana pipe, where the method of interest is passed as an argument? + We do zi_minmax to deal with different scales
# Then change select_resource to return tuples instead of a dataframe, and dataframe when df=True

In [28]:
# We take the two modalities and stack as values to the same AnnData
# transform_fun will be None and it can be applied by columns (i.e. by variable); e.g. lambda x: zi_minmax(x, axis=0)

In [29]:
from liana.testing._sample_anndata import generate_toy_mdata
mdata = generate_toy_mdata()
mdata.mod['adata_y'].var.index = mdata.mod['adata_y'].var.index + '_y'
sc.pp.scale(mdata.mod['adata_y'])



In [30]:
interactions = list(product(mdata.mod['adata_x'].var.index, mdata.mod['adata_y'].var.index))

In [31]:
resource = pd.DataFrame(interactions, columns=['ligand', 'receptor'])

Actual function

In [32]:
mod_x = 'adata_x'
mod_y = 'adata_y'

In [33]:
mdata.mod[mod_x].X = mdata.mod[mod_x].X.astype(np.float32)
mdata.mod[mod_y].X = mdata.mod[mod_x].X.astype(np.float32)

In [34]:
# Concat the two modalities
adata = concat([mdata.mod['adata_x'], mdata.mod['adata_y']], join='outer', axis=1, merge='first')

Apply minmax

In [35]:
adata.X = zi_minmax(adata.X)
sc.pp.filter_cells(adata, min_genes=1)

In [37]:
li.mt.cellphonedb(adata, resource=resource, groupby='bulk_labels', use_raw=False, verbose=True)

Using `.X`!
0.09 of entities in the resource are missing from the data.
Generating ligand-receptor stats for 680 samples and 20 features


100%|██████████| 1000/1000 [00:03<00:00, 287.75it/s]


In [51]:
adata = sc.datasets.pbmc68k_reduced()

In [54]:
li.mt.cellphonedb(adata, groupby='bulk_labels', expr_prop=0.1, verbose=True, use_raw=True)

Using `.raw`!
0.94 of entities in the resource are missing from the data.
Generating ligand-receptor stats for 700 samples and 765 features


100%|██████████| 1000/1000 [00:03<00:00, 253.26it/s]


In [56]:
liana_res = adata.uns['liana_res'].copy()

In [57]:
adata = adata.raw.to_adata()

In [58]:
adata.X = zi_minmax(adata.X)

In [59]:
li.mt.cellphonedb(adata, groupby='bulk_labels', expr_prop=0.1, verbose=True, use_raw=False)

Using `.X`!
0.94 of entities in the resource are missing from the data.
Generating ligand-receptor stats for 700 samples and 765 features


100%|██████████| 1000/1000 [00:03<00:00, 262.97it/s]


In [61]:
minmax_res = adata.uns['liana_res']

In [62]:
liana_res

Unnamed: 0,ligand,ligand_complex,ligand_means,ligand_props,receptor,receptor_complex,receptor_means,receptor_props,source,target,lr_means,cellphone_pvals
482,HLA-DRA,HLA-DRA,4.537684,0.995833,CD4,CD4,0.612842,0.421053,Dendritic,CD4+/CD45RO+ Memory,2.575263,0.000
321,HLA-DRA,HLA-DRA,4.537684,0.995833,CD4,CD4,0.596125,0.500000,Dendritic,CD4+/CD45RA+/CD25- Naive T,2.566905,0.000
989,HLA-DRA,HLA-DRA,4.537684,0.995833,CD4,CD4,0.483977,0.302326,Dendritic,CD14+ Monocyte,2.510830,0.000
651,HLA-DRA,HLA-DRA,4.537684,0.995833,LAG3,LAG3,0.399500,0.240741,Dendritic,CD8+ Cytotoxic T,2.468592,0.000
1392,HLA-DRA,HLA-DRA,4.537684,0.995833,CD4,CD4,0.373671,0.270833,Dendritic,Dendritic,2.455678,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...
1148,GNAI2,GNAI2,0.183538,0.153846,S1PR5,S1PR5,0.171846,0.153846,CD34+,CD34+,0.177692,0.914
1369,TNFSF13B,TNFSF13B,0.212000,0.153846,CD40,CD40,0.142317,0.100000,CD34+,Dendritic,0.177158,0.507
277,LCK,LCK,0.192800,0.126316,CD8A,CD8A_CD8B,0.161000,0.125000,CD19+ B,CD4+/CD45RA+/CD25- Naive T,0.176900,0.998
332,LCK,LCK,0.169550,0.116667,CD8A,CD8A_CD8B,0.161000,0.125000,Dendritic,CD4+/CD45RA+/CD25- Naive T,0.165275,1.000


In [66]:
minmax_res['cellphone_pvals']

1205    0.000
1221    0.000
482     0.000
321     0.000
983     0.000
        ...  
740     1.000
1157    1.000
277     0.997
332     1.000
303     0.926
Name: cellphone_pvals, Length: 1288, dtype: float64

In [68]:
minmax_res.rename(columns={'cellphone_pvals': 'cellphone_pvals_minmax', 
                           'lr_means':'lr_minmax'}, inplace=True)

In [71]:
merged = minmax_res.merge(liana_res, how='inner', on=['ligand_complex', 'receptor_complex', 'source', 'target'])

## li.fun.estimate_metalinks(adata, est_fun ...) 
returns (metabolite_estimates:csr_matrix, receptors_masked:csr_matrix)

## li.mt.metalinks(adata, groupby, resource, ...)