# Efficiency LIANA+

In [1]:
import numpy as np
import pandas as pd
import anndata as ad
from scipy.sparse import csr_matrix

import scanpy as sc
import liana as li

from itertools import product

import psutil
from timeit import default_timer as timer

In [2]:
from benchmark import _benchmark, _sample_anndata, _sample_resource

## Single-cell

In [3]:
# Define Methods
def cpdb_no_perms(**kwargs):
    """Run ``li.mt.cellphonedb`` with ``n_perms`` forced to ``None``.

    Parameters
    ----------
    **kwargs
        Forwarded unchanged to ``li.mt.cellphonedb``, except that any
        ``n_perms`` supplied by the caller is replaced by ``None``.
    """
    # The benchmark loop passes ``n_perms`` to every method. Writing
    # ``f(**kwargs, n_perms=None)`` would then raise
    # "TypeError: got multiple values for keyword argument 'n_perms'",
    # so the key is overridden inside the (per-call) kwargs dict instead.
    kwargs["n_perms"] = None
    li.mt.cellphonedb(**kwargs)
def cellchat_no_perms(**kwargs):
    """Run ``li.mt.cellchat`` with ``n_perms`` forced to ``None``.

    Parameters
    ----------
    **kwargs
        Forwarded unchanged to ``li.mt.cellchat``, except that any
        ``n_perms`` supplied by the caller is replaced by ``None``.
    """
    # Override rather than pass ``n_perms=None`` next to ``**kwargs``: the
    # benchmark loop already supplies ``n_perms``, and a duplicated keyword
    # raises TypeError at call time.
    kwargs["n_perms"] = None
    li.mt.cellchat(**kwargs)
def magnitude_only(**kwargs):
    """Run ``li.mt.rank_aggregate`` with ``n_perms`` forced to ``None``.

    Parameters
    ----------
    **kwargs
        Forwarded unchanged to ``li.mt.rank_aggregate``, except that any
        ``n_perms`` supplied by the caller is replaced by ``None``.
    """
    # NOTE(review): presumably n_perms=None restricts the aggregate to
    # magnitude scores (hence the name) -- confirm against the liana docs.
    # The key is overridden in the dict because the benchmark loop already
    # passes ``n_perms``; repeating the keyword would raise TypeError.
    kwargs["n_perms"] = None
    li.mt.rank_aggregate(**kwargs)
def rank_aggregate(**kwargs):
    """Build a consensus aggregate over six methods and run it.

    A fresh ``li.mt.AggregateClass`` is constructed on every call from
    ``li.mt.aggregate_meta`` and the method list below, then invoked with
    the caller's keyword arguments. Nothing is returned.
    """
    consensus_methods = [
        li.mt.logfc,
        li.mt.cellphonedb,
        # TODO: get rid of CellChat all together??
        li.mt.cellchat,
        li.mt.natmi,
        li.mt.connectome,
        li.mt.singlecellsignalr,
    ]
    consensus = li.mt.AggregateClass(li.mt.aggregate_meta, methods=consensus_methods)
    consensus(**kwargs)

# Registry of benchmarked single-cell methods: display label -> callable.
# The benchmark loop iterates this mapping, so insertion order is run order.
# The "_NP" labels point at the wrappers that call the method with n_perms=None.
methods = dict(
    [
        ("CellPhoneDB", li.mt.cellphonedb),
        ("CellChat", li.mt.cellchat),
        ("CellPhoneDB_NP", cpdb_no_perms),
        ("CellChat_NP", cellchat_no_perms),
        ("Consensus", rank_aggregate),
        ("Magnitude", magnitude_only),
        ("NATMI", li.mt.natmi),
        ("Connectome", li.mt.connectome),
        ("log2FC", li.mt.logfc),
    ]
)

In [4]:
# Number of repeated measurements taken for every (method, n_obs) pair.
N_REPEATS = 5

# Dataset sizes (number of observations) to benchmark.
obs_range = [1000, 5000, 10000, 50000, 100000]

stats_columns = ["method", "n_obs", "time", "memory"]

# One dict per run is appended here and the DataFrame is (re)built from the
# list; this avoids enlarging a DataFrame row by row via `.loc[len(df)]`,
# which copies on every insert and leaves every column as dtype `object`.
benchmark_records = []
benchmark_stats = pd.DataFrame(columns=stats_columns)

for n_obs in obs_range:
    print(n_obs)
    adata = _sample_anndata(n_obs=n_obs)
    resource = _sample_resource(adata, n_lrs=3000)

    for method, function in methods.items():
        for _ in range(N_REPEATS):
            print(method)
            # `_benchmark` returns a (time, memory) pair for a single run.
            elapsed, memory = _benchmark(function=function,
                                         adata=adata,
                                         resource=resource,
                                         groupby='cell_type',
                                         n_perms=1000,
                                         use_raw=False,
                                         verbose=False
                                         )
            benchmark_records.append({"method": method,
                                      "n_obs": n_obs,
                                      "time": elapsed,
                                      "memory": memory})

    # Checkpoint after every dataset size so finished sizes survive an
    # interrupted run of the larger ones.
    benchmark_stats = pd.DataFrame(benchmark_records, columns=stats_columns)
    benchmark_stats.to_csv("sc_stats.csv", index=False)

1000
 NNZ fraction: 0.0998465
CellPhoneDB


KeyboardInterrupt: 

## Local Spatial Scores

In [1]:
import squidpy as sq

In [None]:
# squidpy exposes its bundled example data under `sq.datasets` (plural);
# `sq.dataset` is not an attribute of the package and raises AttributeError.
sq.datasets

In [13]:
# Local score functions to benchmark.
# NOTE: the masked variant is deliberately left out because it is much less
# efficient than any other score. (The original note names masked_pearson,
# while the disabled entry below is masked_spearman -- TODO confirm which.)
# NOTE(review): this rebinds `methods`, which the single-cell section defines
# as a dict of callables; re-run that cell before re-running its benchmark.
methods = [
    "cosine",
    "jaccard",
    "spearman",
    "pearson",
    "morans",
    # "masked_spearman",
]

In [16]:
# Draw the inputs for the local-score run with the benchmark helpers:
# an AnnData with n_obs=50000 and a matching resource with n_lrs=3000
# (presumably the number of ligand-receptor pairs -- TODO confirm).
adata = _sample_anndata(
    n_obs=50000,
)
resource = _sample_resource(
    adata,
    n_lrs=3000,
)

 NNZ fraction: 0.099998885


In [17]:
# Single local-score run with the "spearman" function: no permutations
# (n_perms=None), scores computed from `.X` (use_raw=False), verbose logging.
li.mt.lr_bivar(
    adata,
    resource=resource,
    function_name="spearman",
    n_perms=None,
    positive_only=False,
    use_raw=False,
    verbose=True,
)

Using `.X`!


In [None]:
# Display the table stored under the 'global_res' key of `adata.uns`
# (presumably written by the lr_bivar call above -- TODO confirm).
adata.uns["global_res"]