In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import logging
import numpy as np
import xarray as xr
import pandas as pd
import time

import diffxpy as de
from batchglm.api.models.nb_glm import Simulator

logging.getLogger("tensorflow").setLevel(logging.WARNING)
logging.getLogger("batchglm").setLevel(logging.WARNING)
logging.getLogger("diffxpy").setLevel(logging.WARNING)

logger = logging.getLogger(__name__)


In [None]:
def test_wilcoxon(sim):
    logger.info("test_wilcoxon started")
    start = time.time()
    test = de.test_wilcoxon(
        data=sim.X,
        grouping="condition",
        sample_description=sim.sample_description
    )
    end = time.time()
    logger.info("test_wilcoxon finished")

    return test, (end - start)


def test_t_test(sim):
    logger.info("test_t_test started")
    start = time.time()
    test = de.test_t_test(
        data=sim.X,
        grouping="condition",
        sample_description=sim.sample_description
    )
    end = time.time()
    logger.info("test_t_test finished")

    return test, (end - start)


def test_lrt(sim):
    logger.info("test_lrt started")
    start = time.time()
    test = de.test_lrt(
        data=sim.X,
        full_formula_loc="~ 1 + condition",
        full_formula_scale="~ 1 + condition",
        reduced_formula_loc="~ 1",
        reduced_formula_scale="~ 1",
        sample_description=sim.sample_description
    )
    end = time.time()
    logger.info("test_lrt finished")

    return test, (end - start)


def test_wald_TF(sim):
    logger.info("test_wald_TF started")
    start = time.time()
    test = de.test_wald_loc(
        data=sim.X,
        factor_loc_totest="condition",
        formula="~ 1 + condition",
        sample_description=sim.sample_description
    )
    end = time.time()
    logger.info("test_wald_TF finished")

    return test, (end - start)


def test_wald_BFGS(sim):
    logger.info("test_wald_BFGS started")
    start = time.time()
    test = de.test_wald_loc(
        data=sim.X,
        factor_loc_totest="condition",
        formula="~ 1 + condition",
        sample_description=sim.sample_description,
        training_strategy="BFGS",
    )
    end = time.time()
    logger.info("test_wald_BFGS finished")

    return test, (end - start)


In [4]:
df = pd.DataFrame(columns=[
    "n_cells",
    "n_genes",
    "wilcoxon",
    "t_test",
    # "lrt",
    "wald_TF",
    "wald_BFGS",
])

n_genes = 1000
for n_cells in (1e3, 1e4, 1e5):
    n_cells = int(n_cells)
    sim = Simulator(num_observations=n_cells, num_features=n_genes)
    sim.generate_sample_description(num_batches=0)
    sim.generate()

    df = df.append(pd.DataFrame({
        "n_cells": n_cells,
        "n_genes": n_genes,
        "wilcoxon": test_wilcoxon(sim)[1],
        "t_test": test_t_test(sim)[1],
        # "lrt": test_lrt(sim)[1],
        "wald_TF": test_wald_TF(sim)[1],
        "wald_BFGS": test_wald_BFGS(sim)[1],
    }))


ValueError: If using all scalar values, you must pass an index