In [None]:
import numpy as np
import anndata as ad
import scanpy as sc
from rosa.preprocessing import (
    clean_cells_genes,
)

RAW_ADATA_PT = "/Users/nsofroniew/Documents/data/multiomics/cell_census/tabula_sapiens_by_features.h5ad"
EMBEDS_ADATA_PT = "/Users/nsofroniew/Documents/data/multiomics/cell_census/tabula_sapiens_by_features_with_embeds_new_norm.h5ad"


In [None]:
adata = ad.read_h5ad(EMBEDS_ADATA_PT)

In [None]:
adata.layers['counts'].sum(axis=1)

In [None]:
adata.layers['counts_normalized'] = adata.layers['counts'].copy()
sc.pp.normalize_total(adata, target_sum=1e5, layer='counts_normalized')

In [None]:
import matplotlib.pyplot as plt

In [None]:
plt.hist(adata.layers['counts_normalized'].flatten());

In [None]:
from rosa.preprocessing import bin_expression, reconstruct_expression

In [None]:
bin_expression(adata, 128)

In [None]:
reconstruct_expression(adata)

In [None]:
((adata.X - adata.layers['reconstructed'])**2).mean()

In [None]:
# Plot residuals
plt.hist((adata.layers['reconstructed'] - adata.X).ravel(), bins=1000);
plt.xlim([-.25, .25]);

In [None]:
# Identify cells and genes not trained on (when possible)
# NOTE: 'prediction' is the same object as 'reconstructed' (no copy is made here).
adata.layers['prediction'] = adata.layers['reconstructed']
test_genes = np.logical_not(adata.var["train"])
test_cells = np.logical_not(adata.obs["train"])
# Materialise the subset: slicing an AnnData yields a view, and the dendrogram
# call below stores its result on the object it is given, so work on a real copy.
adata_test = adata[test_cells, test_genes].copy()
sc.tl.dendrogram(adata_test, groupby="label", use_rep="X")


In [None]:
from rosa.plotting import plot_marker_gene_heatmap

In [None]:
marker_genes_dict = adata_test.obs.set_index('label').to_dict()['marker_feature_name']
plot_marker_gene_heatmap(adata_test, marker_genes_dict)

In [None]:
plt.hist(adata.layers['binned'].flatten(), bins=25, density=True);

In [None]:
plt.hist(adata.X.flatten(), bins=250, density=True);
plt.ylim([0, 1]);
plt.xlim([0, 8])

In [None]:
adata.X

In [None]:
from typing import Union

In [None]:
adata.layers['X']

In [None]:
np.empty((10, 0))[0]

In [None]:
from enum import Enum, auto

class EmbeddingType(Enum):
    """Enumeration of embedding kinds: ``JOINT``, ``VAR`` and ``OBS``.

    Member values are assigned automatically by ``auto()``.
    """
    # NOTE(review): presumably VAR / OBS refer to the AnnData ``var`` and ``obs``
    # axes and JOINT to both together -- confirm against the code that uses it.
    JOINT = auto()
    VAR = auto()
    OBS = auto()

In [None]:
EmbeddingType.JOINT

In [None]:
list(EmbeddingType.__members__)

In [None]:
adata.X = np.ceil(adata.X)
sc.pp.filter_genes(adata, min_cells=1)
sc.experimental.pp.normalize_pearson_residuals(adata)
adata.X[adata.X<0] = 0

In [None]:
2**10

In [None]:
import torch

In [None]:
np.isinf(adata.X).sum()

In [None]:
np.isnan(adata.X).sum()

In [None]:
sc.pp.log1p(adata)

In [None]:
adata.X

In [None]:
torch.tensor([0])

In [None]:
adata.X = np.ceil(adata.X)
adata.layers["counts"] = adata.X.copy()

In [None]:
sc.pp.filter_genes(adata, min_cells=1)

In [None]:
sc.pp.filter_cells(adata, min_genes=1)

In [None]:
adata

In [None]:
import matplotlib.pyplot as plt

In [None]:
plt.hist(adata.layers['counts'].sum(axis=1));

In [None]:
print(adata.layers['counts'].sum(axis=1).mean() / 1e5)
print(adata.layers['counts'].sum(axis=1).var() / 1e10)

In [None]:
adata.layers["counts_normalized_total"] = adata.X.copy()
sc.pp.normalize_total(adata, 1e5, layer="counts_normalized_total")

In [None]:
print(adata.layers['counts_normalized_total'].sum(axis=1).mean() / 1e5)
print(adata.layers['counts_normalized_total'].sum(axis=1).var() / 1e10)

In [None]:
adata.layers["counts_normalized_pearson"] = adata.X.copy()
adata.layers['counts_normalized_pearson'] = np.ceil(adata.layers['counts_normalized_pearson'])
sc.experimental.pp.normalize_pearson_residuals(adata, layer="counts_normalized_pearson", theta=1e2)

In [None]:
np.isnan(adata.layers['counts_normalized_pearson']).sum()

In [None]:
print(adata.layers['counts_normalized_pearson'].sum(axis=1).mean())
print(adata.layers['counts_normalized_pearson'].sum(axis=1).var())

In [None]:
plt.hist(adata.layers['counts_normalized_pearson'].flatten(), np.linspace(0, 100, 1000));

In [None]:
plt.hist(adata.layers['counts'].flatten(), np.linspace(0, 100, 1000));

In [None]:
np.log1p(0.1)

In [None]:
(adata.layers['counts_normalized_pearson'] - adata.layers['counts']).max()

In [None]:
adata.layers['counts_normalized_pearson'] = np.round(adata.layers['counts_normalized_pearson'])

In [None]:
np.isnan(adata.layers['counts_normalized_pearson'])[0]

In [None]:
adata.layers['counts_normalized_pearson'].min()

In [None]:
adata.uns['pearson_residuals_normalization']

In [None]:
TABULA_SAPIENS_BY_CELL_TYPE_WITH_EMBEDS_PT = "/Users/nsofroniew/Documents/data/multiomics/cell_census/tabula_sapiens_by_features_with_embeds_new_norm.h5ad"

In [None]:
adata_norm = ad.read_h5ad(TABULA_SAPIENS_BY_CELL_TYPE_WITH_EMBEDS_PT)

In [None]:
y = np.exp(adata_norm.X) - 1

In [None]:
y

In [None]:
import scanpy as sc

In [None]:
adata_norm.X = adata_norm.layers['counts'].copy()

In [None]:
sc.pp.normalize_total(adata_norm, 1e5)

In [None]:
(abs(adata_norm.X - y)).max()

In [None]:
# from scipy.special import kl_div

# y_hat = np.asarray(adata[keep_cells].X.flatten())
# y = np.asarray(adata[keep_cells].layers['prediction'].flatten())

# kl_div(y, y_hat).mean()

In [None]:
from scipy.stats import kstest, poisson

y_hat = np.asarray(adata[keep_cells].X.flatten())
y = np.asarray(adata[keep_cells].layers['prediction'].flatten())

kstest(y_hat, y)

In [None]:
kstest(y, 'poisson', args=(np.mean(y),))

In [None]:
from scipy.stats import kstest, poisson

poisson_dist = poisson(np.mean(y))

In [None]:
y_p = poisson_dist.rvs(size=10000)

In [None]:
hist, _ = np.histogram(y, bins=bins)
hist_hat, _ = np.histogram(y_hat, bins=bins)
plt.bar(bins[:-1], hist/hist.sum(), width=10/1000)
plt.bar(bins[:-1], hist_hat/hist_hat.sum(), alpha=0.5, width=10/1000)
plt.ylim([0, 1e-2]);
plt.xlim([0, 8]);

In [None]:
# Draw samples from a Poisson distribution whose rate is the mean of `y`
# and overlay their normalised histogram on the histogram of `y`.
poisson_dist = poisson(np.mean(y))
y_new = poisson_dist.rvs(size=10000)


hist, _ = np.histogram(y, bins=bins)
hist_new, _ = np.histogram(y_new, bins=bins)
plt.bar(bins[:-1], hist/hist.sum(), width=10/1000)
# True division: floor division (`//`) of integer counts by their total
# would collapse every bar height to 0.
plt.bar(bins[:-1], hist_new/hist_new.sum(), alpha=0.5, width=10/1000)
plt.ylim([0, 1e-2]);
plt.xlim([0, 8]);

In [None]:
# Starting point (r1, p1, r2, p2) for the two-component negative-binomial fit.
# NOTE: `minimize` and `negative_binomial` are only defined in a later cell of
# this notebook, so that cell has to be executed before this one.
initial_params = [.2, 0.1, 1.2, 0.001]

result = minimize(negative_binomial, initial_params, args=(y,), method='Nelder-Mead')

In [None]:
# Extract the optimal parameters
r1, p1, r2, p2 = result.x
data = y

In [None]:
from scipy.stats import nbinom

r1, p1, r2, p2 = (0.1, 0.1, 1.1, 0.1)

# nbinom_dist_1 = nbinom(9.1, 0.6)
# nbinom_dist_2 = nbinom(1.2, .001)
y_new = (nbinom.rvs(.2, 0.1, size=10000) + nbinom.rvs(1.2, 0.001, size=10000)) / 1000


hist, _ = np.histogram(y, bins=bins)
hist_new, _ = np.histogram(y_new, bins=bins)
plt.bar(bins[:-1], hist/hist.sum(), width=10/1000)
plt.bar(bins[:-1], hist_new/hist_new.sum(), alpha=0.5, width=10/1000)
plt.ylim([0, 1e-2]);
plt.xlim([0, 8]);

In [None]:
import numpy as np
from scipy.optimize import minimize
from scipy.stats import nbinom
import matplotlib.pyplot as plt

# Generate sample data
data = y

# Define the negative binomial function
def negative_binomial(params, data, scale=1000):
    """Negative log-likelihood of ``data`` under the sum of two negative binomials.

    Args:
        params: Sequence ``(r1, p1, r2, p2)`` holding the ``n`` and ``p``
            parameters of the two ``scipy.stats.nbinom`` components.
        data: Array-like of observations. They are multiplied by ``scale`` and
            rounded to the nearest integer, because the negative binomial pmf
            is zero at non-integer points (which made the objective infinite).
        scale: Factor that converts ``data`` to the count scale. Defaults to 1000.

    Returns:
        ``-sum(log(pmf1 + pmf2))`` evaluated at the rounded counts, or ``inf``
        when that value is not finite (e.g. parameters outside the valid
        range), so an optimizer treats such points as maximally bad.
    """
    r1, p1, r2, p2 = params
    counts = np.round(scale * np.asarray(data, dtype=float))
    # Work in log space: log(pmf1 + pmf2) == logaddexp(log pmf1, log pmf2),
    # which avoids underflow of very small probabilities.
    log_pmf1 = nbinom.logpmf(counts, r1, p1)
    log_pmf2 = nbinom.logpmf(counts, r2, p2)
    nll = -np.logaddexp(log_pmf1, log_pmf2).sum()
    return nll if np.isfinite(nll) else np.inf

# Define the initial values for the parameters
initial_params = (0.1, 0.1, 1.1, 0.1)


# Minimize the negative binomial function using the Nelder-Mead algorithm
result = minimize(negative_binomial, initial_params, args=(data,), method='Nelder-Mead')

# Extract the optimal parameters
r1, p1, r2, p2 = result.x

# Plot the histogram of the data
plt.hist(data, bins=30, density=True, alpha=0.5, label='Data')

# Plot the sum of the negative binomials
x = np.arange(0, data.max())
pmf1 = nbinom.pmf(x, r1, p1)
pmf2 = nbinom.pmf(x, r2, p2)
plt.plot(x, pmf1 + pmf2, 'r-', lw=2, label='Sum of Negative Binomials')

plt.xlabel('x')
plt.ylabel('Probability')
plt.legend()
plt.show()


In [None]:
try:
  print(x)
except NameError:
  print("Variable x is not defined")

In [None]:
import torch

In [None]:
torch.long

In [None]:
a = torch.tensor(2.3).type(torch.long)

In [None]:
adata.varm['embedding'].shape

In [None]:
import matplotlib.pyplot as plt

plt.hist(adata.varm['embedding'].ravel(), bins=2000);
plt.xlim([-0.5, 0.5]);

In [None]:
adata.varm['embedding']

In [None]:
from sklearn.decomposition import PCA

In [None]:
# fit pca on training data
pca = PCA()
pca.fit(adata.varm['embedding'])


In [None]:
E = pca.transform(adata.varm['embedding'])

In [None]:
plt.plot(np.cumsum(pca.explained_variance_ratio_));
plt.xlim([0, 3042])

In [None]:
np.cumsum(pca.explained_variance_ratio_)[512]

In [None]:
plt.hist(E[:, :512].ravel(), bins=2000);
plt.xlim([-1.5, 1.5]);

In [None]:
E.shape

In [None]:
import anndata as ad
from rosa.preprocessing import (
    calculate_gene_embeddings_pca,
)


EMBEDS_ADATA_PT = "/Users/nsofroniew/Documents/data/multiomics/cell_census/tabula_sapiens_by_features_with_embeds_new_norm.h5ad"

In [None]:
adata = ad.read_h5ad(EMBEDS_ADATA_PT)
adata = calculate_gene_embeddings_pca(adata, 256)

In [None]:
adata.uns["embedding_pca"]

In [None]:
adata.write_h5ad(EMBEDS_ADATA_PT)

In [None]:
adata.varm['embedding_pca'].shape

In [None]:
adata.varm

In [None]:
ADATA_BULK_PT = "/Users/nsofroniew/Documents/data/multiomics/cell_census/tabula_sapiens_pbulk.h5ad"
adata = ad.read_h5ad(ADATA_BULK_PT)


In [None]:
adata.var.set_index("feature_id")

In [None]:
base_pt = '/home/ec2-user/enformer/Homo_sapiens.GRCh38.genes.enformer_embeddings'
var_id = 'ENSG00000280445'
full_pt = f'{base_pt}/{var_id}.pt'

In [None]:
import torch

In [None]:
for i in range(64):
    var = torch.load(full_pt, map_location='cpu')['embedding']
    var = torch.from_numpy(var).type(torch.float32)

In [None]:
type(var)

In [None]:
896 // 2

In [None]:
a = var.unsqueeze(0).unsqueeze(-3)
a.shape

In [None]:
fc = torch.nn.Conv2d(1, 10, (896, 1))

In [None]:
fc(a).view(a.shape[0], -1).shape

In [None]:
view(a.shape[0], -1)

In [None]:
import torch.nn.functional as F


class ToTensor(torch.nn.Module):
    """Convert a ``numpy.ndarray`` to a ``torch.Tensor`` of a chosen dtype.

    Args:
        dtype: dtype of the tensor returned by ``forward``.
            Defaults to ``torch.float32``.
    """
    def __init__(self, dtype: torch.dtype = torch.float32) -> None:
        super().__init__()
        self.dtype = dtype

    def forward(self, tensor: np.ndarray) -> torch.Tensor:
        """Return ``tensor`` as a torch tensor cast to ``self.dtype``."""
        # Use the dtype selected at construction time rather than a fixed
        # float32, so ToTensor(torch.float64) etc. behave as requested.
        return torch.from_numpy(tensor).type(self.dtype)

class CountNormalize(torch.nn.Module):
    """Rescale a tensor so the absolute values along one axis sum to ``total_counts``.

    Args:
        total_counts: Target L1 sum after normalization. Defaults to 1.
    """
    def __init__(self, total_counts=1):
        super().__init__()
        self.total_counts = total_counts

    def forward(self, tensor: torch.Tensor) -> torch.Tensor:
        """L1-normalize ``tensor`` and multiply by ``self.total_counts``.

        Tensors with two or more dimensions are normalized along dim 1; a 1-D
        tensor (a single sample) is normalized along its only dimension.
        All-zero slices stay zero because the norm is clamped by ``eps``.
        """
        # F.normalize defaults to dim=1, which raises on 1-D input.
        dim = 1 if tensor.dim() > 1 else 0
        return self.total_counts * F.normalize(tensor, p=1.0, dim=dim, eps=1e-12)


class Log1p(torch.nn.Module):
    """Module wrapper around ``torch.log1p`` (element-wise ``log(1 + x)``)."""

    def forward(self, tensor: torch.Tensor) -> torch.Tensor:
        """Return ``log(1 + tensor)`` computed element-wise."""
        logged = torch.log1p(tensor)
        return logged


class QuantileNormalize(torch.nn.Module):
    """Replace each value by the index of the quantile bin it falls into.

    Args:
        n_bins: Number of evenly spaced quantile levels in ``[0, 1]`` used as
            bin boundaries.
    """
    def __init__(self, n_bins):
        super().__init__()
        self.n_bins = n_bins

    def forward(self, tensor: torch.Tensor) -> torch.Tensor:
        """Bucketize ``tensor`` by its own quantiles.

        The boundaries are the ``n_bins`` quantiles of ``tensor`` (no ``dim``
        is passed to ``torch.quantile``); the result holds, for every element,
        the index returned by ``torch.bucketize`` against those boundaries.
        """
        # Build the quantile levels with the input's dtype and device so that
        # float64 or non-CPU inputs are accepted by torch.quantile.
        levels = torch.linspace(
            0, 1, self.n_bins, dtype=tensor.dtype, device=tensor.device
        )
        boundaries = torch.quantile(tensor, levels)
        return torch.bucketize(tensor, boundaries)

In [None]:
class ExpressionTransform(torch.nn.Sequential):
    """Sequential expression pipeline assembled from a config object.

    ``ToTensor`` always comes first. ``CountNormalize``, ``Log1p`` and
    ``QuantileNormalize`` follow, in that order, when ``cfg.total_counts`` is
    not None, ``cfg.log1p`` is truthy and ``cfg.n_bins`` is not None
    respectively.
    """

    def __init__(self, cfg):
        # (enabled?, factory) pairs, listed in application order.
        optional_stages = (
            (cfg.total_counts is not None, lambda: CountNormalize(cfg.total_counts)),
            (bool(cfg.log1p), Log1p),
            (cfg.n_bins is not None, lambda: QuantileNormalize(cfg.n_bins)),
        )

        pipeline = [ToTensor()]
        pipeline.extend(build() for enabled, build in optional_stages if enabled)

        super().__init__(*pipeline)


from dataclasses import dataclass
from typing import Optional


@dataclass
class ExpressionTransformConfig:
    """Options that select which steps ``ExpressionTransform`` applies.

    Every field defaults to ``None``, in which case the corresponding step is
    left out of the pipeline.
    """
    # When not None, a ``CountNormalize(total_counts)`` step is added.
    total_counts: Optional[int] = None
    # When truthy, a ``Log1p()`` step is added.
    log1p: Optional[bool] = None
    # When not None, a ``QuantileNormalize(n_bins)`` step is added.
    n_bins: Optional[int] = None

In [None]:
exp_cfg = ExpressionTransformConfig(n_bins=10)

In [None]:
tf = ExpressionTransform(exp_cfg)

In [None]:
torch.quantile(ToTensor()(X).unsqueeze(0), torch.linspace(0, 1, 5), dim=-1, keepdim=True).shape

In [None]:
ToTensor()(X).unsqueeze(0).shape

In [None]:
tf(np.random.rand(20))

In [None]:
X[1]

In [None]:
a = torch.nn.Sequential(ToTensor(), CountNormalize(10), Log1p(), QuantileNormalize(10))
b = torch.nn.Sequential(ToTensor(), QuantileNormalize(10))

In [None]:
X = np.random.rand(3, 30)
X [0, :3] = 0
X [0, :] = X [0, :] * 1000
# X = torch.randint(1, 5, size=(3, 3))

In [None]:
X

In [None]:
a(X)

In [None]:
b(X)

In [None]:
a(X).sum(dim=1)

In [None]:
20 * 512 / 1e3

In [None]:
type(torch.float32)

In [None]:
import numpy as np

In [None]:
np.unravel_index(10, (9, 2))

In [None]:
EMBEDS_ADATA_PT = '/Users/nsofroniew/Documents/data/multiomics/cell_census/tabula_sapiens_by_features_with_embeds_new_norm.h5ad'

In [None]:
EMBEDS_ADATA_PT_2 = "/Users/nsofroniew/Documents/data/multiomics/cell_census/tabula_sapiens_by_features_raw.h5ad"


In [None]:
import anndata as ad

In [None]:
adata = ad.read_h5ad(EMBEDS_ADATA_PT)
adata_2 = ad.read_h5ad(EMBEDS_ADATA_PT_2)

In [None]:
adata.layers['counts'][0, 0]

In [None]:
adata_2.X.sum(axis=1).mean()

In [None]:
adata.X[0, 0]

In [None]:
adata.layers['binned'][0, 0]

In [None]:
adata_2.X[0, 0]

In [None]:
from enum import Enum, auto
from typing import Optional


class ExpressionActivations(Enum):
    """Enumeration of activation names: ``SOFTPLUS`` and ``SOFTMAX``.

    Member values are assigned automatically by ``auto()``.
    """
    # NOTE(review): presumably selects the output activation of an expression
    # model -- confirm against the code that consumes this enum.
    SOFTPLUS = auto()
    SOFTMAX = auto()

In [None]:
str(ExpressionActivations.SOFTPLUS)

In [None]:
ExpressionActivations.SOFTPLUS.name.lower()

In [None]:
import torch.nn as nn

In [None]:
a=nn.Sequential()

In [None]:
a.append(nn.Linear(1, 1))

In [None]:
body = nn.Identity()

In [None]:
body(torch.rand(10, 10)).shape

In [None]:
map((body, body), (torch.rand(10, 10), torch.rand(10, 10)))

In [None]:
torch.add((torch.rand(10, 10), torch.rand(10, 10))).shape

In [None]:
torch.cat((torch.rand(10, 10), torch.rand(10, 10)), dim=-1).shape

In [None]:
from typing import Tuple

class AttentionEmbeds(nn.Module):
    """Scale a learned vector by the GELU-activated dot product of two inputs.

    Args:
        in_dim: Accepted but not used by this implementation.
        out_dim: Length of the learned ``value`` vector, i.e. the size of the
            trailing dimension of the output.
    """

    def __init__(self, in_dim: Tuple[int, int], out_dim) -> None:
        super().__init__()

        self.out_dim = out_dim
        self.activation = nn.GELU()
        # Learned output direction, initialised from a standard normal.
        self.value = nn.Parameter(torch.randn(out_dim))

    def forward(self, x: Tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor:
        """Return ``gelu(<x[0], x[1]>)[..., None] * value``.

        The dot product is taken over the last dimension of the two tensors;
        the output has their leading dimensions followed by ``out_dim``.
        """
        first, second = x
        scores = torch.einsum('...i, ...i ->...', first, second)
        weights = self.activation(scores)
        return torch.einsum('..., i -> ...i', weights, self.value)

In [None]:
x_1 = torch.rand((10, 20))
x_2 = torch.rand((10, 20))
v = torch.rand(20)

In [None]:
atten = torch.einsum('...i, ...i ->...', x_1, x_2)

In [None]:
out = torch.einsum('..., i -> ...i', atten, v)

In [None]:
nn.Parameter(torch.randn(20))

In [None]:
a = AttentionEmbeds((20, 20), 30)

In [None]:
a((x_1, x_2)).shape

In [None]:
X = np.random.rand(100, 896, 3072)

In [None]:
PT = '/Users/nsofroniew/Documents/data/multiomics/enformer/scratch'

In [None]:
import zarr

In [None]:
z1 = zarr.open(PT + '/example.zarr', mode='w', shape=(1000, 896, 3072), chunks=(1, None, None), dtype='float32')

In [None]:
for i in range(10):
    z1[i * 100: (i+1)*100] = X

In [None]:
z2 = zarr.open(PT + '/example.zarr', mode='r')

In [None]:
from time import time

In [None]:
start = time()
z2[893]
stop = time()
print(stop - start)

In [None]:
torch.save({'results': X[0]}, PT + '/example_0.pt')

In [None]:
start = time()
torch.load(PT + '/example_0.pt')
stop = time()
print(stop - start)

In [None]:
class ZarrDataset(torch.utils.data.Dataset):
    """Map-style dataset indexing a Zarr array along its first axis.

    Args:
        path: Location passed to ``zarr.open``; the array is opened with
            ``mode='r'``.
    """

    def __init__(self, path: str):
        super().__init__()
        self.path = path
        self.array = zarr.open(self.path, mode='r')

    def __len__(self) -> int:
        """Number of items, i.e. the size of the array's first dimension."""
        n_items = self.array.shape[0]
        return n_items

    def __getitem__(self, idx):
        """Return ``self.array[idx]``."""
        item = self.array[idx]
        return item

In [None]:
ds = ZarrDataset(PT + '/example.zarr')
dl = torch.utils.data.DataLoader(ds, shuffle=False, num_workers=2)

In [None]:
start = time()
for batch in iter(dl):
    pass
stop = time()
print(stop - start)

In [1]:
from rosa.datasets import RosaObsDataset, ToTensor, RosaObsVarDataset, RosaJointDataset
from rosa.config import ExpressionTransformConfig

from torch.utils.data import default_collate


Global seed set to 0
  new_rank_zero_deprecation(
  return new_rank_zero_deprecation(*args, **kwargs)


In [None]:
# isinstance(ds, RosaJointDataset)

In [None]:
import anndata as ad

ADATA_PT = "/Users/nsofroniew/Documents/data/multiomics/cell_census/tabula_sapiens_by_features_with_embeds_new_norm.h5ad"


adata = ad.read_h5ad(ADATA_PT)

In [None]:
from torch import Tensor
from typing import Optional, Tuple, List

In [None]:
ds = RosaObsDataset(adata, obs_input='embedding')

In [None]:
ds[0][0].shape

In [None]:
ds[0][1].shape

In [None]:
adata.varm['embedding_pca'].shape

In [None]:
2**14 / 19429

In [None]:
import torch

torch.empty((0, 0))

In [None]:
# class RosaObsVarDataset(RosaJointDataset):
#     def __init__(
#         self,
#         adata: ad.AnnData,
#         *,
#         var_input: str,
#         obs_input: str,
#         expression_layer: Optional[str] = None,
#         expression_transform_config: Optional[ExpressionTransformConfig] = None,
#     ) -> None:
#         super(RosaObsVarDataset, self).__init__(adata, obs_input=obs_input, var_input=var_input, expression_layer=expression_layer, expression_transform_config=expression_transform_config)

#     def __len__(self) -> int:
#         return self.expression.shape[0]

#     def __getitem__(self, idx: int) -> Tuple[Tuple[Tensor, Tensor], Tensor]:
#         obs_input = self.input[0][idx]
#         expression = self.expression[idx]
#         full_input = (obs_input.expand((self.input[1].shape[0], obs_input.shape[0])), torch.empty((self.input[1].shape[0], 0)))
#         return full_input, expression

#     def collate_fn(self, batch: List[Tuple[Tuple[Tensor, Tensor], Tensor]]) -> Tuple[Tuple[Tensor, Tensor], Tensor]:
#         (x0, _), y = default_collate(batch)
#         x1 = self.input[1].expand((x0.shape[0],) + self.input[1].shape)
#         return (x0.view(-1, x0.shape[-1]), x1.view(-1, x1.shape[-1])), y.view(-1)

In [None]:
ds = RosaObsVarDataset(adata, obs_input='embedding', var_input='embedding_pca')

In [None]:
ds[0][0][0].shape

In [None]:
ds[0][0][1].shape

In [None]:
ds[0][1].shape

In [None]:
from torch.utils.data import DataLoader


dl = DataLoader(
            ds,
            batch_size=64,
            shuffle=False,
            num_workers=0,
            # collate_fn=ds.collate_fn,
        )

In [None]:
batch = next(iter(dl))

In [None]:
batch[0][0].shape

In [None]:
batch[0][1][0].expand(batch[0][1].shape).shape

In [None]:
batch[1].shape

In [None]:
from torch.utils.data import default_collate


def obsvar_collate(batch):
    """Collate ``((x0, x1), y)`` samples and merge the leading dimensions.

    The samples are first stacked with ``default_collate``. Both inputs are
    then reshaped to 2-D, keeping their last dimension, and the targets are
    flattened to 1-D.

    Returns:
        ``((x0_flat, x1_flat), y_flat)``.
    """
    inputs, targets = default_collate(batch)
    obs_part, var_part = inputs
    flat_obs = obs_part.view(-1, obs_part.shape[-1])
    flat_var = var_part.view(-1, var_part.shape[-1])
    return (flat_obs, flat_var), targets.view(-1)

In [None]:
x = ds[0][0][0]

In [None]:
x.expand((2000, 110)).shape

In [2]:
import torch

torch.arange(100)

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
        36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
        54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
        72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
        90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

In [11]:
z = torch.multinomial(torch.arange(100).float(), 10).long()

In [9]:
x = torch.rand(100, 1000)

In [13]:
x[z].shape

torch.Size([10, 1000])

In [14]:
from pytorch_lightning.utilities.rank_zero import LightningDeprecationWarning

In [15]:
import scanpy as sc

In [21]:
ADATA_PT = "/Users/nsofroniew/Documents/data/multiomics/cell_census/tabula_sapiens_pbulk.h5ad"

In [22]:
import anndata as ad

In [23]:
adata = ad.read_h5ad(ADATA_PT)

In [25]:
adata_2 = ad.concat([adata, adata])

  utils.warn_names_duplicates("obs")


In [26]:
adata_2

AnnData object with n_obs × n_vars = 1094 × 19429
    obs: 'dataset_id', 'cell_type', 'cell_type_ontology_term_id', 'development_stage', 'development_stage_ontology_term_id', 'disease', 'disease_ontology_term_id', 'donor_id', 'is_primary_data', 'self_reported_ethnicity', 'self_reported_ethnicity_ontology_term_id', 'sex', 'sex_ontology_term_id', 'suspension_type', 'label', 'sample', 'n_genes', 'train', 'marker_gene', 'marker_feature_name'
    obsm: 'bin_edges', 'embedding'
    layers: 'binned', 'counts', 'log1p', 'normalized_counts'

In [28]:
adata.uns['log1p']['base'] = None
sc.tl.rank_genes_groups(adata_2, 'label', method='wilcoxon')

  self.stats[group_name, 'names'] = self.var_names[global_indices]
  self.stats[group_name, 'scores'] = scores[global_indices]
  self.stats[group_name, 'pvals'] = pvals[global_indices]
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'names'] = self.var_names[global_indices]
  self.stats[group_name, 'scores'] = scores[global_indices]
  self.stats[group_name, 'pvals'] = pvals[global_indices]
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'names'] = self.var_names[global_indices]
  self.stats[group_name, 'scores'] = scores[global_indices]
  self.stats[group_name, 'pvals'] = pvals[global_indices]
  self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
  self.stats[group_name, 'logfoldchanges'] = np.log2(
  self.stats[group_name, 'names'] = self.var_names[global_indices]
  self.stats[group

In [29]:
adata_3 = adata_2[:adata.n_obs]

In [35]:
import pandas as pd

result = adata_3.uns['rank_genes_groups']
groups = result['names'].dtype.names
df = pd.DataFrame(
    {group + '_' + key[:1]: result[key][group]
    for group in groups for key in ['names', 'pvals']})

In [37]:
adata_3.shape

(547, 19429)

In [36]:
df

Unnamed: 0,B cell_n,B cell_p,CD141-positive myeloid dendritic cell_n,CD141-positive myeloid dendritic cell_p,CD1c-positive myeloid dendritic cell_n,CD1c-positive myeloid dendritic cell_p,CD4-positive helper T cell_n,CD4-positive helper T cell_p,"CD4-positive, alpha-beta T cell_n","CD4-positive, alpha-beta T cell_p",...,type I NK T cell_n,type I NK T cell_p,type I pneumocyte_n,type I pneumocyte_p,type II pneumocyte_n,type II pneumocyte_p,vascular associated smooth muscle cell_n,vascular associated smooth muscle cell_p,vein endothelial cell_n,vein endothelial cell_p
0,ENSG00000159958,7.618023e-15,ENSG00000111647,2.868066e-07,ENSG00000204287,0.000002,ENSG00000138795,0.000003,ENSG00000106952,6.869823e-12,...,ENSG00000111796,0.000002,ENSG00000187821,0.000548,ENSG00000131400,0.000023,ENSG00000115468,0.000001,ENSG00000106852,8.839367e-08
1,ENSG00000153064,8.667712e-15,ENSG00000139083,3.190914e-07,ENSG00000161921,0.000002,ENSG00000142546,0.000004,ENSG00000152495,2.091872e-11,...,ENSG00000189430,0.000003,ENSG00000171476,0.000548,ENSG00000133661,0.000024,ENSG00000131471,0.000002,ENSG00000213088,1.055367e-07
2,ENSG00000156738,9.048288e-15,ENSG00000175115,3.259567e-07,ENSG00000231389,0.000003,ENSG00000144152,0.000004,ENSG00000168685,4.720689e-11,...,ENSG00000197540,0.000003,ENSG00000213853,0.000548,ENSG00000168907,0.000024,ENSG00000112214,0.000002,ENSG00000005102,1.204631e-07
3,ENSG00000177455,1.074205e-14,ENSG00000197992,4.029337e-07,ENSG00000102970,0.000003,ENSG00000089327,0.000004,ENSG00000174946,1.326802e-10,...,ENSG00000185697,0.000004,ENSG00000161653,0.000548,ENSG00000259803,0.000024,ENSG00000103175,0.000002,ENSG00000147113,1.231417e-07
4,ENSG00000136573,1.388310e-14,ENSG00000178685,4.115319e-07,ENSG00000196126,0.000003,ENSG00000117602,0.000005,ENSG00000167286,4.899408e-10,...,ENSG00000134539,0.000004,ENSG00000149435,0.000561,ENSG00000167972,0.000024,ENSG00000167641,0.000002,ENSG00000079337,1.286729e-07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19424,ENSG00000148411,8.751621e-09,ENSG00000110171,1.607161e-05,ENSG00000158856,0.000186,ENSG00000173905,0.000179,ENSG00000111321,1.261769e-07,...,ENSG00000060138,0.000264,ENSG00000013441,0.005693,ENSG00000150593,0.000893,ENSG00000143549,0.000791,ENSG00000081320,5.034301e-04
19425,ENSG00000139112,5.035018e-09,ENSG00000067141,1.607161e-05,ENSG00000142168,0.000176,ENSG00000185432,0.000174,ENSG00000154237,8.545828e-08,...,ENSG00000143545,0.000255,ENSG00000163577,0.005476,ENSG00000152518,0.000877,ENSG00000150593,0.000650,ENSG00000134250,4.329426e-04
19426,ENSG00000072110,2.411406e-09,ENSG00000106992,1.467081e-05,ENSG00000115306,0.000144,ENSG00000171604,0.000139,ENSG00000145431,5.295315e-08,...,ENSG00000010278,0.000238,ENSG00000091136,0.005370,ENSG00000187239,0.000714,ENSG00000205581,0.000650,ENSG00000026508,3.287332e-04
19427,ENSG00000075420,6.691607e-10,ENSG00000054654,6.405363e-06,ENSG00000203965,0.000093,ENSG00000107438,0.000139,ENSG00000038382,3.109290e-08,...,ENSG00000120885,0.000218,ENSG00000116991,0.004587,ENSG00000130340,0.000662,ENSG00000117632,0.000579,ENSG00000197343,2.728501e-04


In [None]:
logFCs, pvals = dc.get_contrast(adata,
                                group_col='cell_type',
                                condition_col='disease',
                                condition='COVID-19',
                                reference='normal',
                                method='t-test'
                               )