In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import scanpy as sc
import multigrate as mtg
import numpy as np

# Load the dataset

In [2]:
adata = sc.read('../data/10x-cite/10x-cite.h5ad')
adata

AnnData object with n_obs × n_vars = 10849 × 15792
    obs: 'n_genes', 'percent_mito', 'n_counts', 'batch', 'labels'
    var: 'encode', 'n_cells-0', 'highly_variable-0', 'hvg_encode-0', 'n_cells-1', 'highly_variable-1', 'hvg_encode-1'
    obsm: 'protein_expression'

In [3]:
adata.obs.batch = adata.obs.batch.astype('category')
ref = adata[adata.obs.batch == 0]
ref

View of AnnData object with n_obs × n_vars = 6855 × 15792
    obs: 'n_genes', 'percent_mito', 'n_counts', 'batch', 'labels'
    var: 'encode', 'n_cells-0', 'highly_variable-0', 'hvg_encode-0', 'n_cells-1', 'highly_variable-1', 'hvg_encode-1'
    obsm: 'protein_expression'

In [4]:
cite = sc.AnnData(ref.obsm['protein_expression'])

In [5]:
query = adata[adata.obs.batch == 1].copy()
query

AnnData object with n_obs × n_vars = 3994 × 15792
    obs: 'n_genes', 'percent_mito', 'n_counts', 'batch', 'labels'
    var: 'encode', 'n_cells-0', 'highly_variable-0', 'hvg_encode-0', 'n_cells-1', 'highly_variable-1', 'hvg_encode-1'
    obsm: 'protein_expression'

In [6]:
# workaround
query.obs['cell_type'] = query.obs['labels']
query

AnnData object with n_obs × n_vars = 3994 × 15792
    obs: 'n_genes', 'percent_mito', 'n_counts', 'batch', 'labels', 'cell_type'
    var: 'encode', 'n_cells-0', 'highly_variable-0', 'hvg_encode-0', 'n_cells-1', 'highly_variable-1', 'hvg_encode-1'
    obsm: 'protein_expression'

In [7]:
query_protein = sc.AnnData(query.obsm['protein_expression'])

In [8]:
def clr_normalize_each_cell(adata, inplace=True):
    """Normalize count vector for each cell, i.e. for each row of .X"""

    import numpy as np
    import scipy

    def seurat_clr(x):
        # TODO: support sparseness
        s = np.sum(np.log1p(x[x > 0]))
        exp = np.exp(s / len(x))
        return np.log1p(x / exp)

    if not inplace:
        adata = adata.copy()

    # apply to dense or sparse matrix, along axis. returns dense matrix
    adata.X = np.apply_along_axis(
        seurat_clr, 1, (adata.X.A if scipy.sparse.issparse(adata.X) else adata.X)
    )
    return adata

In [9]:
cite = clr_normalize_each_cell(cite)

In [10]:
query_protein = clr_normalize_each_cell(query_protein)

In [11]:
query_protein.obsm['protein'] = query_protein.X

In [12]:
ref.obs['cell_type'] = ref.obs['labels']

Trying to set attribute `.obs` of view, copying.


In [13]:
cite.obs = ref.obs
cite

AnnData object with n_obs × n_vars = 6855 × 14
    obs: 'n_genes', 'percent_mito', 'n_counts', 'batch', 'labels', 'cell_type'

In [18]:
ref.write('../data/test_protein/adata1.h5ad')
cite.write('../data/test_protein/cite1.h5ad')
query.write('../data/test_protein/adata2.h5ad')
query_protein.write('../data/test_protein/cite2.h5ad')