In [1]:
# Parameters
cpu = 1
group_name = "Isocortex"
mem_gb = 1


In [2]:
from ALLCools.clustering import *
import anndata

## Merge Adata

In [3]:
mc_adata = anndata.read_h5ad('mc_input.h5ad')
atac_adata = anndata.read_h5ad('atac_input.h5ad')

In [4]:
use_var = mc_adata.var_names.intersection(atac_adata.var_names)
mc_adata._inplace_subset_var(use_var)
atac_adata._inplace_subset_var(use_var)

In [5]:
adata_merge = mc_adata.concatenate(atac_adata,
                                   batch_categories=['mC', 'ATAC'],
                                   batch_key='Modality',
                                   index_unique=None)
adata_merge

  [AnnData(sparse.csr_matrix(a.shape), obs=a.obs) for a in all_adatas],


AnnData object with n_obs × n_vars = 594767 × 299329
    obs: 'read_count', 'Modality'
    var: 'chrom', 'end', 'start'

## Run LSI on Merged Adata

In [6]:
model = LSI(scale_factor=10000,
            n_components=50,
            algorithm="randomized",
            random_state=0)

In [7]:
use_cells_judge = adata_merge.obs['Modality'] == 'mC'

In [8]:
model.fit(adata_merge[use_cells_judge, :], downsample=200000)

<ALLCools.clustering.lsi.LSI at 0x7f881ff05ad0>

In [9]:
model.transform(adata_merge)

In [10]:
significant_pc_test(adata_merge, p_cutoff=0.1, obsm='X_lsi')

Downsample PC matrix to 50000 cells to calculate significant PC components


49 components passed P cutoff of 0.1.
Changing adata.obsm['X_pca'] from shape (594767, 50) to (594767, 49)


49

In [11]:
for m in ['mC', 'ATAC']:
    adata = adata_merge[adata_merge.obs['Modality'] == m]
    adata.write_h5ad(f'{m.lower()}_lsi.h5ad')

In [12]:
import subprocess
subprocess.run(['rm', '-f', 'mc_input.h5ad', 'atac_input.h5ad'])

CompletedProcess(args=['rm', '-f', 'mc_input.h5ad', 'atac_input.h5ad'], returncode=0)