In [1]:
from ALLCools.clustering import *
from wmb import brain, cemba, cemba_atac
import numpy as np
import pandas as pd
import anndata

import matplotlib.pyplot as plt
from ALLCools.plot import *

from ALLCools.integration.seurat_class import SeuratIntegration

In [2]:
# Load the ATAC annotation object through the wmb `cemba_atac` helper.
# NOTE(review): `atac` is not referenced by any other cell visible in this notebook,
# and the same call is made again later to build `atac_annot` — confirm whether
# this cell is still needed.
atac = cemba_atac.get_atac_annot()

In [3]:
# Annotation columns that are copied into the .obs of every AnnData further down.
# NOTE(review): the "# Parameters" cell that follows assigns this name again, so the
# value here only acts as a default.
categorical_key = ['L4Region', 'DissectionRegion']

In [4]:
# Parameters
# NOTE(review): this looks like an injected (papermill-style) parameter cell — confirm.
# Annotation columns to attach to .obs; replaces the default assigned in the previous cell.
categorical_key = ["L4Region", "DissectionRegion"]
# cpu, group_name and mem_gb are not read by any cell visible in this notebook;
# presumably they are consumed by the pipeline that launches it — TODO confirm.
cpu = 1
group_name = "Isocortex"
mem_gb = 1


## Input LSI before integration

In [5]:
# Load the two per-modality AnnData files from the working directory.
# Each adata contains the input cell-by-5kb-bin matrix in adata.X
# and LSI components in adata.obsm['X_lsi'].
# NOTE: the final cell of this notebook deletes both files, so they have to be
# regenerated before the notebook can be run again.
mc_adata = anndata.read_h5ad('mc_lsi.h5ad')
atac_adata = anndata.read_h5ad('atac_lsi.h5ad')

In [6]:
# Order matters: position 0 is the mC dataset and position 1 the ATAC dataset.
# The `alignments=[[[0], [1]]]` arguments passed to the integrator below presumably
# index into this list (the anchor log reports "datasets 0 and 1") — confirm.
adata_list = [mc_adata, atac_adata]

### Init empty adata_merge

In [7]:
# Build a placeholder AnnData spanning the cells of every dataset in adata_list.
# X is an all-zero sparse matrix: the object only collects results (obs columns,
# integrated embeddings) for saving and plotting.

from scipy.sparse import csr_matrix

# Total number of cells across datasets; the feature axis is taken from the first one.
cells = sum(a.shape[0] for a in adata_list)
_, features = adata_list[0].shape

merged_obs = pd.concat([a.obs for a in adata_list])
empty_x = csr_matrix((cells, features), dtype=np.float32)

adata_merge = anndata.AnnData(
    X=empty_x,
    obs=merged_obs,
    var=adata_list[0].var,
)

In [8]:
# Fetch the annotation objects for both modalities from the wmb helpers.
# The next cell indexes them by column name and converts each column with
# .to_pandas() before attaching it to adata_merge.obs.
mc_annot = cemba.get_mc_annot()
atac_annot = cemba_atac.get_atac_annot()

L4_annot does not exist in data_vars or coords, palette not added.


In [9]:
# Attach every requested annotation column to adata_merge.obs, stacking the
# mC values on top of the ATAC values and storing them as strings.
for key in categorical_key:
    per_modality = [annot[key].to_pandas() for annot in (mc_annot, atac_annot)]
    adata_merge.obs[key] = pd.concat(per_modality).astype(str)

In [10]:
# Copy each annotation column from adata_merge.obs back onto the individual
# datasets, cast to string, so all objects carry the same categorical columns.
for key in categorical_key:
    labels = adata_merge.obs[key].astype(str)
    for adata in adata_list:
        adata.obs[key] = labels

In [11]:
# Display both datasets to check that the categorical columns now appear in .obs.
adata_list

[AnnData object with n_obs × n_vars = 94767 × 299329
     obs: 'read_count', 'Modality', 'L4Region', 'DissectionRegion'
     var: 'chrom', 'end', 'start'
     obsm: 'X_lsi',
 AnnData object with n_obs × n_vars = 500000 × 299329
     obs: 'read_count', 'Modality', 'L4Region', 'DissectionRegion'
     var: 'chrom', 'end', 'start'
     obsm: 'X_lsi']

In [12]:
# Display the merged placeholder; its n_obs should equal the sum over adata_list.
adata_merge

AnnData object with n_obs × n_vars = 594767 × 299329
    obs: 'read_count', 'Modality', 'L4Region', 'DissectionRegion'
    var: 'chrom', 'end', 'start'

In [13]:
# Derive the number of CCA components from the width of the LSI embedding:
#   - fewer than 10 LSI components: use all of them;
#   - otherwise: drop 10, but never go below 10;
#   - in every case cap the result at 50.
n_pc = adata_list[0].obsm["X_lsi"].shape[1]
n_cca_components = min(n_pc if n_pc < 10 else max(n_pc - 10, 10), 50)

n_cca_components

39

In [14]:
# Number of cells in the least frequent `Modality` value of adata_merge.obs.
# NOTE(review): the identical expression is evaluated again in the cell that picks
# k_weight, and no visible cell reads `min_sample` in between — this cell looks redundant.
min_sample = adata_merge.obs["Modality"].value_counts().min()

## Integrate and transform LSI

In [15]:
# Create the ALLCools SeuratIntegration object used by the following cells for
# find_anchor(), integrate() and save().
# NOTE(review): integrate() is called without passing the anchors back in, so the
# object presumably keeps them as internal state — confirm.
integrator = SeuratIntegration()

In [16]:
# Find cross-modality anchors between the two datasets in adata_list.
# What the call passes (semantics are defined by SeuratIntegration.find_anchor):
#   - key_local='X_lsi', key_anchor='X': the obsm embedding and the raw matrix are
#     named as the two data sources for the search;
#   - dim_red='lsi' with n_components=n_cca_components (computed above);
#   - alignments=[[[0], [1]]]: a single pairing of dataset 0 with dataset 1.
# NOTE(review): k_anchor=5, k_score=30, max_cc_cells=20000 and n_features=200 are
# hard-coded here; presumably standard Seurat-style defaults — TODO confirm and
# consider moving them to the parameters cell.
anchor = integrator.find_anchor(
    adata_list,
    k_local=None,
    key_local='X_lsi',
    k_anchor=5,
    key_anchor='X',
    dim_red='lsi',
    max_cc_cells=20000,
    k_score=30,
    k_filter=None,
    n_components=n_cca_components,
    n_features=200,
    alignments=[[[0], [1]]])

Find anchors across datasets.


Run LSI-CCA


Find Anchors using k=30


Score Anchors


Identified 248668 anchors between datasets 0 and 1.


In [17]:
# Choose the k_weight passed to integrator.integrate() from the size of the
# smaller modality (cell count of the least frequent `Modality` value).
#
# The thresholds are tested in ascending order. Testing `< 500` before `< 300`
# made the `< 300` branch unreachable, so k_weight=30 could never be selected.
min_sample = adata_merge.obs["Modality"].value_counts().min()
if min_sample < 300:
    k_weight = 30
elif min_sample < 500:
    k_weight = 50
else:
    k_weight = 100

In [18]:
# Correct the LSI embedding with the anchors found above and store the result in
# adata_merge.obsm['X_lsi_integrate'].
#
# The first attempt uses the k_weight chosen in the previous cell. If it fails,
# progressively smaller k_weight values (50, 45, ..., 5) are tried and the FIRST
# one that succeeds is kept. If every attempt fails, the original error is
# re-raised so the failure is visible instead of surfacing later as a NameError
# on `corrected`.

# Arguments shared by every attempt; only k_weight changes between tries.
integrate_kwargs = dict(
    key_correct="X_lsi",
    row_normalize=True,
    sd=1,
    alignments=[[[0], [1]]],
)

try:
    corrected = integrator.integrate(k_weight=k_weight, **integrate_kwargs)
except Exception:
    # Catch Exception, not BaseException, so KeyboardInterrupt/SystemExit still
    # stop the notebook.
    for k_weight in range(50, 0, -5):
        print(k_weight)
        try:
            corrected = integrator.integrate(k_weight=k_weight, **integrate_kwargs)
        except Exception:
            continue
        # Success: stop here so a working result is not overwritten by a retry
        # with a smaller k_weight.
        break
    else:
        # No fallback value worked; re-raise the error from the first attempt.
        raise

# `corrected` holds one array per dataset; stack them in adata_list order.
adata_merge.obsm['X_lsi_integrate'] = np.concatenate(corrected)

Merge datasets
[[0], [1]]
Initialize
Find nearest anchors. 

  data=np.array(corrected),


k_weight:  100


Normalize graph


Transform data


## Label transfer

In [19]:
# NOTE: label transfer is disabled in this notebook; the code is kept commented out
# for reference. Nothing below runs, and no *_transfer.hdf files are produced.
# transfer_results = integrator.label_transfer(
#     ref=[0],
#     qry=[1],
#     categorical_key=categorical_key,
#     key_dist='X_lsi'
# )
# for k, v in transfer_results.items():
#     v.to_hdf(f'{k}_transfer.hdf', key='data')
# integrator.save_transfer_results_to_adata(adata_merge, transfer_results)

## Save

In [20]:
# Persist the merged object. Its X is still the all-zero sparse placeholder created
# earlier; the integrated embedding is stored in obsm['X_lsi_integrate'].
adata_merge.write_h5ad('final.h5ad')

In [21]:
# Display the saved object; obsm should now list 'X_lsi_integrate'.
adata_merge

AnnData object with n_obs × n_vars = 594767 × 299329
    obs: 'read_count', 'Modality', 'L4Region', 'DissectionRegion'
    var: 'chrom', 'end', 'start'
    obsm: 'X_lsi_integrate'

In [22]:
# Save the integrator under the name 'integration'.
# NOTE(review): what gets written (single file vs. directory, and which parts of the
# state) is decided by SeuratIntegration.save — confirm before relying on it.
integrator.save('integration')

In [23]:
import subprocess

# Remove the two input files now that the results are saved. `rm -f` does not
# complain about missing files. This makes the notebook non-repeatable until the
# inputs are regenerated.
cleanup_cmd = ['rm', '-f', 'mc_lsi.h5ad', 'atac_lsi.h5ad']
subprocess.run(cleanup_cmd)

CompletedProcess(args=['rm', '-f', 'mc_lsi.h5ad', 'atac_lsi.h5ad'], returncode=0)