In [None]:
from ALLCools.clustering import *
from wmb import brain, cemba, cemba_atac
import numpy as np
import pandas as pd
import anndata

import matplotlib.pyplot as plt
from ALLCools.plot import *

from ALLCools.integration.seurat_class import SeuratIntegration

In [None]:
# obs columns that are copied from the annotation tables into the AnnData
# objects and later passed to the label-transfer step
categorical_key = [
    'L1_annot',
    'L2',
    'L3',
    'DissectionRegion',
]

## Input LSI before integration

In [None]:
# Load both input datasets. By this notebook's convention each file stores the
# cell-by-5kb-bin matrix in adata.X and the LSI components in
# adata.obsm['X_lsi'].
mc_adata, atac_adata = (
    anndata.read_h5ad(h5ad_path)
    for h5ad_path in ('mc_lsi.h5ad', 'atac_lsi.h5ad')
)

In [None]:
# Order matters: position 0 is mc_adata and position 1 is atac_adata. Later
# cells address the datasets by these positions (alignments=[[[0], [1]]],
# ref=[0], qry=[1]).
adata_list = [mc_adata, atac_adata]

### Init empty adata_merge

In [None]:
# Build a placeholder AnnData used only to collect results and make plots:
# X has no stored entries, obs stacks the obs tables of every dataset (in
# adata_list order), and var is taken from the first dataset.

from scipy.sparse import csr_matrix

cells = sum(a.shape[0] for a in adata_list)
features = adata_list[0].shape[1]

# no data / row / col entries -> an empty (cells x features) CSR matrix
empty_X = csr_matrix(([], ([], [])), shape=(cells, features))
merged_obs = pd.concat([a.obs for a in adata_list])

adata_merge = anndata.AnnData(X=empty_X,
                              obs=merged_obs,
                              var=adata_list[0].var)

In [None]:
# Fetch the annotation tables for the two modalities from the wmb package.
# Downstream they are indexed by column name and converted with .to_pandas();
# NOTE(review): the exact container type is defined by wmb — confirm there.
mc_annot = cemba.get_mc_annot()
atac_annot = cemba_atac.get_atac_annot()

In [None]:
# Copy every categorical annotation column into adata_merge.obs as strings.
for key in categorical_key:
    mc_labels = mc_annot[key].to_pandas()
    atac_labels = atac_annot[key].to_pandas()
    # stack mC labels on top of ATAC labels, then store them as str;
    # the assignment is aligned on the obs index by pandas
    stacked_labels = pd.concat([mc_labels, atac_labels])
    adata_merge.obs[key] = stacked_labels.astype(str)

In [None]:
# Mirror the annotation columns of adata_merge onto each individual dataset so
# that the per-dataset objects carry the same labels (as strings).
for adata in adata_list:
    for key in categorical_key:
        merged_column = adata_merge.obs[key]
        adata.obs[key] = merged_column.astype(str)

In [None]:
# Display both datasets as the cell output to check the obs columns added above.
adata_list

## Integration and transform LSI

In [None]:
# Single integrator instance reused by the following cells for anchor finding,
# integration, label transfer, and saving.
integrator = SeuratIntegration()

In [None]:
# Anchor-search settings collected in one place so they are easy to inspect
# and tune. NOTE(review): parameter semantics are defined by
# SeuratIntegration.find_anchor — consult its documentation before changing.
anchor_settings = dict(
    k_local=None,
    key_local='X_lsi',
    k_anchor=5,
    key_anchor='X',
    dim_red='lsi',
    max_cc_cells=100000,
    k_score=30,
    k_filter=None,
    scale1=False,
    scale2=False,
    n_components=50,
    n_features=200,
    # pair dataset 0 with dataset 1 (positions in adata_list)
    alignments=[[[0], [1]]],
)

anchor = integrator.find_anchor(adata_list, **anchor_settings)

In [None]:
# Correct the LSI embeddings using the anchors found above.
integrate_settings = dict(
    key_correct='X_lsi',
    row_normalize=True,
    n_components=30,
    k_weight=100,
    sd=1,
    alignments=[[[0], [1]]],
)
corrected = integrator.integrate(**integrate_settings)

# Stack the corrected embeddings row-wise and keep them on the merged object.
# NOTE(review): assumes `corrected` holds one array per dataset in adata_list
# order, matching the obs order used to build adata_merge — confirm.
adata_merge.obsm['X_lsi_integrate'] = np.concatenate(corrected, axis=0)

## Label transfer

In [None]:
# Transfer the categorical labels from dataset 0 (reference) to dataset 1
# (query), using the 'X_lsi' key for distances.
transfer_results = integrator.label_transfer(
    categorical_key=categorical_key,
    key_dist='X_lsi',
    ref=[0],
    qry=[1],
)

In [None]:
# Write each transfer result to its own HDF file, named after its key.
for label_key, transfer_table in transfer_results.items():
    hdf_path = f'{label_key}_transfer.hdf'
    transfer_table.to_hdf(hdf_path, key='data')

In [None]:
# Attach the label-transfer results to adata_merge so they are written out with
# it below. NOTE(review): which fields get added is decided by
# SeuratIntegration.save_transfer_results_to_adata — confirm in its docs.
integrator.save_transfer_results_to_adata(adata_merge, transfer_results)

## Save

In [None]:
# Persist the merged object (annotation columns in obs, integrated embedding in
# obsm['X_lsi_integrate'], plus whatever the transfer step attached).
adata_merge.write_h5ad('final.h5ad')

In [None]:
# Display a summary of the merged object as the cell output.
adata_merge

In [None]:
# Save the integrator state under the name 'integration'.
# NOTE(review): what exactly is written is decided by SeuratIntegration.save —
# confirm in its docs.
integrator.save('integration')