In [1]:
from ALLCools.clustering import *
from wmb import brain, cemba, aibs
import numpy as np
import pandas as pd
import anndata

import matplotlib.pyplot as plt
from ALLCools.plot import *

from ALLCools.integration.seurat_class import SeuratIntegration

In [2]:
# Which RNA reference the annotation table is loaded for (checked further
# down when picking the annotation loader).
dataset = 'AIBS_SMART'

# obs columns that are copied from the RNA annotation and later handed to
# label transfer.
categorical_key = ['L1_annot', 'DissectionRegion']

## Input PCA before integration

In [3]:
# Read the reference (RNA) and query (MERFISH) AnnData files in that order.
ref_adata, query_adata = (
    anndata.read_h5ad(h5ad_path)
    for h5ad_path in ('./adata/rna_pca.h5ad', './adata/merfish_pca.h5ad')
)

In [4]:
# Order matters: index 0 is the reference and index 1 is the query; later
# cells address the datasets by these positions (alignments, ref=[0], qry=[1]).
adata_list = [ref_adata, query_adata]

### Init empty adata_merge

In [5]:
from scipy.sparse import csr_matrix

# The merged object spans every cell of every input and reuses the feature
# axis (and var table) of the first dataset.
cells = sum(a.shape[0] for a in adata_list)
features = adata_list[0].shape[1]

# X stores no values: an empty sparse matrix of the right shape acts as a
# placeholder, while obs is the row-wise concatenation of the inputs' obs.
empty_x = csr_matrix(([], ([], [])), shape=(cells, features))
merged_obs = pd.concat([a.obs for a in adata_list])
adata_merge = anndata.AnnData(X=empty_x,
                              obs=merged_obs,
                              var=adata_list[0].var)

  if __name__ == "__main__":


In [6]:
# Load the RNA annotation table that matches the configured reference dataset.
if dataset == 'AIBS_SMART':
    rna_annot = aibs.get_smart_annot()
elif dataset == 'AIBS_TENX':
    rna_annot = aibs.get_tenx_annot()
else:
    # `broad` is not among the names explicitly imported from `wmb` at the top
    # of the notebook, so this fallback branch failed with a NameError on a
    # fresh kernel. Import it here so the branch is runnable on its own.
    from wmb import broad
    rna_annot = broad.get_tenx_annot()


In [7]:
# Copy every requested annotation column onto the merged obs table.
# NOTE(review): presumably `.to_pandas()` yields a Series indexed by cell id,
# in which case cells absent from the RNA annotation end up as missing values.
for key in categorical_key:
    annot_column = rna_annot[key].to_pandas()
    adata_merge.obs[key] = annot_column
    

In [8]:
# Propagate the annotation columns from the merged obs table back onto each
# individual dataset, so the per-dataset objects carry the same label columns.
for adata in adata_list:
    for key in categorical_key:
        merged_column = adata_merge.obs[key]
        adata.obs[key] = merged_column

In [9]:
# Show both inputs to confirm the annotation columns are now present in obs.
adata_list

[AnnData object with n_obs × n_vars = 100000 × 483
     obs: 'count', 'read_count', 'n_counts', 'leiden', 'Modality', 'L1_annot', 'DissectionRegion'
     var: 'mean-RNA', 'std-RNA'
     obsm: 'X_pca',
 AnnData object with n_obs × n_vars = 66356 × 483
     obs: 'count', 'read_count', 'n_counts', 'leiden', 'Modality', 'L1_annot', 'DissectionRegion'
     var: 'mean-RNA', 'std-RNA'
     obsm: 'X_pca']

In [10]:
# Show the merged container (row count should equal the sum of the inputs).
adata_merge

AnnData object with n_obs × n_vars = 166356 × 483
    obs: 'count', 'read_count', 'n_counts', 'leiden', 'Modality', 'L1_annot', 'DissectionRegion'
    var: 'mean-RNA', 'std-RNA'

## Integration and transform

In [11]:
# Create the integrator with no constructor arguments; the cells below call
# find_anchor, integrate, label_transfer and save on this same object.
integrator = SeuratIntegration()

In [12]:
# Anchor-search settings gathered in one mapping so they can be read (and
# tuned) in a single place before the call.
anchor_kwargs = dict(
    k_local=None,
    key_local='X_pca',
    k_anchor=5,
    key_anchor='X',
    dim_red='cca',
    max_cc_cells=100000,
    k_score=30,
    k_filter=None,
    scale1=False,
    scale2=False,
    n_components=50,
    n_features=200,
    # Dataset 0 is paired with dataset 1 (positions in adata_list).
    alignments=[[[0], [1]]],
)
anchor = integrator.find_anchor(adata_list, **anchor_kwargs)

Find anchors across datasets.
Run CCA
non zero dims 50
Find Anchors
Score Anchors
Identified 85962 anchors between datasets 0 and 1.


In [13]:
# Correct the PCA embedding using the anchors found in the previous step.
integrate_kwargs = {
    'key_correct': 'X_pca',
    'row_normalize': True,
    'n_components': 30,
    'k_weight': 100,
    'sd': 1,
    'alignments': [[[0], [1]]],
}
corrected = integrator.integrate(**integrate_kwargs)

# Stack the returned pieces along the first axis and keep the result on the
# merged object.
# NOTE(review): assumes the pieces come back in adata_list order so rows line
# up with adata_merge.obs — confirm.
stacked_embedding = np.concatenate(corrected)
adata_merge.obsm['X_pca_integrate'] = stacked_embedding

Merge datasets
[[0], [1]]
Initialize
Find nearest anchors


  data=np.array(corrected),


Normalize graph
Transform data


## Label transfer

In [14]:
# Show the merged container again; obsm should now list 'X_pca_integrate'.
adata_merge

AnnData object with n_obs × n_vars = 166356 × 483
    obs: 'count', 'read_count', 'n_counts', 'leiden', 'Modality', 'L1_annot', 'DissectionRegion'
    var: 'mean-RNA', 'std-RNA'
    obsm: 'X_pca_integrate'

In [15]:
# Transfer the annotation columns from the reference (position 0 in
# adata_list) to the query (position 1).
reference_ids, query_ids = [0], [1]
transfer_results = integrator.label_transfer(ref=reference_ids,
                                             qry=query_ids,
                                             categorical_key=categorical_key,
                                             key_dist='X_pca')

Initialize
Find nearest anchors
Normalize graph
Label transfer


In [16]:
# Persist one HDF file per transferred label key, written to the current
# working directory as '<key>_transfer.hdf'.
for k in transfer_results:
    v = transfer_results[k]
    v.to_hdf(f'{k}_transfer.hdf', key='data')

In [17]:
# Hand the transfer results to the integrator helper that, per its name,
# records them on adata_merge (exact fields written — TODO confirm).
integrator.save_transfer_results_to_adata(adata_merge, transfer_results)

## Save

In [18]:
# Write the merged AnnData (obs, var and the integrated embedding) to disk.
adata_merge.write_h5ad('./adata/final.h5ad')

In [20]:
# Display the integrator object (default repr only).
integrator

<ALLCools.integration.seurat_class.SeuratIntegration at 0x7f4451dab7d0>

In [19]:
# NOTE(review): the recorded run of this cell failed with
# "TypeError: Can't implicitly convert non-string objects to strings" while
# writing key 'L1_annot'. Presumably an obs column on one of the datasets mixes
# strings with missing values — confirm and clean that column before saving.
# The cell was also executed out of order (its execution count precedes the
# display cell above), so re-check it with Restart & Run All.
integrator.save('integration')

TypeError: Can't implicitly convert non-string objects to strings

Above error raised while writing key 'L1_annot' of <class 'h5py._hl.group.Group'> to /