# Export embeddings from trained models ...
... for uploading/sharing small intermediate data from which the publication figures can be recreated.

In [1]:
# Conda environment: dynamic_LIAM_challenge_reproducibility

In [1]:
# Imports
from pathlib import Path

import anndata as ad

In [2]:
# Start from an empty list of model names
models = list()

In [3]:
# Build the list of model names to export.
# The list is (re)created inside this cell so that re-running the cell does not
# append duplicate entries, which would make the export loop process models twice.
# The first two entries are the unsuffixed BAVAE and VAE models; every further
# factor adds one BAVAE model whose name carries the factor as a suffix.
models = ['BAVAE_sample_100_extended_use_case',
          'VAE_100_extended_use_case']
for factor in ['_x5', '_x10', '_x25', '_x50', '_x100']:
    models.append('BAVAE_sample_100_extended_use_case{}'.format(factor))

In [4]:
# Display the assembled list of model names for a visual check
models

['BAVAE_sample_100_extended_use_case',
 'VAE_100_extended_use_case',
 'BAVAE_sample_100_extended_use_case_x5',
 'BAVAE_sample_100_extended_use_case_x10',
 'BAVAE_sample_100_extended_use_case_x25',
 'BAVAE_sample_100_extended_use_case_x50',
 'BAVAE_sample_100_extended_use_case_x100']

In [5]:
# Export one small .h5ad file per model, containing the embedding and its metadata.

# Create the output directory up front so the loop cannot fail at the write
# step merely because the directory is missing.
output_dir = Path("Predictions")
output_dir.mkdir(parents=True, exist_ok=True)

# obsm entries carried over to the export; 'ATAC' is dropped from obsm (large)
keys_to_select = ['X_umap', 'embedding']

for model in models:
    # Load the AnnData file saved in this model's directory
    adata = ad.read_h5ad('./../../models/ETCU/{}/adata.h5ad'.format(model))
    obsm_subset = {key: adata.obsm[key] for key in keys_to_select}
    # Select embedding as X for anndata, keep all other metadata
    # (precomputed NNs, UMAP coordinates, ADT, etc.) via obs, uns, obsp and the obsm subset
    embedding = ad.AnnData(adata.obsm['embedding'], obs=adata.obs, obsm=obsm_subset, uns=adata.uns, obsp=adata.obsp)
    # Save embeddings to Predictions/<model>.prediction.h5ad
    embedding.write_h5ad(output_dir / "{}.prediction.h5ad".format(model))