# Train different variants of the model

Save both the full models and the predictions in the format required for the NeurIPS data.



In [1]:
# Conda environment: dynamic_LIAM_challenge_reproducibility

In [2]:
# Imports
import liam_NeurIPS2021_challenge_reproducibility
import scanpy as sc
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import anndata as ad
import scvi
from sklearn.metrics import silhouette_score, silhouette_samples

Global seed set to 0
OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


In [3]:
# Check whether CUDA is available to PyTorch; the cell displays the boolean result.
torch.cuda.is_available()

True

In [4]:
# Registry of model variants: variant name -> dict of parameter groups.
# Filled in by the cells below and consumed by the training loop.
model_param_mapping = dict()

In [5]:
# Variant 'BAVAE_sample_100': adversarial training enabled, batch_key 'sample', n_latent 20.
model_param_mapping['BAVAE_sample_100'] = {
    # Keyword arguments forwarded to Liam.setup_anndata
    'setup_anndata_params': {
        'chrom_acc_obsm_key': 'ATAC',
        'batch_key': 'sample',
    },
    # Keyword arguments forwarded to the Liam constructor
    'Liam_params': {
        'adversarial_training': True,
        'n_latent': 20,
    },
}

In [6]:
# Variant 'BAVAE_buffer_100': adversarial training enabled, batch_key 'buffer', n_latent 20.
model_param_mapping['BAVAE_buffer_100'] = {
    # Keyword arguments forwarded to Liam.setup_anndata
    'setup_anndata_params': {
        'chrom_acc_obsm_key': 'ATAC',
        'batch_key': 'buffer',
    },
    # Keyword arguments forwarded to the Liam constructor
    'Liam_params': {
        'adversarial_training': True,
        'n_latent': 20,
    },
}

In [7]:
# Variant 'BAVAE_sample_100_x5': batch_key 'sample', factor_adversarial_loss 5.0.
model_param_mapping['BAVAE_sample_100_x5'] = {
    # Keyword arguments forwarded to Liam.setup_anndata
    'setup_anndata_params': {
        'chrom_acc_obsm_key': 'ATAC',
        'batch_key': 'sample',
    },
    # Keyword arguments forwarded to the Liam constructor
    'Liam_params': {
        'adversarial_training': True,
        'n_latent': 20,
        'factor_adversarial_loss': 5.0,
    },
}

In [8]:
# Variant 'BAVAE_buffer_100_x5': batch_key 'buffer', factor_adversarial_loss 5.0.
model_param_mapping['BAVAE_buffer_100_x5'] = {
    # Keyword arguments forwarded to Liam.setup_anndata
    'setup_anndata_params': {
        'chrom_acc_obsm_key': 'ATAC',
        'batch_key': 'buffer',
    },
    # Keyword arguments forwarded to the Liam constructor
    'Liam_params': {
        'adversarial_training': True,
        'n_latent': 20,
        'factor_adversarial_loss': 5.0,
    },
}

In [9]:
# Variant 'BAVAE_sample_100_x10': batch_key 'sample', factor_adversarial_loss 10.0.
model_param_mapping['BAVAE_sample_100_x10'] = {
    # Keyword arguments forwarded to Liam.setup_anndata
    'setup_anndata_params': {
        'chrom_acc_obsm_key': 'ATAC',
        'batch_key': 'sample',
    },
    # Keyword arguments forwarded to the Liam constructor
    'Liam_params': {
        'adversarial_training': True,
        'n_latent': 20,
        'factor_adversarial_loss': 10.0,
    },
}

In [10]:
# Variant 'BAVAE_buffer_100_x10': batch_key 'buffer', factor_adversarial_loss 10.0.
model_param_mapping['BAVAE_buffer_100_x10'] = {
    # Keyword arguments forwarded to Liam.setup_anndata
    'setup_anndata_params': {
        'chrom_acc_obsm_key': 'ATAC',
        'batch_key': 'buffer',
    },
    # Keyword arguments forwarded to the Liam constructor
    'Liam_params': {
        'adversarial_training': True,
        'n_latent': 20,
        'factor_adversarial_loss': 10.0,
    },
}

In [11]:
# Variant 'BAVAE_sample_100_x25': batch_key 'sample', factor_adversarial_loss 25.0.
model_param_mapping['BAVAE_sample_100_x25'] = {
    # Keyword arguments forwarded to Liam.setup_anndata
    'setup_anndata_params': {
        'chrom_acc_obsm_key': 'ATAC',
        'batch_key': 'sample',
    },
    # Keyword arguments forwarded to the Liam constructor
    'Liam_params': {
        'adversarial_training': True,
        'n_latent': 20,
        'factor_adversarial_loss': 25.0,
    },
}

In [12]:
# Variant 'BAVAE_buffer_100_x25': batch_key 'buffer', factor_adversarial_loss 25.0.
model_param_mapping['BAVAE_buffer_100_x25'] = {
    # Keyword arguments forwarded to Liam.setup_anndata
    'setup_anndata_params': {
        'chrom_acc_obsm_key': 'ATAC',
        'batch_key': 'buffer',
    },
    # Keyword arguments forwarded to the Liam constructor
    'Liam_params': {
        'adversarial_training': True,
        'n_latent': 20,
        'factor_adversarial_loss': 25.0,
    },
}

In [13]:
# Variant 'BAVAE_sample_100_x50': batch_key 'sample', factor_adversarial_loss 50.0.
model_param_mapping['BAVAE_sample_100_x50'] = {
    # Keyword arguments forwarded to Liam.setup_anndata
    'setup_anndata_params': {
        'chrom_acc_obsm_key': 'ATAC',
        'batch_key': 'sample',
    },
    # Keyword arguments forwarded to the Liam constructor
    'Liam_params': {
        'adversarial_training': True,
        'n_latent': 20,
        'factor_adversarial_loss': 50.0,
    },
}

In [14]:
# Variant 'BAVAE_buffer_100_x50': batch_key 'buffer', factor_adversarial_loss 50.0.
model_param_mapping['BAVAE_buffer_100_x50'] = {
    # Keyword arguments forwarded to Liam.setup_anndata
    'setup_anndata_params': {
        'chrom_acc_obsm_key': 'ATAC',
        'batch_key': 'buffer',
    },
    # Keyword arguments forwarded to the Liam constructor
    'Liam_params': {
        'adversarial_training': True,
        'n_latent': 20,
        'factor_adversarial_loss': 50.0,
    },
}

In [15]:
# Variant 'BAVAE_sample_100_x100': batch_key 'sample', factor_adversarial_loss 100.0.
model_param_mapping['BAVAE_sample_100_x100'] = {
    # Keyword arguments forwarded to Liam.setup_anndata
    'setup_anndata_params': {
        'chrom_acc_obsm_key': 'ATAC',
        'batch_key': 'sample',
    },
    # Keyword arguments forwarded to the Liam constructor
    'Liam_params': {
        'adversarial_training': True,
        'n_latent': 20,
        'factor_adversarial_loss': 100.0,
    },
}

In [16]:
# Variant 'BAVAE_buffer_100_x100': batch_key 'buffer', factor_adversarial_loss 100.0.
model_param_mapping['BAVAE_buffer_100_x100'] = {
    # Keyword arguments forwarded to Liam.setup_anndata
    'setup_anndata_params': {
        'chrom_acc_obsm_key': 'ATAC',
        'batch_key': 'buffer',
    },
    # Keyword arguments forwarded to the Liam constructor
    'Liam_params': {
        'adversarial_training': True,
        'n_latent': 20,
        'factor_adversarial_loss': 100.0,
    },
}

In [17]:
# Variant 'BAVAE_sample_100_x1000': batch_key 'sample', factor_adversarial_loss 1000.0.
model_param_mapping['BAVAE_sample_100_x1000'] = {
    # Keyword arguments forwarded to Liam.setup_anndata
    'setup_anndata_params': {
        'chrom_acc_obsm_key': 'ATAC',
        'batch_key': 'sample',
    },
    # Keyword arguments forwarded to the Liam constructor
    'Liam_params': {
        'adversarial_training': True,
        'n_latent': 20,
        'factor_adversarial_loss': 1000.0,
    },
}

In [18]:
# Variant 'BAVAE_buffer_100_x1000': batch_key 'buffer', factor_adversarial_loss 1000.0.
model_param_mapping['BAVAE_buffer_100_x1000'] = {
    # Keyword arguments forwarded to Liam.setup_anndata
    'setup_anndata_params': {
        'chrom_acc_obsm_key': 'ATAC',
        'batch_key': 'buffer',
    },
    # Keyword arguments forwarded to the Liam constructor
    'Liam_params': {
        'adversarial_training': True,
        'n_latent': 20,
        'factor_adversarial_loss': 1000.0,
    },
}

In [19]:
# Variant 'VAE_100': no batch key (batch_key is None) and no adversarial_training entry.
# Also models the ATAC library size (but batch independent).
model_param_mapping['VAE_100'] = {
    # Keyword arguments forwarded to Liam.setup_anndata
    'setup_anndata_params': {
        'chrom_acc_obsm_key': 'ATAC',
        'batch_key': None,
    },
    # Keyword arguments forwarded to the Liam constructor
    'Liam_params': {
        'n_latent': 20,
        'dispersion_gex': 'gene',
        'dispersion_atac': 'constant',
    },
}

In [None]:
# Train, embed, cluster, and save one Liam model per variant registered in
# model_param_mapping. The AnnData object is named `adata` rather than `input`
# so the Python builtin `input` is never shadowed, even if an iteration fails
# before the cleanup at the bottom of the loop.
for model in model_param_mapping:
    print(model)
    # Instantiated only for its side effects; the returned object is discarded.
    # NOTE(review): presumably this re-applies scvi's default settings (including
    # the global seed) before each run — confirm against the installed scvi version.
    scvi._settings.ScviConfig()
    # Re-read the data from disk on every iteration so each variant starts from
    # a freshly loaded AnnData object.
    adata = ad.read_h5ad("./../../data/derived/Mimitou2021/DOGMA_seq/preprocessed_DOGMA.h5ad")

    # Derive the covariates used as batch keys from the "batch" label:
    # "sample" is the full label, "buffer" and "condition" are its 2nd and 3rd
    # underscore-separated fields.
    adata.obs["sample"] = adata.obs["batch"]
    adata.obs["buffer"] = adata.obs["batch"].apply(lambda x: x.split("_")[1])
    adata.obs["condition"] = adata.obs["batch"].apply(lambda x: x.split("_")[2])

    # Register the AnnData object with the variant-specific setup arguments.
    liam_NeurIPS2021_challenge_reproducibility.Liam.setup_anndata(
        adata,
        **model_param_mapping[model]['setup_anndata_params']
    )

    # Build the model with the variant-specific constructor arguments.
    vae = liam_NeurIPS2021_challenge_reproducibility.Liam(
        adata, **model_param_mapping[model]['Liam_params']
    )

    # 95/5 train/validation split with early stopping (patience 10).
    vae.train(
        train_size=0.95,
        validation_size=0.05,
        batch_size=128,
        early_stopping=True,
        save_best=True,
        early_stopping_patience=10,
    )

    # Store the latent representation, then compute the neighbor graph, UMAP
    # and Leiden clustering on it so they are part of the AnnData saved below.
    adata.obsm["embedding"] = vae.get_latent_representation()

    sc.pp.neighbors(adata, use_rep="embedding")
    sc.tl.umap(adata)
    sc.tl.leiden(adata, key_added="leiden_embedding")

    # Persist the trained model together with its AnnData object.
    vae.save("./../../models/TCU/{}".format(model), save_anndata=True)

    # Drop the references before the next iteration loads a new copy.
    del vae
    del adata
    