## Train OntoVAE model

In [None]:
# import packages
import sys
import scanpy as sc
# '/workspace' is added to the module search path before the cobra_ai imports;
# presumably this is the directory that contains the cobra_ai package
# (the data paths used below also live under /workspace/cobra_ai) -- confirm.
sys.path.append('/workspace')
# NOTE(review): star imports hide where Ontobj, setup_anndata_ontovae and
# OntoVAE come from; consider importing those names explicitly.
from cobra_ai.module.ontobj import *
from cobra_ai.module.utils import *
from cobra_ai.model.onto_vae import *

In [3]:
# load ontobj
# An empty Ontobj is created first; load() is then called on that instance
# with the path of the saved GO ontology object. The return value of load()
# is not used, so the call is made for its effect on `ontobj`.
ontobj = Ontobj()
ontobj.load('/workspace/cobra_ai/data/GO/GO.ontobj')

In [None]:
# load anndata (Kang PBMC data - log normalized)
# Reads the training dataset from an .h5ad file with scanpy.
adata = sc.read_h5ad('/workspace/cobra_ai/data/train_pbmc.h5ad')

In [5]:
# setup the anndata
# Prepares `adata` for the model using the ontology object loaded above.
# The result is bound back to the name `adata`, so the object read from disk
# is no longer reachable by name; re-running this cell feeds the already
# prepared object back into setup_anndata_ontovae.
adata = setup_anndata_ontovae(adata, ontobj)

In [5]:
# initialize the model
# Only the prepared `adata` is passed; no other constructor arguments are given.
model = OntoVAE(adata)

scOntoVAE(
  (encoder): Encoder(
    (encoder): ModuleList(
      (0): Sequential(
        (0): Linear(in_features=19469, out_features=1755, bias=True)
        (1): BatchNorm1d(1755, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Dropout(p=0.2, inplace=False)
        (3): ReLU()
      )
    )
    (mu): Sequential(
      (0): Linear(in_features=1755, out_features=1755, bias=True)
      (1): Dropout(p=0.5, inplace=False)
    )
    (logvar): Sequential(
      (0): Linear(in_features=1755, out_features=1755, bias=True)
      (1): Dropout(p=0.5, inplace=False)
    )
  )
  (decoder): OntoDecoder(
    (decoder): ModuleList(
      (0): Sequential(
        (0): Linear(in_features=1755, out_features=42, bias=True)
      )
      (1): Sequential(
        (0): Linear(in_features=1797, out_features=213, bias=True)
      )
      (2): Sequential(
        (0): Linear(in_features=2010, out_features=621, bias=True)
      )
      (3): Sequential(
        (0): Linear(in_featur

In [8]:
# train the model for 5 epochs
# NOTE(review): the first argument is the same path that a later cell passes
# to OntoVAE.load, so it is presumably where the best model is stored -- confirm.
model.train_model(
    '/workspace/cobra_ai/test',
    lr=1e-4,
    kl_coeff=1e-4,
    batch_size=128,
    epochs=5,
)

Epoch 1 of 5


100%|██████████| 106/106 [00:05<00:00, 17.78it/s]
100%|██████████| 27/27 [00:00<00:00, 105.00it/s]


New best model!
Train Loss: 38731.7956
Val Loss: 28209.4666
Epoch 2 of 5


100%|██████████| 106/106 [00:05<00:00, 17.84it/s]
100%|██████████| 27/27 [00:00<00:00, 103.26it/s]


New best model!
Train Loss: 27007.4509
Val Loss: 21975.3391
Epoch 3 of 5


100%|██████████| 106/106 [00:05<00:00, 17.81it/s]
100%|██████████| 27/27 [00:00<00:00, 102.21it/s]


New best model!
Train Loss: 24083.1830
Val Loss: 20569.4857
Epoch 4 of 5


100%|██████████| 106/106 [00:05<00:00, 17.80it/s]
100%|██████████| 27/27 [00:00<00:00, 103.08it/s]


New best model!
Train Loss: 22951.9595
Val Loss: 19957.0698
Epoch 5 of 5


100%|██████████| 106/106 [00:05<00:00, 17.83it/s]
100%|██████████| 27/27 [00:00<00:00, 103.38it/s]


New best model!
Train Loss: 22134.4548
Val Loss: 19398.7668


In [6]:
# load the best model
# Rebinds `model` to the instance returned by OntoVAE.load, using the same
# path that was given to train_model in the training cell.
model = OntoVAE.load(adata, '/workspace/cobra_ai/test')

In [None]:
# get latent space embedding
# Uses the same `adata` the model was created/loaded with.
embedding = model.to_latent(adata)

In [None]:
# compute pathway activities
# NOTE(review): called without arguments, unlike to_latent(adata) above;
# presumably it falls back to the adata attached to the model -- confirm.
act = model.get_pathway_activities()

## Train COBRA model

To train a COBRA model, pass the covariates to `setup_anndata_ontovae` via the `cobra_keys` argument, then instantiate `COBRA` instead of `OntoVAE`.

In [None]:
# import package
# NOTE(review): this import sits mid-notebook; moving it to the import cell at
# the top would make the notebook's dependencies visible in one place.
from cobra_ai.model.cobra import *

In [None]:
# prepare anndata for COBRA by passing the covariates as cobra_keys
# ('condition' and 'celltype' are presumably columns of adata.obs -- confirm).
# NOTE(review): `adata` here is already the output of an earlier
# setup_anndata_ontovae call; confirm that running setup a second time on the
# prepared object is intended, or reload the raw data before this cell.
adata = setup_anndata_ontovae(
    adata,
    ontobj,
    cobra_keys=['condition', 'celltype'],
)

In [None]:
# create model
# Built from the `adata` prepared with cobra_keys in the previous cell.
# This rebinds `model`, so the OntoVAE model from the first section is no
# longer reachable under that name.
model = COBRA(adata)