# Example notebook (simulated data)

In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell executes and edits to library source are picked
# up without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Imports

In [2]:
from novaice.tl import ChemPertVAEModel, ChemPertMLPModel
import anndata as ad
import numpy as np
from lightning.pytorch.loggers import TensorBoardLogger

  from .autonotebook import tqdm as notebook_tqdm


## Data

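Generate a small synthetic dataset: random gene-expression values and random drug embeddings, stored together in an `AnnData` object.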

In [3]:
# Create example data: random gene expression plus a random drug embedding
# per sample, packaged in an AnnData object.
# A fixed seed makes the synthetic data identical on every
# "Restart Kernel -> Run All" (this seeds the data only, not model training).
SEED = 0
n_samples = 100
n_genes = 500
embedding_dim = 768

rng = np.random.default_rng(SEED)
gene_expr = rng.standard_normal((n_samples, n_genes))  # Gene expression
drug_emb = rng.standard_normal((n_samples, embedding_dim))  # Drug embeddings

adata = ad.AnnData(X=gene_expr)
# Stored under the key that the model setup cells pass as `drug_embedding_key`.
adata.obsm["drug_embedding"] = drug_emb

## VAE

In [4]:
# TensorBoard logger for this run (save dir "logs", run name "chempert_vae").
tb_logger = TensorBoardLogger("logs", name="chempert_vae")

# Register the AnnData object with the VAE model class, pointing it at the
# obsm entry that holds the drug embeddings, then build the model.
ChemPertVAEModel.setup_anndata(adata, drug_embedding_key="drug_embedding")
model = ChemPertVAEModel(adata)

# Train for 50 epochs, logging through the TensorBoard logger.
model.train(
    max_epochs=50,
    logger=tb_logger,
    log_every_n_steps=5,
)

# Inference on the trained model: predicted expression first, then the
# latent representation.
predictions = model.predict_gene_expression()
latent = model.get_latent_representation()

[34mINFO    [0m Generating sequential column names                                                                        


  accelerator, lightning_devices, device = parse_device_args(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
TPU available: False, using: 0 TPU cores
/opt/homebrew/Caskroom/miniconda/base/envs/novaice/lib/python3.11/site-packages/lightning/pytorch/trainer/setup.py:166: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
/opt/homebrew/Caskroom/miniconda/base/envs/novaice/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:433: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=13` in the `DataLoader` to improve performance.
/opt/homebrew/Caskroom/miniconda/base/envs/novaice/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=5). Set a lower value for log_every_n_steps if you wan

Epoch 50/50: 100%|██████████| 50/50 [00:00<00:00, 122.74it/s, v_num=2, train_loss=682]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 50/50: 100%|██████████| 50/50 [00:00<00:00, 107.61it/s, v_num=2, train_loss=682]



In [None]:
# Setup and train the MLP model.
# NOTE: this rebinds `model` (previously the VAE model); later cells that call
# `model.get_prediction_error(...)` operate on this MLP model.
ChemPertMLPModel.setup_anndata(adata, drug_embedding_key="drug_embedding")
model = ChemPertMLPModel(adata)

# Use a dedicated TensorBoard run name so the MLP training curves are not
# written under the "chempert_vae" run directory used by the VAE cell.
mlp_logger = TensorBoardLogger("logs", name="chempert_mlp")

model.train(
    max_epochs=50,
    logger=mlp_logger,
    log_every_n_steps=5,
)

# Predict gene expression
predictions = model.predict_gene_expression()

# Prediction error with the method's default settings. Stored under its own
# name: the original assigned it to `latent`, which mislabelled the value and
# overwrote the VAE latent representation from the previous cell.
prediction_error = model.get_prediction_error()

[34mINFO    [0m Generating sequential column names                                                                        


  accelerator, lightning_devices, device = parse_device_args(
GPU available: True (mps), used: False
TPU available: False, using: 0 TPU cores
/opt/homebrew/Caskroom/miniconda/base/envs/novaice/lib/python3.11/site-packages/lightning/pytorch/trainer/setup.py:166: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.
/opt/homebrew/Caskroom/miniconda/base/envs/novaice/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:433: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=13` in the `DataLoader` to improve performance.
/opt/homebrew/Caskroom/miniconda/base/envs/novaice/lib/python3.11/site-packages/lightning/pytorch/loops/fit_loop.py:310: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=10). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 50/50: 100%|██████████| 50/50 [00:00<00:00, 138.65it/s, v_num=1, train_loss=468]

`Trainer.fit` stopped: `max_epochs=50` reached.


Epoch 50/50: 100%|██████████| 50/50 [00:00<00:00, 137.17it/s, v_num=1, train_loss=468]


In [5]:
# Prediction error of the current `model`, requested with method="r2";
# the bare last expression renders the resulting array as the cell output.
r2_scores = model.get_prediction_error(method="r2")
r2_scores

array([ 0.08490299,  0.01149954,  0.03652435,  0.1205297 ,  0.03714587,
        0.06657201,  0.15247999,  0.10168385,  0.03969806, -0.01599456,
       -0.00624166,  0.09742456,  0.10534384,  0.07294985,  0.11619731,
        0.10446647,  0.09934682,  0.13611974,  0.03372545,  0.09709959,
        0.05531964,  0.15708552,  0.06008263,  0.09644231,  0.04263443,
        0.05792092,  0.06905067,  0.10266561,  0.15244341,  0.13504979,
        0.07505822,  0.10024156, -0.03412777,  0.10176546, -0.04269175,
        0.11077628,  0.09890586, -0.01874888,  0.0980655 ,  0.10427356,
        0.06158306,  0.09025299,  0.06647262,  0.11493827, -0.00656793,
       -0.00371751, -0.03355224,  0.05029252,  0.05604016,  0.11657432,
        0.12093469,  0.12052019,  0.17429232,  0.10833763,  0.13482389,
        0.11187175,  0.06493888,  0.07060604,  0.09511399,  0.04677574,
        0.02701685,  0.10575106,  0.09443526,  0.04254399,  0.09967474,
        0.08944699,  0.13641833,  0.03353988,  0.09925881,  0.05