In [1]:
!pip install scvi-colab

from scvi_colab import install

# default
install()

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting scvi-colab
  Downloading scvi_colab-0.11.0-py3-none-any.whl (4.3 kB)
Collecting rich
  Downloading rich-12.6.0-py3-none-any.whl (237 kB)
[K     |████████████████████████████████| 237 kB 13.9 MB/s 
[?25hCollecting commonmark<0.10.0,>=0.9.0
  Downloading commonmark-0.9.1-py2.py3-none-any.whl (51 kB)
[K     |████████████████████████████████| 51 kB 9.8 MB/s 
Installing collected packages: commonmark, rich, scvi-colab
Successfully installed commonmark-0.9.1 rich-12.6.0 scvi-colab-0.11.0
[34mINFO    [0m scvi-colab: Installing scvi-tools.                                                                        
[34mINFO    [0m scvi-colab: Install successful. Testing import.                                                           


INFO:pytorch_lightning.utilities.seed:Global seed set to 0
  new_rank_zero_deprecation(
  return new_rank_zero_deprecation(*args, **kwargs)


The scvi-colab Python package is a lightweight installer for scvi-tools and related tools.

In [None]:
from google.colab import files

# files.upload() opens a file picker and returns a dict mapping each uploaded
# filename to its raw bytes. Bind the result to a name so the cell does not
# echo the file contents as its output; display only the uploaded filenames.
uploaded = files.upload()
sorted(uploaded)

`files.upload()` uploads files from the local machine into the Colab runtime; in this case, an h5ad file containing the uncorrected data from the three studies. This dataset has been filtered to retain the top 2,000 variable genes.

In [33]:
import scvi
import scanpy as sc
import numpy as np

# Fix the scvi-tools seed so that training is repeatable.
scvi.settings.seed = 20221217

# Read the uploaded AnnData file and convert the raw-count layer with .tocsr()
# (presumably a scipy sparse matrix being converted to CSR -- confirm against
# the input file).
adata = sc.read_h5ad("hvg_uc_sce.h5ad")
adata.layers["counts"] = adata.layers["counts"].tocsr()

# Register the count layer as the model input and `study` as the batch
# covariate.
scvi.model.SCVI.setup_anndata(adata, layer="counts", batch_key="study")

# Two-layer scVI model with a 20-dimensional latent space and the "nb" gene
# likelihood, trained on the GPU.
model = scvi.model.SCVI(
    adata,
    n_layers=2,
    n_latent=20,
    gene_likelihood="nb",
)
model.train(use_gpu=True)

# Latent embedding produced by the trained model.
latent = model.get_latent_representation()


Next, we'll save this latent representation (~20k cells by 20 latent dimensions) as a gzip-compressed CSV file for loading back into R.

In [34]:
# Write the scVI latent matrix as comma-separated text; the `.gz` suffix makes
# np.savetxt gzip-compress the output file.
np.savetxt(fname="scvi.csv.gz", X=latent, delimiter=",")

In [41]:
# Build a scANVI model from the trained scVI model, using the
# `coarse_cell_type` labels; entries equal to "Unknown" are treated as
# unlabeled.
lvae = scvi.model.SCANVI.from_scvi_model(
    model,
    adata=adata,
    unlabeled_category="Unknown",
    labels_key="coarse_cell_type",
)

# This dataset is small, so labels are not subsampled during training.
lvae.train(max_epochs=20)

# Extract the scANVI latent embedding and write it out as comma-separated text
# (gzip-compressed because of the `.gz` suffix).
latent_scanvi = lvae.get_latent_representation(adata=adata)
np.savetxt(fname="scanvi.csv.gz", X=latent_scanvi, delimiter=",")

[34mINFO    [0m Training for [1;36m20[0m epochs.                                                                                   


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Epoch 20/20: 100%|██████████| 20/20 [01:45<00:00,  5.50s/it, loss=1.08e+03, v_num=1]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 20/20: 100%|██████████| 20/20 [01:45<00:00,  5.27s/it, loss=1.08e+03, v_num=1]
