#### Importing all the required **Python** libraries

In [None]:
import os
import pathlib
import json
import pandas as pd
import numpy as np
import scanpy as sc
import warnings
import scarches as sca
warnings.filterwarnings("ignore")
import anndata as ad

import decoupler as dc

import sys
sys.path.append('../scripts')
from scarches.models import SCVI, SCANVI

%load_ext autoreload
%autoreload 2
#%load_ext lab_black

In [None]:
# Directory where scanpy saves figures.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
sc.settings.figdir = '/home/daniele/Code/scmouse_atlas/reports/figures/'
# Draw plots without a frame around the axes.
sc.set_figure_params(frameon=False)

#### Read the data and select the manually curated genes

In [None]:
# Load the input AnnData object from disk.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
adata = sc.read_h5ad(
    '/mnt/storage/Daniele/atlases/mouse/01_mouse_larry_barcoded_raw.h5ad'
)

In [None]:
# Cast every column of adata.obs to string and then to a categorical dtype,
# so that all observation annotations end up with the same dtype.
for obs_column in adata.obs.columns:
    string_values = adata.obs[obs_column].astype(str)
    adata.obs[obs_column] = string_values.astype('category')

In [None]:
# Load the human manual-gene table (path is relative to the notebook's
# working directory).
manual_genes_human = pd.read_csv(
    '../../../supplementary_data/human/human_manual_genes.csv'
)

In [None]:
# Rename the two CSV columns: the gene symbol and the 'manual' flag.
manual_genes_human.columns = ['genesymbol', 'manual']
# Append a constant placeholder column (dummy for decoupler).
manual_genes_human = manual_genes_human.assign(pathway='_')
# Keep only the rows selected by the 'manual' column.
# NOTE(review): assumes 'manual' was parsed as booleans -- confirm against the CSV.
manual_genes_human = manual_genes_human[manual_genes_human['manual']]

In [None]:
# Translate the manual-gene table to mouse with decoupler.
# NOTE(review): the source organism is left at decoupler's default, presumably
# human given the input table -- confirm.
mouse_manual_genes = dc.translate_net(
    manual_genes_human,
    target_organism='mouse',
)

In [None]:
# Translated manual genes that are also present in the dataset.
# sorted() pins the order: iteration order of a set of strings varies between
# interpreter sessions (hash randomisation), which would otherwise make the
# feature order of the gene-subset matrix differ from run to run.
man_genes = sorted(
    set(mouse_manual_genes['genesymbol'].values).intersection(adata.var_names)
)

In [None]:
# Restrict the data to the selected genes (second axis of the AnnData object)
# and take an independent copy, so later changes do not touch `adata` itself.
adata_man_genes = adata[:, man_genes].copy()

#### SCVI

In [None]:
# Register the gene-subset AnnData with SCVI, using 'donor_id' as the batch key.
SCVI.setup_anndata(adata_man_genes, batch_key='donor_id')

# Build the model on the registered data; all model hyperparameters are left
# at the library defaults.
vae = SCVI(adata_man_genes)

In [None]:
# Fit the model; no arguments are passed, so the library's default training
# settings apply.
vae.train()

In [None]:
# Latent representation produced by the trained model.
scvi_latent = vae.get_latent_representation()
adata_man_genes.obsm['X_scVI'] = scvi_latent
# Mirror the embedding onto the full object; adata_man_genes is a gene-wise
# subset of adata, so both objects hold the same observations.
adata.obsm['X_scVI'] = adata_man_genes.obsm['X_scVI']

In [None]:
# Neighbourhood graph computed on the scVI embedding rather than on adata.X.
sc.pp.neighbors(adata, n_neighbors=15, use_rep='X_scVI')
# UMAP of that graph.
sc.tl.umap(adata)
# Plot the UMAP coloured by the 'donor_id' annotation (the batch key used above).
sc.pl.umap(adata, color='donor_id')

In [None]:
# Save the full object, now carrying the scVI embedding in .obsm, to disk.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
adata.write_h5ad(
    '/mnt/storage/Daniele/atlases/mouse/02_mouse_larry_barcoded_integrated_scvi.h5ad'
)