# Installing scvi-tools

In [None]:
!pip install scvi-tools

# Import all the necessary packages

In [None]:
import scvi
import numpy as np
import pandas as pd
import scanpy as sc
import os
import json
import loompy
import anndata
import leidenalg
import igraph as ig

In [None]:
from scvi.data import read_h5ad, read_loom
from scvi.model import SCVI
from scvi.external import SOLO

In [None]:
import session_info

In [None]:
# Show the session_info report for this session (presumably the versions of
# the loaded packages, recorded for reproducibility).
session_info.show()

In [None]:
from scipy.sparse import csr_matrix

In [None]:
from matplotlib.pyplot import rc_context

In [None]:
# Session-wide scanpy configuration.
# Verbosity 4 is presumably scanpy's most detailed logging level; it is set
# before the header is printed, as in the original cell order.
sc.settings.verbosity = 4
sc.logging.print_header()
# Default look for every figure produced below: 80 dpi on a white background.
sc.settings.set_figure_params(
    dpi=80,
    facecolor="white",
)

# Load data and train the model

In [None]:
# Read the 10x matrix directory "./pristina/" into `adata`, using gene symbols
# as variable names. cache=True lets scanpy reuse its cached copy of the parsed
# data on later runs.
adata = sc.read_10x_mtx("./pristina/", var_names="gene_symbols", cache=True)

In [None]:
# Display the observation annotations (`adata.obs`) of the data just loaded.
adata.obs

In [None]:
# Rebind `adata` to an independent copy, then register that copy with scVI.
# `SCVI` is the class imported from scvi.model in the import cells above.
adata = adata.copy()
SCVI.setup_anndata(adata)

In [None]:
# Build the scVI model on the registered AnnData and train it with the
# library's default training settings.
vae = SCVI(adata)
vae.train()

In [None]:
# Create the SOLO doublet classifier from the trained scVI model.
# `SOLO` is the class imported from scvi.external in the import cells above.
solo = SOLO.from_scvi_model(vae)

In [None]:
# Train SOLO for up to 400 epochs with early stopping explicitly disabled.
solo.train(max_epochs=400, early_stopping=False)

In [None]:
# Collect SOLO's predictions for the real cells only (the simulated doublets
# used for training are excluded) into a DataFrame.
solo_list = pd.DataFrame(
    solo.predict(include_simulated_doublets=False)
)

In [None]:
# Write the prediction table to an Excel workbook; the path is relative, so the
# file lands in the current working directory.
solo_list.to_excel("solo_ratio_multimappers 10.xlsx")

In [None]:
# Display the trained scVI model object (its notebook representation).
vae

## Loading the data with cluster layers already calculated and checking the location of the putative doublets

In [None]:
# Rebind `adata` to the stored atlas; this replaces the object used for
# training above. The plots below colour by `solo_doublet` / `solo_singlet`,
# so this file is expected to already carry those columns in `adata.obs`.
adata = anndata.read_h5ad('./pristina_atlas.h5ad')

In [None]:
# UMAP coloured by the `solo_doublet` values, drawn on a 12x12 figure; the
# figure-size override only applies inside this context.
with rc_context(rc={"figure.figsize": (12, 12)}):
    sc.pl.umap(adata, color="solo_doublet", size=30)

In [None]:
# UMAP coloured by the `solo_singlet` values, drawn on a 12x12 figure; the
# figure-size override only applies inside this context.
with rc_context(rc={"figure.figsize": (12, 12)}):
    sc.pl.umap(adata, color="solo_singlet", size=30)

In [None]:
# Cut-off applied to `solo_doublet`: cells strictly above it are listed and
# labelled as top doublets in the following cells.
# NOTE(review): if `solo_doublet` holds probabilities in [0, 1], `> 1` selects
# nothing -- confirm the column holds unbounded scores (e.g. logits).
lim= 1

In [None]:
# Display the annotation rows of the cells whose `solo_doublet` value exceeds `lim`.
adata.obs[adata.obs['solo_doublet'] > lim]

In [None]:
# Label every cell in one pass: "top_doublet" where `solo_doublet` exceeds
# `lim`, "no" otherwise (missing values compare False and therefore get "no").
adata.obs["top_doublets"] = np.where(
    adata.obs["solo_doublet"] > lim,
    "top_doublet",
    "no",
)

In [None]:
# UMAP coloured by the `top_doublets` label, drawn on a 12x12 figure; the
# figure-size override only applies inside this context.
with rc_context(rc={"figure.figsize": (12, 12)}):
    sc.pl.umap(adata, color="top_doublets", size=30)