## Denoising and Batch Correction of RNAPII Count Matrix with PeakVI

In [None]:
import os
import anndata
import pickle
import pandas as pd
import numpy as np
import scvi
import scanpy as sc
import torch
from scipy.io import mmread

In [None]:
# Ask PyTorch whether a CUDA device is usable; as the cell's only expression,
# the boolean result is what the notebook shows as this cell's output.
torch.cuda.is_available()

In [None]:
# `tempfile` is not among the notebook's top-level imports; without importing
# it here the TemporaryDirectory() call below raises NameError.
import tempfile

# "high" lets PyTorch use faster, reduced-precision kernels for float32
# matrix multiplications where the hardware supports them.
torch.set_float32_matmul_precision("high")

# Scratch directory for intermediate files. The directory is deleted when
# this object is cleaned up, so keep `save_dir` referenced while it is needed.
save_dir = tempfile.TemporaryDirectory()

In [None]:
# Load the RNAPII count matrix plus its obs/var annotation tables and assemble
# them into a single AnnData object.
#
# Inputs are addressed by joined path instead of os.chdir(): changing the
# working directory here would silently redirect every later relative path
# in the notebook and would make this cell fail when it is run a second time.
input_dir = os.path.join("data", "mtx_filtered")

# The matrix is transposed so its rows line up with `obs` and its columns with
# `var` (presumably the .mtx file is stored features x cells -- confirm against
# the export script). Converting to CSR *after* the transpose keeps the final
# matrix row-compressed, the layout scvi-tools recommends for training speed;
# the stored values are unchanged.
counts = mmread(os.path.join(input_dir, "RNAP2_5sets_count_codingreg.mtx"))
counts = counts.transpose().tocsr()

# The first CSV column holds the row labels and becomes the index.
obs_table = pd.read_csv(
    os.path.join(input_dir, "RNAP2_5sets_obs_codingreg.csv"), index_col=0
)
var_table = pd.read_csv(
    os.path.join(input_dir, "RNAP2_5sets_var_codingreg.csv"), index_col=0
)

adata = anndata.AnnData(X=counts, obs=obs_table, var=var_table)

# Drop the loose references; the data now lives on `adata`.
del counts, obs_table, var_table

In [None]:
# Register `adata` with PeakVI, naming the "batch" annotation as the batch
# covariate the model should correct for.
scvi.model.PEAKVI.setup_anndata(adata, batch_key="batch")

In [None]:
# Build a PeakVI model (20 hidden units, 5 latent dimensions) on the
# registered AnnData object and train it with the library's default settings.
model = scvi.model.PEAKVI(adata, n_hidden=20, n_latent=5)

# Only request the first CUDA device when one actually exists; asking for
# "cuda:0" unconditionally fails on CPU-only machines.
if torch.cuda.is_available():
    model.to_device("cuda:0")
else:
    print("CUDA is not available; PeakVI will train on the default device.")

model.train()

In [None]:
# Attach the model's latent representation to the AnnData object, then derive
# the neighbourhood graph and the UMAP embedding from that representation.
latent_key = "X_peakvi"
adata.obsm[latent_key] = model.get_latent_representation()

sc.pp.neighbors(adata, use_rep=latent_key)
sc.tl.umap(adata, min_dist=0.3)

In [None]:
# Draw the UMAP embedding once per annotation ("sample" and "pair"),
# without axis frames and with a marker size of 15.
umap_colors = ["sample", "pair"]
sc.pl.umap(adata, color=umap_colors, frameon=False, size=15)

In [None]:
# Export the embeddings, the annotation tables and the trained model.
#
# Outputs are written by joined path instead of os.chdir(): the target is
# still resolved relative to the current working directory, but the directory
# is created when missing and the process-wide working directory is left
# alone, so the cell can be re-run without failing.
output_dir = "data/peakvi_out/"
os.makedirs(output_dir, exist_ok=True)

# NOTE(review): np.savetxt writes space-delimited text by default, so these
# ".csv" files are not comma-separated. The format is kept as-is so existing
# readers keep working; pass delimiter="," if real CSV is wanted.
np.savetxt(os.path.join(output_dir, "RNAPII_latent.csv"), adata.obsm["X_peakvi"])
np.savetxt(os.path.join(output_dir, "RNAPII_umap.csv"), adata.obsm["X_umap"])

adata.obs.to_csv(os.path.join(output_dir, "RNAPII_obs.csv"))
adata.var.to_csv(os.path.join(output_dir, "RNAPII_var.csv"))

# Pickle of the whole model object, kept for compatibility with existing
# consumers. `pickle` is already imported at the top of the notebook.
# NOTE(review): unpickling executes arbitrary code -- only load this file
# from a trusted source and with matching library versions.
with open(os.path.join(output_dir, "RNAPII_model.pkl"), "wb") as f:
    pickle.dump(model, f)

# Also store the model in scvi-tools' own on-disk format, which is the
# supported way to reload it later via scvi.model.PEAKVI.load().
model.save(os.path.join(output_dir, "RNAPII_model"), overwrite=True)