In [None]:
# Analysis stack used throughout the notebook.
import pandas as pd
import numpy as np
import scanpy as sc
import warnings
import scarches as sca
# Suppress all warnings for the rest of the session.
warnings.filterwarnings("ignore")


import sys
# Make modules located in ../scripts importable from this notebook.
sys.path.append('../scripts')
# Reload edited modules automatically instead of requiring a kernel restart.
%load_ext autoreload
%autoreload 2
#%load_ext lab_black

In [None]:
# Read the annotated mouse atlas produced by the previous step (file "14_...").
annotated_atlas_path = '/mnt/storage/Daniele/atlases/mouse/14_mouse_final_annotation.h5ad'
adata = sc.read_h5ad(annotated_atlas_path)

In [None]:
# Keep only the variables selected by `adata.var['manual_gene']` (presumably a
# boolean column — TODO confirm) and copy so the subset is an independent object.
manual_gene_mask = adata.var['manual_gene']
adata_manual = adata[:, manual_gene_mask].copy()

In [None]:
# Names of the `obs` columns handed to the model setup:
# the batch covariate and the cell-type label, in that order.
batch_key, celltype_key = 'donor_id', 'Level_4_knn'

In [None]:
# Hotfix: re-encode the batch column as string-valued categories before
# registering the AnnData object with the model.
batch_as_str = adata_manual.obs[batch_key].astype(str)
adata_manual.obs[batch_key] = batch_as_str.astype('category')

# Register the count layer, batch column and label column used for training.
sca.models.SCVI.setup_anndata(
    adata_manual,
    layer='binned_data',
    batch_key=batch_key,
    labels_key=celltype_key,
)


In [None]:
# Architecture options for the base SCVI model, gathered in one mapping:
# layer normalisation set to "both", batch normalisation set to "none".
scvi_arch_kwargs = dict(
    n_layers=2,
    encode_covariates=True,
    deeply_inject_covariates=False,
    use_layer_norm="both",
    use_batch_norm="none",
)
vae = sca.models.SCVI(adata_manual, **scvi_arch_kwargs)

In [None]:
# Train the base SCVI model for a fixed epoch budget.
scvi_max_epochs = 50
vae.train(max_epochs=scvi_max_epochs)

In [None]:
# Derive a SCANVI model from the trained SCVI model; "Unknown" is passed as the
# name of the unlabeled category.
scanvae = sca.models.SCANVI.from_scvi_model(
    vae,
    unlabeled_category="Unknown",
)

# Train the SCANVI model for a short additional epoch budget.
scanvi_max_epochs = 10
scanvae.train(max_epochs=scanvi_max_epochs)

In [None]:
# Predict a label for every cell and report how often the prediction agrees with
# the annotation the model was set up with. The reference column is looked up
# through `celltype_key` (the same key given to `setup_anndata`) rather than a
# hard-coded name, so the check cannot drift from the labels actually used.
# Note: the comparison includes cells the model was fitted on, so it is not a
# held-out estimate.
adata_manual.obs['predictions'] = scanvae.predict()
label_agreement = adata_manual.obs['predictions'] == adata_manual.obs[celltype_key]
print("Acc: {}".format(np.mean(label_agreement)))

In [None]:
# Compute the SCANVI latent representation and store it on the gene-subset object.
scanvi_latent = scanvae.get_latent_representation(adata_manual)
adata_manual.obsm['scANVI_emb_final'] = scanvi_latent

In [None]:
# Transfer the embedding to the full object. `adata_manual` was sliced from
# `adata` along the variable axis only, so both objects hold the same cells.
embedding_key = 'scANVI_emb_final'
adata.obsm[embedding_key] = adata_manual.obsm[embedding_key].copy()

In [None]:
from sklearn_ann.kneighbors.annoy import AnnoyTransformer

# Build the neighbour graph on the scANVI embedding, delegating the
# nearest-neighbour search to the AnnoyTransformer instance.
annoy_knn = AnnoyTransformer(15)
sc.pp.neighbors(adata, use_rep='scANVI_emb_final', transformer=annoy_knn)

In [None]:
# Compute the UMAP embedding for the full object.
umap_min_dist = 0.25
sc.tl.umap(adata, min_dist=umap_min_dist)

In [None]:
# Persist the integrated object (embedding, neighbour graph, UMAP) to disk.
integrated_atlas_path = '/mnt/storage/Daniele/atlases/mouse/15_mouse_final_integration.h5ad'
adata.write_h5ad(integrated_atlas_path)

In [None]:
# Save the trained SCANVI model, replacing any model already stored at this path.
scanvi_model_dir = '/home/daniele/Code/github_synced/PDAC/models/mouse/scANVI'
scanvae.save(scanvi_model_dir, overwrite=True)

# Create clean low-level annotations

In [None]:
# Correct misspelled Level 4 labels.
#
# The corrected column is assigned back to `adata.obs` explicitly. Calling
# `.replace(..., inplace=True)` on the Series returned by attribute access is a
# chained in-place update, which pandas does not guarantee to propagate to the
# parent DataFrame (and never does under Copy-on-Write).
level4_label_fixes = {
    'Malignant Cell - Hihgly Invasive': 'Malignant Cell - Highly Invasive',
    'Acinar idlling': 'Acinar Idling',
}
# Going through object dtype keeps this independent of whether the column is
# categorical; value replacement on categorical data is deprecated in pandas.
# Labels without an entry in the mapping (including missing values) are kept.
adata.obs['Level_4_knn'] = (
    adata.obs['Level_4_knn']
    .astype(object)
    .map(lambda label: level4_label_fixes.get(label, label))
    .astype('category')
)

In [None]:
# Seed the final Level 4 column with an independent copy of the kNN labels.
level4_knn_labels = adata.obs['Level_4_knn']
adata.obs['Level_4_final'] = level4_knn_labels.copy()

In [None]:
# Annotation table with one column per hierarchy level (Level_1 ... Level_4).
celltype_df = pd.read_csv('../../../supplementary_data/celltype_annotation.csv')


def _child_to_parent(child_col, parent_col):
    """Map each label in `child_col` to the `parent_col` label on the same row.

    If a child label occurs on several rows, the last row wins.
    """
    return dict(zip(celltype_df[child_col], celltype_df[parent_col]))


# Lookup tables used to roll labels up one hierarchy level at a time.
dict_level4_to_level3 = _child_to_parent('Level_4', 'Level_3')
dict_level3_to_level2 = _child_to_parent('Level_3', 'Level_2')
dict_level2_to_level1 = _child_to_parent('Level_2', 'Level_1')

In [None]:
def _roll_up_labels(labels, child_to_parent, level_name):
    """Translate `labels` to their parent level and return a categorical Series.

    Parameters
    ----------
    labels : pandas.Series
        Labels at the child level (any dtype, including categorical).
    child_to_parent : dict
        Mapping from child label to parent label.
    level_name : str
        Name of the target level; only used in the diagnostic message.

    Labels without an entry in `child_to_parent` are passed through unchanged,
    matching the previous `.replace(dict)` behaviour, but they are reported so
    that gaps in the annotation table do not go unnoticed.

    The lookup runs on an object-dtype copy because value replacement on
    categorical data is deprecated in pandas.
    """
    labels_obj = labels.astype(object)
    unmapped = set(labels_obj.dropna().unique()) - set(child_to_parent)
    if unmapped:
        # print() rather than warnings.warn(): warnings are silenced globally
        # at the top of this notebook.
        print("No {} mapping for: {}".format(level_name, sorted(map(str, unmapped))))
    return (
        labels_obj
        .map(lambda label: child_to_parent.get(label, label))
        .astype('category')
    )


# Derive the coarser annotation levels step by step from the final Level 4 labels.
adata.obs['Level_4_final'] = adata.obs['Level_4_final'].astype('category')
adata.obs['Level_3_final'] = _roll_up_labels(adata.obs['Level_4_final'], dict_level4_to_level3, 'Level_3')
adata.obs['Level_2_final'] = _roll_up_labels(adata.obs['Level_3_final'], dict_level3_to_level2, 'Level_2')
adata.obs['Level_1_final'] = _roll_up_labels(adata.obs['Level_2_final'], dict_level2_to_level1, 'Level_1')


In [None]:
# Write the object again to the same path as the earlier save, so the file on
# disk also carries the cleaned labels and the Level_*_final columns.
adata.write_h5ad(
    '/mnt/storage/Daniele/atlases/mouse/15_mouse_final_integration.h5ad'
)