In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import torch
import anndata
import scanpy as sc
import scvi
from cell2location.models import Cell2location, RegressionModel
from cell2location.plt import plot_spatial
from cell2location.utils import select_slide
from cell2location.utils.filtering import filter_genes

In [None]:
# Pick the GPU when PyTorch reports one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print('Using device:', device)
print()

In [None]:
mka = sc.read_h5ad("/exports/archive/hg-groep-peters/Healthy_Mouse_Atlas_Claudio/KidneyAtlas/h5ad/atlas_full_raw.h5ad")

In [None]:
zimm = sc.read_h5ad("/exports/archive/hg-groep-peters/Healthy_Mouse_Atlas_Claudio/KidneyAtlas/h5ad/zimmerman_raw.h5ad")

In [None]:
sc.pp.calculate_qc_metrics(zimm, inplace=True)

In [None]:
zimm = zimm[zimm.obs.group.isin(['cmsham', 'agedcontrols', 'contir'])].copy()

In [None]:
zimm.obs['Origin'] = 'Zimmerman22'
zimm.obs['Source'] = 'Cell'

In [None]:
zimm.obs['pct_counts_mt'] = zimm.obs['percent.mt']

In [None]:
mka = mka.concatenate(zimm, join='inner')

In [None]:
mka

In [None]:
humphreys_fibrosis = sc.read_h5ad("/exports/archive/hg-groep-peters/snRNAseq_Cesare_Claudio_Early_Mod_Severe/snRNAseq/humphreys_processed.h5ad")

In [None]:
humphreys_fibrosis.obs.celltype0421

In [None]:
humphreys_fibrosis = humphreys_fibrosis[humphreys_fibrosis.obs.celltype0421.isin(['PT-AcInj','PT-FR','PT-Inj','PT-R', 'Fib', 'Myofib'])].copy()

In [None]:
humphreys_fibrosis = humphreys_fibrosis[humphreys_fibrosis.obs['sample'] != 'Health'].copy()

In [None]:
# Filter the anndata object based on the condition in var_names
humphreys_fibrosis = humphreys_fibrosis[:, ~humphreys_fibrosis.var_names.str.contains('intron')].copy()

In [None]:
humphreys_fibrosis.var_names_make_unique()

In [None]:
humphreys_fibrosis.var_names

In [None]:
humphreys_fibrosis.X = humphreys_fibrosis.layers['raw']

In [None]:
# Step 1: Add the new category
humphreys_fibrosis.obs['celltype0421'] = humphreys_fibrosis.obs['celltype0421'].cat.add_categories(['Podo'])

# Step 2: Assign the new value
humphreys_fibrosis.obs.loc[humphreys_fibrosis.obs['celltype0421'] == 'Pod', 'celltype0421'] = 'Podo'

In [None]:
humphreys_fibrosis.obs["Origin"] = 'Humphreys23'
humphreys_fibrosis.obs["Source"] = 'Nuclei'

In [None]:
# mitochondrial genes, "MT-" for human, "Mt-" for mouse
humphreys_fibrosis.var["mt"] = humphreys_fibrosis.var_names.str.startswith("mt-")
# ribosomal genes
humphreys_fibrosis.var["ribo"] = humphreys_fibrosis.var_names.str.startswith(("Rps", "Rpl"))

In [None]:
sc.pp.calculate_qc_metrics(
    humphreys_fibrosis, qc_vars=["mt", "ribo"], inplace=True
)

In [None]:
humphreys_fibrosis.var_names

In [None]:
mka = mka.concatenate([humphreys_fibrosis], join='inner')

In [None]:
mka

In [None]:
adata_upd = sc.read_h5ad("/exports/archive/hg-groep-peters/snRNAseq_Cesare_Claudio_Early_Mod_Severe/snRNAseq/latest_models/atlas_lvae_model/avg_posterior_samples.h5ad")

In [None]:
adata_upd.obs['pct_counts_mt']

In [None]:
mka = mka[mka.obs_names.isin(adata_upd.obs_names)].copy()

In [None]:
obs_to_keep = mka.obs[['total_counts', 'n_genes_by_counts', 'pct_counts_mt']]

In [None]:
obs_to_keep

In [None]:
mka.obs = adata_upd.obs

In [None]:
mka.obs[['total_counts', 'n_genes_by_counts', 'pct_counts_mt']] = obs_to_keep

In [None]:
del adata_upd

In [None]:
muto_pkd = sc.read_h5ad("/exports/archive/hg-groep-peters/Spatial_Transcriptomics_Snowball_Sevtap/Muto_PKD_2024/GSE268494_RNA_seurat.h5ad")

In [None]:
muto_pkd = muto_pkd[muto_pkd.obs['disease'].str.contains('pkd')].copy()

In [None]:
muto_pkd = muto_pkd[~muto_pkd.obs['celltype'].isin(['URO', 'FAT', 'Myel'])].copy()

In [None]:
muto_pkd_PT_metadata = pd.read_csv("/exports/archive/hg-groep-peters/Spatial_Transcriptomics_Snowball_Sevtap/Muto_PKD_2024/GSE268494_PKDaggr_GEO_FRPTC_metadata.csv", index_col=0)

In [None]:
# Convert to string dtype
muto_pkd.obs['celltype'] = muto_pkd.obs['celltype'].astype(str)

# Assign new values
common_cells = muto_pkd.obs_names.intersection(muto_pkd_PT_metadata.index)
muto_pkd.obs.loc[common_cells, 'celltype'] = muto_pkd_PT_metadata.loc[common_cells, 'subtype']

# (Optional) Convert back to categorical if desired
muto_pkd.obs['celltype'] = muto_pkd.obs['celltype'].astype('category')

In [None]:
translation_dict = {
    'ATL': 'ATL',
    'Bcell': 'B lymph',
    'CNT': 'CNT',
    'DCT': 'DCT',
    'DTL1': 'DTL',
    'DTL2': 'DTL',
    'ENDO': 'Endo',
    'FIB': 'Fib',
    'FRPTC1': 'PT-FR',
    'FRPTC2': 'PT-FR',
    'FRPTC3': 'PT-FR',
    'ICA': 'ICA',
    'ICB': 'ICB',
    'PC1': 'PC',
    'PC2': 'PC',
    'PEC': 'PEC',
    'PODO': 'Podo',
    'PTS1': 'PTS1',
    'PTS2': 'PTS2',
    'PTS3': 'PTS3',
    'TAL': 'TAL',
    'Tcell': 'T lymph',
    'Trans-PTC': 'Trans-PTC'
}

In [None]:
muto_pkd.obs['celltype'] = muto_pkd.obs['celltype'].replace(translation_dict)

In [None]:
muto_pkd.obs['celltype']

In [None]:
muto_pkd.obs['Predicted_Celltype'] = muto_pkd.obs['celltype']

In [None]:
# mitochondrial genes, "MT-" for human, "Mt-" for mouse
muto_pkd.var["mt"] = muto_pkd.var_names.str.startswith("mt-")
sc.pp.calculate_qc_metrics(
    muto_pkd, qc_vars=["mt"], inplace=True
)

In [None]:
mka = mka.concatenate(muto_pkd, join='inner')

In [None]:
del mka.var

In [None]:
mka.obs['Origin'] = mka.obs['Origin'].cat.add_categories(['Muto24'])
mka.obs['Origin'] = mka.obs['Origin'].fillna('Muto24')
mka.obs['Source'] = mka.obs['Source'].fillna('Nuclei')

In [None]:
sc.pp.calculate_qc_metrics(mka, inplace=True)

In [None]:
sc.pl.violin(mka, 'total_counts', groupby='Origin')

In [None]:
mka.write_h5ad("/exports/archive/hg-groep-peters/snRNAseq_Cesare_Claudio_Early_Mod_Severe/snRNAseq/mka_extended_pkd_innerjoin.h5ad")

In [None]:
mka = sc.read_h5ad("/exports/archive/hg-groep-peters/snRNAseq_Cesare_Claudio_Early_Mod_Severe/snRNAseq/mka_extended_pkd_innerjoin.h5ad")

## scVI / scANVI

In [None]:
scvi.model.SCVI.setup_anndata(mka, batch_key="Origin", continuous_covariate_keys=['pct_counts_mt'], 
                      categorical_covariate_keys=['Source'])

In [None]:
# vae = scvi.model.SCVI(mka, n_layers=2, n_latent=26, gene_likelihood='nb', dropout_rate=0.09672091885923559)

In [None]:
# for reference mapping
# vae = scvi.model.SCVI(mka, 
#                       n_layers=2, 
#                       use_layer_norm="both",
#                       use_batch_norm="none",
#                       encode_covariates=True,
#                       n_latent=26, 
#                       gene_likelihood="nb", 
#                       dropout_rate=0.09672091885923559)

In [None]:
vae = scvi.model.SCVI(mka, 
                      n_layers=2, 
                      use_layer_norm="both",
                      use_batch_norm="none",
                      encode_covariates=True,
                      n_latent=26, 
                      gene_likelihood="nb", 
                      dropout_rate=0.2)

In [None]:
vae.train()

In [None]:
#vae.train(plan_kwargs={"lr":0.0013153399994028092})

In [None]:
mka.obsm["X_scVI"] = vae.get_latent_representation()

In [None]:
sc.pp.neighbors(mka, use_rep="X_scVI")
# sc.tl.leiden(adata_upd)
sc.tl.umap(mka)

In [None]:
sc.pl.umap(
    mka,
    color="Origin",
    frameon=False,
    show=False,
    save='MKA_extended_byOrigin.pdf'
)

In [None]:
sc.pl.umap(
    mka,
    color="Predicted_Celltype",
    frameon=False,
    show=False,
    save='MKA_extended_byCelltype.pdf'
)

In [None]:
vae.save("/exports/humgen/cnovellarausell/SevtapSpatial/Models/SCVI_zimmerman_humphreys_muto", save_anndata=True, overwrite=True)

In [None]:
mka.obs.Predicted_Celltype.cat.add_categories(new_categories='Unknown', inplace=True)

In [None]:
mka.obs.loc[mka.obs.Origin.isin(['Conway20', 'Hinze20']), 'Predicted_Celltype'] = 'Unknown'

In [None]:
mka.obs.groupby('Origin')['Predicted_Celltype'].value_counts()

In [None]:
mapping = {
    'Lyc6 high Macrophages': 'Macro',
    'Macrophages': 'Macro',
    'Lyc6 low Macrophages': 'Macro',
    'Mrc1+ Resident Macrophages': 'Macro',
    'Resident Macrophages': 'Macro',
    'Spp1+ Resident Macrophages ': 'Macro',
    'Monocyte/DC': 'DC',
    'Dendritic': 'DC',
    'B1 B lymph': 'B lymph',
    'CD4+ T lymph': 'T lymph',
    'CD4+ T regs': 'T regs',
    'CD4+ Th17': 'Th17',
    'Gzma low NK': 'NK',
    'Gzma+ CD8+ T lymph': 'T lymph',
    'Gzma+ NK': 'NK',
    'Memory B lymph': 'B lymph',
    'NKT1': 'NK',
    'T1 B lymph': 'B lymph',
    'T3/Follicular B lymph': 'B lymph',
    'Th17': 'T lymph',
'Mesanglial cells': 'MC',
'IC': 'IC',
    'FR-PT + Immune': 'FR-PT + Immune',
    'Immune': 'Immune',
}

mka.obs["Predicted_Celltype_lowres"] = mka.obs["Predicted_Celltype"].replace(mapping)

In [None]:
mka.obs["Predicted_Celltype_lowres"]

In [None]:
SCANVI_LABELS_KEY = "labels_scanvi"

mka.obs[SCANVI_LABELS_KEY] = mka.obs['Predicted_Celltype_lowres'].values

In [None]:
lvae = scvi.model.SCANVI.from_scvi_model(
    vae,
    adata=mka,
    labels_key=SCANVI_LABELS_KEY,
    unlabeled_category='Unknown'
)

In [None]:
lvae.train(max_epochs=20, n_samples_per_label=100)

In [None]:
lvae.save("/exports/humgen/cnovellarausell/SevtapSpatial/Models/SCANVI_zimmerman_humphreys_muto_lowres", save_anndata=True, overwrite=True)

In [None]:
lvae = scvi.model.SCANVI.load("/exports/humgen/cnovellarausell/SevtapSpatial/Models/SCANVI_zimmerman_humphreys_muto_lowres")

In [None]:
mka = lvae.adata.copy()

In [None]:
mka.obsm["X_scANVI"] = lvae.get_latent_representation()
sc.pp.neighbors(mka, use_rep="X_scANVI")
sc.tl.umap(mka)

In [None]:
sc.pl.umap(
    mka,
    color=["Predicted_Celltype_lowres"],
    frameon=False,
    show=False,
    size=2,
    save='MKA_extended_scANVI_byCelltype.pdf'
)

In [None]:
sc.pl.umap(
    mka,
    color=["Origin"],
    frameon=False,
    show=False,
    size=2,
    save='MKA_extended_scANVI_byOrigin.pdf'
)

In [None]:
mka.var_names

In [None]:
sc.pl.umap(
    mka,
    color=["Disp1"],
    frameon=False,
    show=False,
    size=2,
)

In [None]:
latent = lvae.get_latent_representation(mka)
np.savetxt("/exports/humgen/cnovellarausell/SevtapSpatial/Models/SCANVI_zimmerman_humphreys_muto_lowres/MKA_extended_scANVI_latent_space.csv", latent, delimiter=",")
pd.DataFrame(mka.obs['Predicted_Celltype_lowres']).to_csv("/exports/humgen/cnovellarausell/SevtapSpatial/Models/SCANVI_zimmerman_humphreys_muto_lowres/cell_types.csv")

In [None]:
mka.obs.Source

In [None]:
# Export 1/4: low-resolution labels plus dataset of origin, one row per cell.
annotations = pd.DataFrame(
    {
        'Predicted_Celltype_lowres': mka.obs['Predicted_Celltype_lowres'],
        'Batch': mka.obs['Origin']  # 'Origin' is exported under the column name 'Batch'
    },
    index=mka.obs_names
)

# The index becomes the first CSV column, with header 'barcode'.
# NOTE(review): the frame is built on mka.obs_names, so this rename may also
# rename the index of mka.obs if the Index object is shared - confirm.
annotations.index.name = 'barcode'

# Write the table as comma-separated text.
annotations.to_csv(
    '/exports/humgen/cnovellarausell/SevtapSpatial/cytospace/MKA_extended_annotations_lowres_withBatch.csv',
    sep=','
)

In [None]:
# Export 2/4: high-resolution labels (column renamed to
# 'Predicted_Celltype_highres') plus dataset of origin.
annotations = pd.DataFrame(
    {
        'Predicted_Celltype_highres': mka.obs['Predicted_Celltype'],
        'Batch': mka.obs['Origin']  # 'Origin' is exported under the column name 'Batch'
    },
    index=mka.obs_names
)

# The index becomes the first CSV column, with header 'barcode'.
annotations.index.name = 'barcode'

# Write the table as comma-separated text.
annotations.to_csv(
    '/exports/humgen/cnovellarausell/SevtapSpatial/cytospace/MKA_extended_annotations_highres_withBatch.csv',
    sep=','
)

In [None]:
# Export 3/4: high-resolution labels only (no batch column).
annotations = pd.DataFrame(mka.obs['Predicted_Celltype'], columns=['Predicted_Celltype'], index=mka.obs_names)
annotations.index.name = 'barcode'
annotations.to_csv('/exports/humgen/cnovellarausell/SevtapSpatial/cytospace/MKA_extended_annotations_highres.csv', sep=',')

In [None]:
# Export 4/4: low-resolution labels only (no batch column).
annotations = pd.DataFrame(mka.obs['Predicted_Celltype_lowres'], columns=['Predicted_Celltype_lowres'], index=mka.obs_names)
annotations.index.name = 'barcode'
annotations.to_csv('/exports/humgen/cnovellarausell/SevtapSpatial/cytospace/MKA_extended_annotations_lowres.csv', sep=',')

In [None]:
adata_upd = sc.read_h5ad("/exports/archive/hg-groep-peters/Healthy_Mouse_Atlas_Claudio/KidneyAtlas/h5ad/atlas_full_SCVI_SCANVI_Zimmerman.h5ad")

In [None]:
adata_upd

In [None]:
mka = mka[mka.obs_names.isin(adata_upd.obs_names.to_list())].copy()

In [None]:
mka.raw

In [None]:
adata_upd = adata_upd.raw.to_adata()

In [None]:
humphreys_fibrosis = sc.read_h5ad("/exports/humgen/cnovellarausell/snRNAseq/humphreys_processed.h5ad")

In [None]:
humphreys_fibrosis = humphreys_fibrosis[~humphreys_fibrosis.obs.celltype0421.isin(['EC', 'MÏ†', 'B/T', 'Uro'])].copy()

In [None]:
humphreys_fibrosis.obs.celltype0421.value_counts()

In [None]:
humphreys_fibrosis.X = humphreys_fibrosis.layers['raw']

In [None]:
# Step 1: Add the new category
humphreys_fibrosis.obs['celltype0421'] = humphreys_fibrosis.obs['celltype0421'].cat.add_categories(['Podo'])

# Step 2: Assign the new value
humphreys_fibrosis.obs.loc[humphreys_fibrosis.obs['celltype0421'] == 'Pod', 'celltype0421'] = 'Podo'

In [None]:
adata_upd.obs['Predicted_Celltype_lowres'] = adata_upd.obs.Predicted_Celltype_lowres.cat.add_categories(['TAL'])
adata_upd.obs.loc[adata_upd.obs.Predicted_Celltype_lowres == 'MTAL', 'Predicted_Celltype_lowres'] = 'TAL'
adata_upd.obs.loc[adata_upd.obs.Predicted_Celltype_lowres == 'CTAL', 'Predicted_Celltype_lowres'] = 'TAL'

In [None]:
adata_upd.obs.Predicted_Celltype_lowres = adata_upd.obs.Predicted_Celltype_lowres.cat.remove_unused_categories()
humphreys_fibrosis.obs.celltype0421 = humphreys_fibrosis.obs.celltype0421.cat.remove_unused_categories()

In [None]:
humphreys_fibrosis.obs.celltype0421.value_counts(), adata_upd.obs.Predicted_Celltype_lowres.value_counts()

In [None]:
adata_upd.raw.X

In [None]:
humphreys_fibrosis.obs["Origin"] = 'Humphreys23'
humphreys_fibrosis.obs["Source"] = 'Nuclei'

In [None]:
humphreys_fibrosis.var['mt'] = humphreys_fibrosis.var_names.str.startswith('mt-')
humphreys_fibrosis.var['ribo'] = humphreys_fibrosis.var_names.str.startswith(('Rps', 'Rpl'))
humphreys_fibrosis.var['hb'] = humphreys_fibrosis.var_names.str.contains(("^Hb.*-"))
sc.pp.calculate_qc_metrics(humphreys_fibrosis, qc_vars=['mt', 'ribo', 'hb'], percent_top=None,inplace=True)

In [None]:
humphreys_fibrosis.var_names_make_unique()         # Check for duplicates in adata_upd

In [None]:
humphreys_fibrosis = humphreys_fibrosis[humphreys_fibrosis.obs['sample'] != 'Health'].copy()

In [None]:
adata_upd = adata_upd.concatenate([humphreys_fibrosis])

In [None]:
adata_upd.obs.celltype0421 = adata_upd.obs.celltype0421.astype(str)
adata_upd.obs.celltype0421.fillna('', inplace=True)
adata_upd.obs.Predicted_Celltype_lowres = adata_upd.obs.Predicted_Celltype_lowres.astype(str)
adata_upd.obs.Predicted_Celltype = adata_upd.obs.Predicted_Celltype.astype(str)
adata_upd.obs.Predicted_Celltype_lowres.fillna('', inplace=True)
adata_upd.obs.Predicted_Celltype.fillna('', inplace=True)

In [None]:
adata_upd.obs["Predicted_Celltype"] = adata_upd.obs[["Predicted_Celltype", "celltype0421"]].agg(''.join, axis=1)
adata_upd.obs["Predicted_Celltype"] = adata_upd.obs["Predicted_Celltype"].str.replace('nan', '')

In [None]:
adata_upd.obs.Predicted_Celltype = adata_upd.obs.Predicted_Celltype.astype('category')

In [None]:
adata_upd.obs.loc[adata_upd.obs.Predicted_Celltype == 'MTAL', 'Predicted_Celltype'] = 'TAL'
adata_upd.obs.loc[adata_upd.obs.Predicted_Celltype == 'CTAL', 'Predicted_Celltype'] = 'TAL'

In [None]:
adata_upd.obs["Predicted_Celltype_lowres"] = adata_upd.obs[["Predicted_Celltype_lowres", "celltype0421"]].agg(''.join, axis=1)
adata_upd.obs["Predicted_Celltype_lowres"] = adata_upd.obs["Predicted_Celltype_lowres"].str.replace('nan', '')

In [None]:
adata_upd.obs.Predicted_Celltype_lowres = adata_upd.obs.Predicted_Celltype_lowres.astype('category')

In [None]:
adata_upd

In [None]:
obstokeep = ['Origin', 'Source', 'Technology', 'Tissue_res', 'Age', 'Genetic_background', 'Gender', 'Predicted_Celltype', 'Predicted_Celltype_lowres', 'sample', 'pct_counts_mt']

In [None]:
adata_upd.obs = adata_upd.obs.filter(obstokeep)

In [None]:
del adata_upd.var

In [None]:
adata_upd

In [None]:
mapping = {
    'Macrophages': 'Macro',
    'Resident Macrophages': 'Macro',
    'Monocyte/DC': 'DC',
    'Dendritic': 'DC',
'Mesanglial cells': 'MC'}
adata_upd.obs["Predicted_Celltype"] = adata_upd.obs["Predicted_Celltype"].replace(mapping)

In [None]:
humphreys_fibrosis.obs['celltype0421'].value_counts().to_frame()

In [None]:
to_remove = adata_upd.obs.loc[(adata_upd.obs.Origin == 'Humphreys23') & (~adata_upd.obs.Predicted_Celltype.isin(['PT-AcInj','PT-FR','PT-Inj','PT-R', 'Fib', 'Myofib']))].index

In [None]:
adata_upd = adata_upd[~adata_upd.obs_names.isin(to_remove)].copy()

In [None]:
adata_upd

In [None]:
adata_upd.obs['Predicted_Celltype_lowres'] = adata_upd.obs['Predicted_Celltype_lowres'].replace({'T regs': 'T lymph', 'Th17': 'T lymph'})

In [None]:
adata_upd.obs['Predicted_Celltype_lowres'].value_counts()

In [None]:
adata_upd.write_h5ad("/exports/archive/hg-groep-peters/Healthy_Mouse_Atlas_Claudio/KidneyAtlas/h5ad/atlas_updated_zimm_humphreys_raw.h5ad")

In [None]:
adata_upd = sc.read_h5ad("/exports/archive/hg-groep-peters/Healthy_Mouse_Atlas_Claudio/KidneyAtlas/h5ad/atlas_updated_zimm_humphreys_raw.h5ad")

In [None]:
adata_upd

In [None]:
# High-resolution annotation table: one row per cell, indexed by barcode.
annotations = pd.DataFrame(
    adata_upd.obs['Predicted_Celltype'], 
    columns=['Predicted_Celltype'], 
    index=adata_upd.obs_names
)
# NOTE(review): the frame is built on adata_upd.obs_names, so this rename may
# also rename the index of adata_upd.obs if the Index object is shared - confirm.
annotations.index.name = 'barcode'

# Dataset of origin, exported under the column name 'Batch'.
annotations['Batch'] = adata_upd.obs['Origin']

# Write the table as comma-separated text.
annotations.to_csv('/exports/humgen/cnovellarausell/SevtapSpatial/cytospace/atlas_annotations_withZimmermanHumphreys_highres.csv', sep=',')

In [None]:
annotations

In [None]:
# Same export for the low-resolution labels.
annotations = pd.DataFrame(adata_upd.obs['Predicted_Celltype_lowres'], columns=['Predicted_Celltype_lowres'], index=adata_upd.obs_names)
annotations.index.name = 'barcode'
annotations['Batch'] = adata_upd.obs['Origin']
annotations.to_csv('/exports/humgen/cnovellarausell/SevtapSpatial/cytospace/atlas_annotations_withZimmermanHumphreys_lowres.csv', sep=',')

In [None]:
adata_upd = adata_upd.T

In [None]:
adata_upd

In [None]:
counts = pd.DataFrame(data=adata_upd.X.toarray(), index=adata_upd.obs_names, columns=adata_upd.var_names)

In [None]:
counts.to_csv('/exports/humgen/cnovellarausell/SevtapSpatial/cytospace/atlas_raw_counts_full_withZimmermanHumphreys.csv')

In [None]:
adata_upd.raw = adata_upd  # keep full dimension safe
# sc.pp.highly_variable_genes(
#     adata_upd,
#     flavor="seurat_v3",
#     n_top_genes=3000,
#     batch_key="Origin",
#     subset=True
# )

In [None]:
adata_upd

In [None]:
sc.pp.filter_cells(adata_upd, min_counts=1)

In [None]:
adata_upd

In [None]:
scvi.model.SCVI.setup_anndata(adata_upd, batch_key="Origin", continuous_covariate_keys=['pct_counts_mt'], 
                      categorical_covariate_keys=['Source'])

In [None]:
vae = scvi.model.SCVI(adata_upd, n_layers=2, n_latent=26, gene_likelihood="nb", dropout_rate=0.09672091885923559)

In [None]:
vae.train(plan_kwargs={"lr":0.0013153399994028092})

In [None]:
plt.plot(vae.history['train_loss_step']['train_loss_step'], label='train_step', ls=":");
plt.plot(vae.history['train_loss_epoch']['train_loss_epoch'], label='train_epoch');

In [None]:
adata_upd.obsm["X_scVI"] = vae.get_latent_representation()
sc.pp.neighbors(adata_upd, use_rep="X_scVI")
# sc.tl.leiden(adata_upd)
sc.tl.umap(adata_upd)

In [None]:
adata_upd.obs.Predicted_Celltype_lowres.cat.add_categories(new_categories='Unknown', inplace=True)

In [None]:
adata_upd.obs.loc[adata_upd.obs.Origin.isin(['Conway20', 'Hinze20']), 'Predicted_Celltype_lowres'] = 'Unknown'

In [None]:
lvae = scvi.model.SCANVI.from_scvi_model(
    vae,
    adata=adata_upd,
    labels_key="Predicted_Celltype_lowres",
    unlabeled_category='Unknown'
)

In [None]:
lvae.train(max_epochs=20, n_samples_per_label=100)

In [None]:
adata_upd.obsm["X_scANVI"] = lvae.get_latent_representation(adata_upd)
sc.pp.neighbors(adata_upd, use_rep="X_scANVI")
#sc.tl.leiden(adata)
sc.tl.umap(adata_upd)

In [None]:
sc.pl.umap(
    adata_upd,
    color="Origin",
    frameon=False,
    show=False
)
plt.savefig("umap_atlas_zimmerman_humphreys_scanvi.svg")

In [None]:
# High-resolution label -> low-resolution label. Labels that are not keys
# of this dict are left unchanged by `replace`.
# The column was previously assigned twice in a row: first with whatever
# `mapping` an earlier cell had left behind, then again with the dict
# below. The first assignment was a dead store and has been dropped.
# NOTE(review): this rebuilds Predicted_Celltype_lowres from the
# high-resolution column, so earlier edits made directly to the low-res
# column (the 'Unknown' relabelling, the T regs / Th17 merge) are replaced -
# confirm that this is intended.
mapping = {
    'Lyc6 high Macrophages': 'Macro',
    'Macrophages': 'Macro',
    'Lyc6 low Macrophages': 'Macro',
    'Mrc1+ Resident Macrophages': 'Macro',
    'Resident Macrophages': 'Macro',
    'Spp1+ Resident Macrophages ': 'Macro',
    'Monocyte/DC': 'DC',
    'Dendritic': 'DC',
    'B1 B lymph': 'B lymph',
    'CD4+ T lymph': 'T lymph',
    'CD4+ T regs': 'T regs',
    'CD4+ Th17': 'Th17',
    'Gzma low NK': 'NK',
    'Gzma+ CD8+ T lymph': 'T lymph',
    'Gzma+ NK': 'NK',
    'Memory B lymph': 'B lymph',
    'NKT1': 'NK',
    'T1 B lymph': 'B lymph',
    'T3/Follicular B lymph': 'B lymph',
    'Mesanglial cells': 'MC',
}

adata_upd.obs["Predicted_Celltype_lowres"] = adata_upd.obs["Predicted_Celltype"].replace(mapping)

In [None]:
sc.pl.umap(
    adata_upd,
    color="Predicted_Celltype_lowres",
    frameon=False,
    show=False
)
plt.savefig("umap_atlas_zimmerman_humphreys_CT_scanvi.svg")

In [None]:
adata_upd.write_h5ad("/exports/humgen/cnovellarausell/KidneyAtlas/h5ad/atlas_full_SCVI_SCANVI_Zimmerman_Humphreys.h5ad")

In [None]:
lvae.save("/exports/humgen/cnovellarausell/KidneyAtlas/SCANVI_zimmerman_humphreys", save_anndata=True, overwrite=True)

In [None]:
lvae = scvi.model.SCANVI.load("/exports/humgen/cnovellarausell/KidneyAtlas/SCANVI_zimmerman_humphreys")