In [43]:
import scanpy as sc
import anndata as ad
import numpy as np
import pandas as pd
import pandas.testing as tm
import os
# Project root, resolved inside the directory named by the MLAB environment variable.
# os.getenv returns None when MLAB is unset, and os.path.join(None, ...) then fails
# with an opaque TypeError, so fail fast with a readable message instead.
_mlab_root = os.getenv("MLAB")
if _mlab_root is None:
    raise RuntimeError(
        "Environment variable MLAB is not set; it must name the directory "
        "that contains projects/brcameta/brca_atlas."
    )
PATH = os.path.join(_mlab_root, "projects/brcameta/brca_atlas")

In [2]:
# Read the three per-compartment AnnData files (immune, epithelial, stromal), in that order.
imm, epi, strom = (
    sc.read_h5ad(os.path.join(PATH, relative_path))
    for relative_path in (
        "data/sc/imm_cellxgene.h5ad",
        "data/sc/epi_cellxgene.h5ad",
        "data/sc/strom_cellxgene.h5ad",
    )
)

# Combined RPCA embedding table; the first CSV column becomes the row index.
rpca_embedding = pd.read_csv(
    os.path.join(PATH, "data/embeddings/all/rpca_embedding_combined.csv"),
    index_col=0,
)

## Cleaning AnnData slots

In [15]:
# Strip every annotation column from .var, leaving only the gene index.
for compartment in (strom, imm, epi):
    compartment.var = compartment.var.drop(columns=compartment.var.columns)

In [22]:
# Move the embedding stored under 'X_umap.rpca' to the 'X_umap' key.
#
# The rename is guarded so that the cell is idempotent: this notebook writes the
# cleaned objects back to the very files it loads them from, so on any later run
# (or when the cell is executed twice) 'X_umap.rpca' is already gone and an
# unconditional lookup would raise KeyError.
for compartment in (strom, imm, epi):
    if 'X_umap.rpca' in compartment.obsm:
        compartment.obsm['X_umap'] = compartment.obsm['X_umap.rpca']
        del compartment.obsm['X_umap.rpca']
    elif 'X_umap' not in compartment.obsm:
        # Neither the source nor the target key exists: keep failing loudly,
        # with the same exception type the unguarded lookup produced.
        raise KeyError('X_umap.rpca')

In [100]:
# Attach display metadata to each compartment. Only the title differs between
# objects; the batch column name and default embedding key are shared.
# NOTE(review): these keys are presumably consumed by the cellxgene viewer - confirm.
for compartment, title in (
    (strom, "Stromal Compartment"),
    (imm, "Immune Compartment"),
    (epi, "Epithelial Compartment"),
):
    compartment.uns["title"] = title
    compartment.uns["batch_condition"] = "batch"
    compartment.uns["default_embedding"] = "X_umap"

In [90]:
# Slice the combined RPCA table down to each compartment's cells, in the order
# given by that compartment's obs_names.
strom_rpca_embedding, imm_rpca_embedding, epi_rpca_embedding = (
    rpca_embedding.loc[compartment.obs_names]
    for compartment in (strom, imm, epi)
)

In [91]:
# Store each compartment's RPCA embedding as a plain 2-D array rather than a
# DataFrame: embedding consumers index obsm['X_*'] entries positionally.
# NOTE(review): the CELLxGENE schema asks for ndarray embeddings - confirm against
# the viewer version in use. Assumes every column of the CSV is numeric - TODO confirm.
for compartment, embedding in (
    (strom, strom_rpca_embedding),
    (imm, imm_rpca_embedding),
    (epi, epi_rpca_embedding),
):
    # A bare array carries no labels, so check row alignment explicitly
    # before discarding the index.
    if not embedding.index.equals(compartment.obs_names):
        raise ValueError("RPCA embedding rows are not aligned with obs_names")
    compartment.obsm['X_rpca'] = embedding.to_numpy()

In [35]:
# Sanity check: all three compartments must share an identical gene index
# before they are concatenated.
for left_names, right_names in (
    (imm.var_names, strom.var_names),
    (epi.var_names, strom.var_names),
    (epi.var_names, imm.var_names),
):
    tm.assert_index_equal(left_names, right_names)

## Merged AnnData

In [44]:
# Stack the epithelial, immune and stromal objects into one AnnData,
# keeping the intersection of their variables (join="inner").
combined_adata = ad.concat(
    [epi, imm, strom],
    join="inner",
)

In [46]:
# The merged object must expose exactly the same gene index as its inputs.
tm.assert_index_equal(
    left=combined_adata.var_names,
    right=imm.var_names,
)

In [66]:
# Load the UMAP coordinates for the full atlas; the first CSV column is the row index.
umap_csv = os.path.join(PATH, "data/embeddings/all/rpca_umap.csv")
rpca_umap = pd.read_csv(umap_csv, index_col=0)

# Put both embedding tables into the merged object's cell order.
merged_cells = combined_adata.obs_names
rpca_embedding = rpca_embedding.loc[merged_cells]
rpca_umap = rpca_umap.loc[merged_cells]

In [70]:
# Attach the atlas-wide embeddings as plain 2-D arrays rather than DataFrames:
# 'X_umap' is declared as the default embedding for this object, and embedding
# consumers index obsm['X_*'] entries positionally.
# NOTE(review): the CELLxGENE schema asks for ndarray embeddings - confirm against
# the viewer version in use. Assumes every CSV column is numeric - TODO confirm.
for obsm_key, embedding in (("X_umap", rpca_umap), ("X_rpca", rpca_embedding)):
    # A bare array carries no labels, so check row alignment explicitly
    # before discarding the index.
    if not embedding.index.equals(combined_adata.obs_names):
        raise ValueError(
            f"{obsm_key}: embedding rows are not aligned with combined_adata.obs_names"
        )
    combined_adata.obsm[obsm_key] = embedding.to_numpy()

In [99]:
# Display metadata for the merged object, using the same keys that are set
# on the individual compartments.
for uns_key, uns_value in (
    ("title", "Global Atlas"),
    ("batch_condition", "batch"),
    ("default_embedding", "X_umap"),
):
    combined_adata.uns[uns_key] = uns_value

## Saving

In [105]:
# Persist every object. The three compartment files are written back to the
# same paths they were loaded from; the merged atlas goes to its own file.
for dataset, relative_path in (
    (imm, "data/sc/imm_cellxgene.h5ad"),
    (epi, "data/sc/epi_cellxgene.h5ad"),
    (strom, "data/sc/strom_cellxgene.h5ad"),
    (combined_adata, "data/sc/all_cellxgene.h5ad"),
):
    dataset.write_h5ad(os.path.join(PATH, relative_path))