In [1]:
import sys

# Extra site-packages directory made importable for this kernel.
# NOTE(review): this points into a specific conda env (python3.8); presumably it
# is where scglue is installed -- confirm it matches the kernel's Python version.
SCGLUE_SITE_PACKAGES = '/home/mnt/nzh/nzhanglab/project/shuang/miniconda3/envs/scglue2/lib/python3.8/site-packages'

# Guard the append so re-running this cell does not pile up duplicate entries.
if SCGLUE_SITE_PACKAGES not in sys.path:
    sys.path.append(SCGLUE_SITE_PACKAGES)


In [2]:
import anndata
import itertools
import networkx as nx
import pandas as pd
import scanpy as sc
import scglue
import seaborn as sns
from matplotlib import rcParams

In [3]:
# Apply scglue's publication plotting defaults, then override the default
# figure size with a 4x4 canvas.
scglue.plot.set_publication_params()
rcParams.update({"figure.figsize": (4, 4)})

In [4]:
from datetime import datetime

# Wall-clock stamp for this point of the run. The calendar date is included so
# the printed value matches its "date and time" label and stays unambiguous
# when a run crosses midnight.
time_now = datetime.now()
current_time = time_now.strftime("%Y-%m-%d %H:%M:%S")

print("The current date and time is", current_time)

The current date and time is 14:23:06


In [5]:
# Read the two preprocessed AnnData files (RNA first, ATAC second) and the
# prior graph, all from the relative "retina/" directory.
rna, atac = (
    anndata.read_h5ad(h5ad_path)
    for h5ad_path in (
        "retina/rna_preprocessed_2000.h5ad",
        "retina/atac_preprocessed_2000.h5ad",
    )
)
graph = nx.read_graphml("retina/prior_2000.graphml.gz")

In [6]:
# Register the RNA dataset with scglue: "NB" model, highly variable features
# only, values read from the "counts" layer, "X_pca" as the representation.
rna_dataset_options = dict(
    use_highly_variable=True,
    use_layer="counts",
    use_rep="X_pca",
)
scglue.models.configure_dataset(rna, "NB", **rna_dataset_options)

In [7]:
# Register the ATAC dataset with scglue: "NB" model, highly variable features
# only, "X_lsi" as the representation (no layer is specified here).
atac_dataset_options = dict(
    use_highly_variable=True,
    use_rep="X_lsi",
)
scglue.models.configure_dataset(atac, "NB", **atac_dataset_options)

In [8]:
# Restrict the prior graph to the features flagged `highly_variable` in either
# dataset.
# `subgraph` returns a view onto the parent graph; `.copy()` materialises it as
# an independent graph so that re-running this cell (which rebinds `graph`)
# does not stack a view on top of a view.
graph = graph.subgraph(itertools.chain(
    rna.var.query("highly_variable").index,
    atac.var.query("highly_variable").index
)).copy()

In [None]:
# Train the SCGLUE model on both datasets, keyed by domain name, using the
# graph prepared above.
training_adatas = {"rna": rna, "atac": atac}
glue = scglue.models.fit_SCGLUE(training_adatas, graph)

[INFO] fit_SCGLUE: Pretraining SCGLUE model...
[INFO] autodevice: Using GPU 0 as computation device.
[INFO] SCGLUEModel: Setting `graph_batch_size` = 57930
[INFO] SCGLUEModel: Setting `max_epochs` = 683
[INFO] SCGLUEModel: Setting `patience` = 57
[INFO] SCGLUEModel: Setting `reduce_lr_patience` = 29
[INFO] SCGLUETrainer: Using training directory: "/tmp/GLUETMPopa8vvcq"
[INFO] SCGLUETrainer: [Epoch 10] train={'g_nll': 0.505, 'g_kl': 0.001, 'g_elbo': 0.506, 'x_rna_nll': 0.296, 'x_rna_kl': 0.016, 'x_rna_elbo': 0.312, 'x_atac_nll': 0.077, 'x_atac_kl': 0.001, 'x_atac_elbo': 0.078, 'dsc_loss': 0.682, 'vae_loss': 0.41, 'gen_loss': 0.376}, val={'g_nll': 0.502, 'g_kl': 0.001, 'g_elbo': 0.503, 'x_rna_nll': 0.3, 'x_rna_kl': 0.014, 'x_rna_elbo': 0.314, 'x_atac_nll': 0.068, 'x_atac_kl': 0.001, 'x_atac_elbo': 0.069, 'dsc_loss': 0.684, 'vae_loss': 0.403, 'gen_loss': 0.369}, 2.7s elapsed


In [None]:
# Write the model held in `glue` to "retina_glue.dill" (relative path, so it
# lands in the notebook's current working directory).
glue.save("retina_glue.dill")

In [None]:
# Rebind `glue` to the model read back from "retina_glue.dill".
# NOTE(review): the ".dill" extension suggests a pickle-style format -- only
# load files you produced yourself.
glue = scglue.models.load_model("retina_glue.dill")

In [None]:
# Compute the integration consistency table for the fitted model; the RNA
# dataset is told to read its counts from the "counts" layer.
consistency_adatas = {"rna": rna, "atac": atac}
dx = scglue.models.integration_consistency(
    glue,
    consistency_adatas,
    graph,
    count_layers={"rna": "counts"},
)
dx

In [None]:
# Consistency versus "n_meta", with a dashed dark-red reference line at
# y = 0.05. The final assignment to `_` keeps the artist repr out of the output.
consistency_ax = sns.lineplot(data=dx, x="n_meta", y="consistency")
_ = consistency_ax.axhline(y=0.05, c="darkred", ls="--")


In [None]:
# Store each dataset's encoding from the model under obsm["X_glue"],
# RNA first and then ATAC.
for domain_key, domain_adata in (("rna", rna), ("atac", atac)):
    domain_adata.obsm["X_glue"] = glue.encode_data(domain_key, domain_adata)

In [None]:
import numpy  # kept so the bare `numpy` name stays bound for any cell that relies on it

# Prefix every observation name with its modality ("rna" / "atac") so names from
# the two datasets stay distinguishable after they are combined.
#
# Plain pandas string concatenation is used instead of `numpy.char.add`:
# obs_names is a pandas Index (typically object dtype), and `numpy.char.add`
# with a str scalar and an object array either sizes the result from the object
# itemsize (silently truncating long names) or raises TypeError, depending on
# the NumPy version.
#
# The prefix is added unconditionally, so run this cell once per kernel
# session; re-running it prefixes the names again.
rna.obs_names = "rna" + rna.obs_names.astype(str)
atac.obs_names = "atac" + atac.obs_names.astype(str)

In [None]:
# Stack the RNA and ATAC AnnData objects (RNA first) into a single object.
# NOTE(review): called with anndata.concat's defaults -- confirm the default
# join keeps the obs columns and obsm keys that later cells rely on.
combined = anndata.concat([rna, atac])


In [None]:
# Neighbour graph on the "X_glue" representation with cosine distance, then a
# UMAP layout, plotted once per colour key.
umap_color_keys = ["celltype", "domain"]
sc.pp.neighbors(combined, metric="cosine", use_rep="X_glue")
sc.tl.umap(combined)
sc.pl.umap(combined, color=umap_color_keys, wspace=0.65)

In [None]:
# Encode the graph with the model and wrap the result in a DataFrame indexed
# by the model's vertices; show the top-left 5x5 corner as a preview.
feature_embeddings = pd.DataFrame(
    glue.encode_graph(graph),
    index=glue.vertices,
)
feature_embeddings.iloc[:5, :5]

In [None]:
import numpy as np

# Tabulate the combined "X_glue" embedding, one row per observation.
# Row labels are taken from `combined` itself rather than rebuilt from `rna`
# and `atac`, so labels and rows come from the same object and cannot drift
# apart if the concatenation order or contents of `combined` change.
# `np.asarray` yields a plain array of names, so the index carries no name.
glue_array = pd.DataFrame(
    combined.obsm["X_glue"],
    index=np.asarray(combined.obs_names),
)

In [None]:
# Write the embedding table, including its row index, to CSV.
# NOTE(review): absolute, user-specific path -- adjust when running elsewhere.
embeddings_csv_path = '/home/mnt/nzh/nzhanglab/project/shuang/scATAC/comparison_methods/scglue/subset_2000_retina_glue_embeddings.csv'
glue_array.to_csv(embeddings_csv_path, index=True)

In [None]:
from datetime import datetime

# Wall-clock stamp for this point of the run. The calendar date is included so
# the printed value matches its "date and time" label and stays unambiguous
# when a run crosses midnight.
time_now = datetime.now()
current_time = time_now.strftime("%Y-%m-%d %H:%M:%S")

print("The current date and time is", current_time)