In [1]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell executes, so edits to local modules such as
# `helper_functions` are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import warnings

import networkx as nx
import scglue
from itertools import chain
import itertools
from tqdm import tqdm

import scanpy as sc
import anndata as ad
import numpy as np
import scipy
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import preprocessing as pp

import time
from tqdm import tqdm
import seaborn as sns
import scib_metrics
import helper_functions as fcts

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Sample identifier: selects the data sub-directory here and is later stored
# in the "donor" column of the results table.
i = 's1d2'
donor_dir = f'data/{i}'

# Load the ATAC and RNA AnnData files plus the guidance graph for this sample.
atac = sc.read(f'{donor_dir}/atac-glue.h5ad')
rna = sc.read(f'{donor_dir}/rna-glue.h5ad')
guidance = nx.read_graphml(f'{donor_dir}/guidance.graphml.gz')

In [4]:
# Register the RNA AnnData with SCGLUE: "NB" probabilistic model, data taken
# from the "counts" layer, restricted to highly variable features, with the
# "X_pca" embedding as the input representation.
# use_obs_names=True presumably lets SCGLUE match paired cells across
# modalities by obs name — confirm against the scglue documentation.
rna_dataset_kws = dict(
    use_highly_variable=True,
    use_layer="counts",
    use_rep="X_pca",
    use_obs_names=True,
)
scglue.models.configure_dataset(rna, "NB", **rna_dataset_kws)

In [5]:
# Register the ATAC AnnData with SCGLUE using the same settings as RNA, except
# that the input representation is the "X_lsi_red" embedding.
atac_dataset_kws = dict(
    use_highly_variable=True,
    use_layer="counts",
    use_rep="X_lsi_red",
    use_obs_names=True,
)
scglue.models.configure_dataset(atac, "NB", **atac_dataset_kws)

In [6]:
# Restrict the guidance graph to the features flagged as highly variable in
# either modality; .copy() detaches the result from the full graph.
hv_rna_features = rna.var.query("highly_variable").index
hv_atac_features = atac.var.query("highly_variable").index
guidance_hvf = guidance.subgraph(chain(hv_rna_features, hv_atac_features)).copy()

In [7]:
# Train the paired SCGLUE model on both modalities. According to the log this
# runs pretraining, balancing-weight estimation and fine-tuning; training
# artefacts are written under the "glue" directory.
modalities = {"rna": rna, "atac": atac}
glue = scglue.models.fit_SCGLUE(
    modalities,
    guidance_hvf,
    model=scglue.models.PairedSCGLUEModel,
    fit_kws=dict(directory="glue"),
)

[INFO] fit_SCGLUE: Pretraining SCGLUE model...




[INFO] autodevice: Using CPU as computation device.
[INFO] check_graph: Checking variable coverage...
[INFO] check_graph: Checking edge attributes...
[INFO] check_graph: Checking self-loops...
[INFO] check_graph: Checking graph symmetry...
[INFO] PairedSCGLUEModel: Setting `graph_batch_size` = 3437
[INFO] PairedSCGLUEModel: Setting `max_epochs` = 507
[INFO] PairedSCGLUEModel: Setting `patience` = 43
[INFO] PairedSCGLUEModel: Setting `reduce_lr_patience` = 22
[INFO] PairedSCGLUETrainer: Using training directory: "glue/pretrain"
[INFO] PairedSCGLUETrainer: [Epoch 10] train={'g_nll': 0.624, 'g_kl': 0.034, 'g_elbo': 0.659, 'x_rna_nll': 0.528, 'x_rna_kl': 0.014, 'x_rna_elbo': 0.542, 'x_atac_nll': 0.671, 'x_atac_kl': 0.007, 'x_atac_elbo': 0.678, 'dsc_loss': 0.683, 'vae_loss': 1.301, 'gen_loss': 1.267, 'joint_cross_loss': 1.197, 'real_cross_loss': 1.216, 'cos_loss': 0.342}, val={'g_nll': 0.626, 'g_kl': 0.035, 'g_elbo': 0.661, 'x_rna_nll': 0.521, 'x_rna_kl': 0.012, 'x_rna_elbo': 0.533, 'x_atac

2023-05-05 14:04:56,054 ignite.handlers.early_stopping.EarlyStopping INFO: EarlyStopping: Stop training


[INFO] EarlyStopping: Restoring checkpoint "112"...
[INFO] EarlyStopping: Restoring checkpoint "112"...
[INFO] fit_SCGLUE: Estimating balancing weight...
[INFO] estimate_balancing_weight: Clustering cells...
[INFO] estimate_balancing_weight: Matching clusters...
[INFO] estimate_balancing_weight: Matching array shape = (16, 18)...
[INFO] estimate_balancing_weight: Estimating balancing weight...




[INFO] fit_SCGLUE: Fine-tuning SCGLUE model...




[INFO] check_graph: Checking variable coverage...
[INFO] check_graph: Checking edge attributes...
[INFO] check_graph: Checking self-loops...
[INFO] check_graph: Checking graph symmetry...
[INFO] PairedSCGLUEModel: Setting `graph_batch_size` = 3437
[INFO] PairedSCGLUEModel: Setting `align_burnin` = 85
[INFO] PairedSCGLUEModel: Setting `max_epochs` = 507
[INFO] PairedSCGLUEModel: Setting `patience` = 43
[INFO] PairedSCGLUEModel: Setting `reduce_lr_patience` = 22
[INFO] PairedSCGLUETrainer: Using training directory: "glue/fine-tune"
[INFO] PairedSCGLUETrainer: [Epoch 10] train={'g_nll': 0.442, 'g_kl': 0.036, 'g_elbo': 0.479, 'x_rna_nll': 0.504, 'x_rna_kl': 0.009, 'x_rna_elbo': 0.513, 'x_atac_nll': 0.653, 'x_atac_kl': 0.003, 'x_atac_elbo': 0.657, 'dsc_loss': 0.691, 'vae_loss': 1.246, 'gen_loss': 1.211, 'joint_cross_loss': 1.163, 'real_cross_loss': 1.189, 'cos_loss': 0.461}, val={'g_nll': 0.44, 'g_kl': 0.036, 'g_elbo': 0.476, 'x_rna_nll': 0.49, 'x_rna_kl': 0.009, 'x_rna_elbo': 0.499, 'x_ata

2023-05-05 14:35:06,669 ignite.handlers.early_stopping.EarlyStopping INFO: EarlyStopping: Stop training


[INFO] EarlyStopping: Restoring checkpoint "161"...
[INFO] EarlyStopping: Restoring checkpoint "161"...


In [8]:
# Store each modality's SCGLUE embedding on its own AnnData under "X_glue".
for modality_name, modality_adata in (("rna", rna), ("atac", atac)):
    modality_adata.obsm["X_glue"] = glue.encode_data(modality_name, modality_adata)

In [11]:
# Stack RNA and ATAC cells into one AnnData. The mapping keys become the
# values of the new 'batch' column and are appended to the cell names with '-'.
adata = sc.concat(
    {"RNA": rna, "ATAC": atac},
    join='outer',
    label='batch',
    index_unique='-',
)

In [12]:
# Inspect the combined RNA + ATAC AnnData built by the concat cell.
adata

AnnData object with n_obs × n_vars = 13480 × 22805
    obs: 'GEX_pct_counts_mt', 'GEX_n_counts', 'GEX_n_genes', 'GEX_size_factors', 'GEX_phase', 'ATAC_nCount_peaks', 'ATAC_atac_fragments', 'ATAC_reads_in_peaks_frac', 'ATAC_blacklist_fraction', 'ATAC_nucleosome_signal', 'cell_type', 'batch', 'ATAC_pseudotime_order', 'GEX_pseudotime_order', 'Samplename', 'Site', 'DonorNumber', 'Modality', 'VendorLot', 'DonorID', 'DonorAge', 'DonorBMI', 'DonorBloodType', 'DonorRace', 'Ethnicity', 'DonorGender', 'QCMeds', 'DonorSmoker', 'n_counts', '_scvi_batch', '_scvi_labels', 'cluster', 'balancing_weight'
    obsm: 'ATAC_gene_activity', 'ATAC_lsi_full', 'ATAC_lsi_red', 'ATAC_umap', 'GEX_X_pca', 'GEX_X_umap', 'X_mde', 'X_pca', 'X_scVI', 'X_umap', 'X_glue', 'ATAC_lsi_red_new', 'X_PeakVI', 'X_lsi', 'X_lsi_red', 'X_poissonVI'
    layers: 'counts'

In [10]:
# FOSCTTM of the two SCGLUE embeddings, computed by the project helper module.
rna_latent = rna.obsm["X_glue"]
atac_latent = atac.obsm["X_glue"]
f = fcts.foscttm(rna_latent, atac_latent)
print("The average FOSCTTM for the alignment is: ", f)

The average FOSCTTM for the alignment is:  0.0422


In [14]:
import scib

In [16]:
# Keys shared by the metric calls below.
latent_key = 'X_glue'
label_col = 'cell_type'
cluster_col = 'cluster_key'

# --- Cross-modality evaluation metrics (project helpers) ---
rna_latent = rna.obsm[latent_key]
atac_latent = atac.obsm[latent_key]
f = fcts.foscttm(rna_latent, atac_latent)
lta = fcts.transfer_accuracy(
    rna_latent, atac_latent, rna.obs[label_col], atac.obs[label_col]
)
alg_score = fcts.alignment_score(rna_latent, atac_latent)
rmse = fcts.calculate_rmse(rna_latent, atac_latent)

# --- Bio-conservation metrics ---
# The neighbour graph on the joint embedding must exist before clustering;
# opt_louvain writes its clustering into adata.obs[cluster_col] (inplace=True).
sc.pp.neighbors(adata, use_rep=latent_key)
scib.cl.opt_louvain(
    adata,
    label_key=label_col,
    cluster_key=cluster_col,
    plot=False,
    inplace=True,
    force=True,
    verbose=False,
)
nmi = scib.me.nmi(adata, cluster_key=cluster_col, label_key=label_col)
asw_label = scib.me.silhouette(adata, label_key=label_col, embed=latent_key)
ari = scib.me.ari(adata, cluster_key=cluster_col, label_key=label_col)

# --- Batch-correction metrics ('batch' holds the modality label) ---
asw_batch = scib.me.silhouette_batch(
    adata,
    batch_key='batch',
    label_key=label_col,
    embed=latent_key,
    verbose=False,
)
graph_connectivity = scib.me.graph_connectivity(adata, label_key=label_col)

  scib.cl.opt_louvain(adata,label_key='cell_type',cluster_key='cluster_key',plot=False,inplace=True,force=True,verbose = False)
  import louvain


In [19]:
# Start the results table with the row for the first donor; later cells append
# one row per additional donor in the same column order.
results = [
    [i, f, lta, alg_score, rmse, nmi, asw_label, ari, asw_batch, graph_connectivity],
]

In [21]:
# Column names, in the same order as the values stored in each `results` row.
result_columns = [
    "donor",
    "foscttm",
    "label_transfer_accuracy",
    "alignment_score",
    "rmse",
    "nmi",
    "asw_label",
    "ari",
    "asw_batch",
    "graph_connectivity",
]
df_results = pd.DataFrame(results, columns=result_columns)

In [22]:
# Display the metrics table collected so far.
df_results

Unnamed: 0,donor,foscttm,label_transfer_accuracy,alignment_score,rmse,nmi,asw_label,ari,asw_batch,graph_connectivity
0,s1d2,0.0422,0.842285,0.956476,0.169282,0.700974,0.585814,0.648648,0.910727,0.951778


In [23]:
# Write the metrics table to disk without the DataFrame index column.
results_csv = 'output/scglue_2.csv'
df_results.to_csv(results_csv, index=False)

In [24]:
# Column names, in the same order as the values appended to `results` below.
result_columns = [
    "donor", "foscttm", "label_transfer_accuracy", "alignment_score", "rmse",
    "nmi", "asw_label", "ari", "asw_batch", "graph_connectivity",
]

# Train and evaluate SCGLUE for each remaining donor, appending one row of
# metrics per donor to the existing `results` list.
#
# NOTE(review): every donor trains into the same "glue" directory, so training
# artefacts of earlier donors are presumably overwritten — confirm intended.
# NOTE(review): in the recorded run, an interrupt during training was logged as
# "Stopping training due to user interrupt..." and the loop still went on to
# compute metrics for that donor; rows recorded around an interrupt may come
# from a partially trained model and should be re-checked.
for i in tqdm(['s1d3', 's2d1', 's2d4', 's2d5', 's3d10', 's3d3', 's3d6', 's3d7', 's4d1', 's4d8', 's4d9']):
    # --- Load both modalities and the guidance graph for this donor ---
    atac = sc.read(f'data/{i}/atac-glue.h5ad')
    rna = sc.read(f'data/{i}/rna-glue.h5ad')
    guidance = nx.read_graphml(f'data/{i}/guidance.graphml.gz')

    # --- Configure datasets and restrict the graph to highly variable features ---
    scglue.models.configure_dataset(
        rna, "NB", use_highly_variable=True,
        use_layer="counts", use_rep="X_pca", use_obs_names=True,
    )
    scglue.models.configure_dataset(
        atac, "NB", use_highly_variable=True,
        use_layer="counts", use_rep="X_lsi_red", use_obs_names=True,
    )
    guidance_hvf = guidance.subgraph(chain(
        rna.var.query("highly_variable").index,
        atac.var.query("highly_variable").index,
    )).copy()

    # --- Train ---
    glue = scglue.models.fit_SCGLUE(
        {"rna": rna, "atac": atac}, guidance_hvf,
        model=scglue.models.PairedSCGLUEModel,
        fit_kws={"directory": "glue"},
    )

    # --- Embed and combine ---
    rna.obsm["X_glue"] = glue.encode_data("rna", rna)
    atac.obsm["X_glue"] = glue.encode_data("atac", atac)
    adata = sc.concat(
        [rna, atac], join='outer', label='batch',
        index_unique='-', keys=["RNA", "ATAC"],
    )

    # Evaluation metrics
    f = fcts.foscttm(rna.obsm["X_glue"], atac.obsm["X_glue"])
    lta = fcts.transfer_accuracy(
        rna.obsm["X_glue"], atac.obsm["X_glue"],
        rna.obs['cell_type'], atac.obs['cell_type'],
    )
    alg_score = fcts.alignment_score(rna.obsm["X_glue"], atac.obsm["X_glue"])
    rmse = fcts.calculate_rmse(rna.obsm["X_glue"], atac.obsm["X_glue"])

    # Bio-conservation metrics (neighbour graph must exist before clustering)
    sc.pp.neighbors(adata, use_rep='X_glue')
    scib.cl.opt_louvain(
        adata, label_key='cell_type', cluster_key='cluster_key',
        plot=False, inplace=True, force=True, verbose=False,
    )
    nmi = scib.me.nmi(adata, cluster_key='cluster_key', label_key='cell_type')
    asw_label = scib.me.silhouette(adata, label_key='cell_type', embed='X_glue')
    ari = scib.me.ari(adata, cluster_key='cluster_key', label_key='cell_type')

    # Batch correction metrics
    asw_batch = scib.me.silhouette_batch(
        adata, batch_key='batch', label_key='cell_type',
        embed='X_glue', verbose=False,
    )
    graph_connectivity = scib.me.graph_connectivity(adata, label_key='cell_type')

    results.append([i, f, lta, alg_score, rmse, nmi, asw_label, ari, asw_batch, graph_connectivity])

    # Persist after every donor: each iteration takes on the order of an hour
    # or more, so an interrupt or crash later in the loop must not leave the
    # completed donors unsaved.
    df_results = pd.DataFrame(results, columns=result_columns)
    df_results.to_csv('output/scglue_2.csv', index=False)

  0%|          | 0/11 [00:00<?, ?it/s]

[INFO] fit_SCGLUE: Pretraining SCGLUE model...




[INFO] check_graph: Checking variable coverage...
[INFO] check_graph: Checking edge attributes...
[INFO] check_graph: Checking self-loops...
[INFO] check_graph: Checking graph symmetry...
[INFO] PairedSCGLUEModel: Setting `graph_batch_size` = 3438
[INFO] PairedSCGLUEModel: Setting `max_epochs` = 798
[INFO] PairedSCGLUEModel: Setting `patience` = 67
[INFO] PairedSCGLUEModel: Setting `reduce_lr_patience` = 34
[INFO] PairedSCGLUETrainer: Using training directory: "glue/pretrain"
[INFO] PairedSCGLUETrainer: [Epoch 10] train={'g_nll': 0.586, 'g_kl': 0.025, 'g_elbo': 0.611, 'x_rna_nll': 0.509, 'x_rna_kl': 0.021, 'x_rna_elbo': 0.529, 'x_atac_nll': 0.699, 'x_atac_kl': 0.013, 'x_atac_elbo': 0.712, 'dsc_loss': 0.66, 'vae_loss': 1.32, 'gen_loss': 1.287, 'joint_cross_loss': 1.206, 'real_cross_loss': 1.232, 'cos_loss': 0.279}, val={'g_nll': 0.589, 'g_kl': 0.026, 'g_elbo': 0.615, 'x_rna_nll': 0.51, 'x_rna_kl': 0.02, 'x_rna_elbo': 0.53, 'x_atac_nll': 0.705, 'x_atac_kl': 0.012, 'x_atac_elbo': 0.717, '

2023-05-05 15:55:54,432 ignite.handlers.early_stopping.EarlyStopping INFO: EarlyStopping: Stop training


[INFO] EarlyStopping: Restoring checkpoint "249"...
[INFO] EarlyStopping: Restoring checkpoint "249"...


  self.pid = os.fork()


[INFO] fit_SCGLUE: Estimating balancing weight...
[INFO] estimate_balancing_weight: Clustering cells...
[INFO] estimate_balancing_weight: Matching clusters...
[INFO] estimate_balancing_weight: Matching array shape = (20, 14)...
[INFO] estimate_balancing_weight: Estimating balancing weight...
[INFO] fit_SCGLUE: Fine-tuning SCGLUE model...




[INFO] check_graph: Checking variable coverage...
[INFO] check_graph: Checking edge attributes...
[INFO] check_graph: Checking self-loops...
[INFO] check_graph: Checking graph symmetry...
[INFO] PairedSCGLUEModel: Setting `graph_batch_size` = 3438
[INFO] PairedSCGLUEModel: Setting `align_burnin` = 133
[INFO] PairedSCGLUEModel: Setting `max_epochs` = 798
[INFO] PairedSCGLUEModel: Setting `patience` = 67
[INFO] PairedSCGLUEModel: Setting `reduce_lr_patience` = 34
[INFO] PairedSCGLUETrainer: Using training directory: "glue/fine-tune"
[INFO] PairedSCGLUETrainer: [Epoch 10] train={'g_nll': 0.441, 'g_kl': 0.038, 'g_elbo': 0.478, 'x_rna_nll': 0.461, 'x_rna_kl': 0.011, 'x_rna_elbo': 0.472, 'x_atac_nll': 0.67, 'x_atac_kl': 0.004, 'x_atac_elbo': 0.675, 'dsc_loss': 0.692, 'vae_loss': 1.22, 'gen_loss': 1.185, 'joint_cross_loss': 1.137, 'real_cross_loss': 1.165, 'cos_loss': 0.427}, val={'g_nll': 0.436, 'g_kl': 0.038, 'g_elbo': 0.474, 'x_rna_nll': 0.449, 'x_rna_kl': 0.011, 'x_rna_elbo': 0.459, 'x_at

2023-05-05 16:47:02,277 ignite.handlers.early_stopping.EarlyStopping INFO: EarlyStopping: Stop training


[INFO] EarlyStopping: Restoring checkpoint "293"...
[INFO] EarlyStopping: Restoring checkpoint "293"...


  self.pid = os.fork()
  scib.cl.opt_louvain(adata,label_key='cell_type',cluster_key='cluster_key',plot=False,inplace=True,force=True,verbose = False)
  9%|▉         | 1/11 [1:42:58<17:09:41, 6178.15s/it]

[INFO] fit_SCGLUE: Pretraining SCGLUE model...




[INFO] check_graph: Checking variable coverage...
[INFO] check_graph: Checking edge attributes...
[INFO] check_graph: Checking self-loops...
[INFO] check_graph: Checking graph symmetry...
[INFO] PairedSCGLUEModel: Setting `graph_batch_size` = 2677
[INFO] PairedSCGLUEModel: Setting `max_epochs` = 809
[INFO] PairedSCGLUEModel: Setting `patience` = 68
[INFO] PairedSCGLUEModel: Setting `reduce_lr_patience` = 34
[INFO] PairedSCGLUETrainer: Using training directory: "glue/pretrain"
[INFO] PairedSCGLUETrainer: [Epoch 10] train={'g_nll': 0.598, 'g_kl': 0.032, 'g_elbo': 0.63, 'x_rna_nll': 0.524, 'x_rna_kl': 0.018, 'x_rna_elbo': 0.542, 'x_atac_nll': 0.563, 'x_atac_kl': 0.01, 'x_atac_elbo': 0.573, 'dsc_loss': 0.657, 'vae_loss': 1.192, 'gen_loss': 1.159, 'joint_cross_loss': 1.088, 'real_cross_loss': 1.116, 'cos_loss': 0.375}, val={'g_nll': 0.6, 'g_kl': 0.033, 'g_elbo': 0.633, 'x_rna_nll': 0.498, 'x_rna_kl': 0.016, 'x_rna_elbo': 0.513, 'x_atac_nll': 0.561, 'x_atac_kl': 0.009, 'x_atac_elbo': 0.57, '

2023-05-05 18:19:23,264 ignite.handlers.early_stopping.EarlyStopping INFO: EarlyStopping: Stop training


[INFO] EarlyStopping: Restoring checkpoint "338"...
[INFO] EarlyStopping: Restoring checkpoint "338"...


  self.pid = os.fork()


[INFO] fit_SCGLUE: Estimating balancing weight...
[INFO] estimate_balancing_weight: Clustering cells...
[INFO] estimate_balancing_weight: Matching clusters...
[INFO] estimate_balancing_weight: Matching array shape = (19, 16)...
[INFO] estimate_balancing_weight: Estimating balancing weight...
[INFO] fit_SCGLUE: Fine-tuning SCGLUE model...




[INFO] check_graph: Checking variable coverage...
[INFO] check_graph: Checking edge attributes...
[INFO] check_graph: Checking self-loops...
[INFO] check_graph: Checking graph symmetry...
[INFO] PairedSCGLUEModel: Setting `graph_batch_size` = 2677
[INFO] PairedSCGLUEModel: Setting `align_burnin` = 135
[INFO] PairedSCGLUEModel: Setting `max_epochs` = 809
[INFO] PairedSCGLUEModel: Setting `patience` = 68
[INFO] PairedSCGLUEModel: Setting `reduce_lr_patience` = 34
[INFO] PairedSCGLUETrainer: Using training directory: "glue/fine-tune"
[INFO] PairedSCGLUETrainer: [Epoch 10] train={'g_nll': 0.431, 'g_kl': 0.043, 'g_elbo': 0.474, 'x_rna_nll': 0.483, 'x_rna_kl': 0.009, 'x_rna_elbo': 0.493, 'x_atac_nll': 0.537, 'x_atac_kl': 0.003, 'x_atac_elbo': 0.541, 'dsc_loss': 0.693, 'vae_loss': 1.104, 'gen_loss': 1.069, 'joint_cross_loss': 1.03, 'real_cross_loss': 1.068, 'cos_loss': 0.474}, val={'g_nll': 0.434, 'g_kl': 0.043, 'g_elbo': 0.477, 'x_rna_nll': 0.482, 'x_rna_kl': 0.009, 'x_rna_elbo': 0.492, 'x_a

2023-05-05 19:18:27,820 ignite.handlers.early_stopping.EarlyStopping INFO: EarlyStopping: Stop training


[INFO] EarlyStopping: Restoring checkpoint "287"...
[INFO] EarlyStopping: Restoring checkpoint "287"...


  self.pid = os.fork()
  scib.cl.opt_louvain(adata,label_key='cell_type',cluster_key='cluster_key',plot=False,inplace=True,force=True,verbose = False)
 18%|█▊        | 2/11 [4:14:23<19:43:14, 7888.25s/it]

[INFO] fit_SCGLUE: Pretraining SCGLUE model...




[INFO] check_graph: Checking variable coverage...
[INFO] check_graph: Checking edge attributes...
[INFO] check_graph: Checking self-loops...
[INFO] check_graph: Checking graph symmetry...
[INFO] PairedSCGLUEModel: Setting `graph_batch_size` = 3033
[INFO] PairedSCGLUEModel: Setting `max_epochs` = 559
[INFO] PairedSCGLUEModel: Setting `patience` = 47
[INFO] PairedSCGLUEModel: Setting `reduce_lr_patience` = 24
[INFO] PairedSCGLUETrainer: Using training directory: "glue/pretrain"
[INFO] PairedSCGLUETrainer: [Epoch 10] train={'g_nll': 0.611, 'g_kl': 0.035, 'g_elbo': 0.646, 'x_rna_nll': 0.45, 'x_rna_kl': 0.013, 'x_rna_elbo': 0.463, 'x_atac_nll': 0.555, 'x_atac_kl': 0.008, 'x_atac_elbo': 0.562, 'dsc_loss': 0.679, 'vae_loss': 1.098, 'gen_loss': 1.064, 'joint_cross_loss': 1.003, 'real_cross_loss': 1.024, 'cos_loss': 0.348}, val={'g_nll': 0.611, 'g_kl': 0.036, 'g_elbo': 0.646, 'x_rna_nll': 0.452, 'x_rna_kl': 0.011, 'x_rna_elbo': 0.463, 'x_atac_nll': 0.562, 'x_atac_kl': 0.006, 'x_atac_elbo': 0.56

2023-05-05 20:11:56,055 ignite.handlers.early_stopping.EarlyStopping INFO: EarlyStopping: Stop training


[INFO] EarlyStopping: Restoring checkpoint "165"...
[INFO] EarlyStopping: Restoring checkpoint "165"...


  self.pid = os.fork()


[INFO] fit_SCGLUE: Estimating balancing weight...
[INFO] estimate_balancing_weight: Clustering cells...
[INFO] estimate_balancing_weight: Matching clusters...
[INFO] estimate_balancing_weight: Matching array shape = (15, 16)...
[INFO] estimate_balancing_weight: Estimating balancing weight...
[INFO] fit_SCGLUE: Fine-tuning SCGLUE model...




[INFO] check_graph: Checking variable coverage...
[INFO] check_graph: Checking edge attributes...
[INFO] check_graph: Checking self-loops...
[INFO] check_graph: Checking graph symmetry...
[INFO] PairedSCGLUEModel: Setting `graph_batch_size` = 3033
[INFO] PairedSCGLUEModel: Setting `align_burnin` = 94
[INFO] PairedSCGLUEModel: Setting `max_epochs` = 559
[INFO] PairedSCGLUEModel: Setting `patience` = 47
[INFO] PairedSCGLUEModel: Setting `reduce_lr_patience` = 24
[INFO] PairedSCGLUETrainer: Using training directory: "glue/fine-tune"
[INFO] PairedSCGLUETrainer: [Epoch 10] train={'g_nll': 0.447, 'g_kl': 0.04, 'g_elbo': 0.487, 'x_rna_nll': 0.425, 'x_rna_kl': 0.008, 'x_rna_elbo': 0.433, 'x_atac_nll': 0.539, 'x_atac_kl': 0.004, 'x_atac_elbo': 0.543, 'dsc_loss': 0.692, 'vae_loss': 1.044, 'gen_loss': 1.009, 'joint_cross_loss': 0.969, 'real_cross_loss': 0.992, 'cos_loss': 0.464}, val={'g_nll': 0.445, 'g_kl': 0.04, 'g_elbo': 0.485, 'x_rna_nll': 0.422, 'x_rna_kl': 0.008, 'x_rna_elbo': 0.431, 'x_ata

2023-05-05 21:05:56,619 ignite.handlers.early_stopping.EarlyStopping INFO: EarlyStopping: Stop training


[INFO] EarlyStopping: Restoring checkpoint "206"...
[INFO] EarlyStopping: Restoring checkpoint "206"...


  self.pid = os.fork()
  scib.cl.opt_louvain(adata,label_key='cell_type',cluster_key='cluster_key',plot=False,inplace=True,force=True,verbose = False)
 27%|██▋       | 3/11 [6:02:17<16:05:39, 7242.38s/it]

[INFO] fit_SCGLUE: Pretraining SCGLUE model...




[INFO] check_graph: Checking variable coverage...
[INFO] check_graph: Checking edge attributes...
[INFO] check_graph: Checking self-loops...
[INFO] check_graph: Checking graph symmetry...
[INFO] PairedSCGLUEModel: Setting `graph_batch_size` = 2984
[INFO] PairedSCGLUEModel: Setting `max_epochs` = 698
[INFO] PairedSCGLUEModel: Setting `patience` = 59
[INFO] PairedSCGLUEModel: Setting `reduce_lr_patience` = 30
[INFO] PairedSCGLUETrainer: Using training directory: "glue/pretrain"
[INFO] PairedSCGLUETrainer: [Epoch 10] train={'g_nll': 0.63, 'g_kl': 0.033, 'g_elbo': 0.664, 'x_rna_nll': 0.511, 'x_rna_kl': 0.013, 'x_rna_elbo': 0.524, 'x_atac_nll': 0.558, 'x_atac_kl': 0.008, 'x_atac_elbo': 0.566, 'dsc_loss': 0.683, 'vae_loss': 1.167, 'gen_loss': 1.133, 'joint_cross_loss': 1.067, 'real_cross_loss': 1.082, 'cos_loss': 0.364}, val={'g_nll': 0.634, 'g_kl': 0.034, 'g_elbo': 0.668, 'x_rna_nll': 0.511, 'x_rna_kl': 0.012, 'x_rna_elbo': 0.523, 'x_atac_nll': 0.556, 'x_atac_kl': 0.007, 'x_atac_elbo': 0.56

2023-05-05 21:47:19,254 ignite.handlers.early_stopping.EarlyStopping INFO: EarlyStopping: Stop training


[INFO] EarlyStopping: Restoring checkpoint "160"...
[INFO] EarlyStopping: Restoring checkpoint "160"...


  self.pid = os.fork()


[INFO] fit_SCGLUE: Estimating balancing weight...
[INFO] estimate_balancing_weight: Clustering cells...
[INFO] estimate_balancing_weight: Matching clusters...
[INFO] estimate_balancing_weight: Matching array shape = (13, 14)...
[INFO] estimate_balancing_weight: Estimating balancing weight...
[INFO] fit_SCGLUE: Fine-tuning SCGLUE model...




[INFO] check_graph: Checking variable coverage...
[INFO] check_graph: Checking edge attributes...
[INFO] check_graph: Checking self-loops...
[INFO] check_graph: Checking graph symmetry...
[INFO] PairedSCGLUEModel: Setting `graph_batch_size` = 2984
[INFO] PairedSCGLUEModel: Setting `align_burnin` = 117
[INFO] PairedSCGLUEModel: Setting `max_epochs` = 698
[INFO] PairedSCGLUEModel: Setting `patience` = 59
[INFO] PairedSCGLUEModel: Setting `reduce_lr_patience` = 30
[INFO] PairedSCGLUETrainer: Using training directory: "glue/fine-tune"
[INFO] PairedSCGLUETrainer: [Epoch 10] train={'g_nll': 0.444, 'g_kl': 0.039, 'g_elbo': 0.483, 'x_rna_nll': 0.486, 'x_rna_kl': 0.007, 'x_rna_elbo': 0.493, 'x_atac_nll': 0.533, 'x_atac_kl': 0.003, 'x_atac_elbo': 0.535, 'dsc_loss': 0.69, 'vae_loss': 1.099, 'gen_loss': 1.065, 'joint_cross_loss': 1.024, 'real_cross_loss': 1.045, 'cos_loss': 0.495}, val={'g_nll': 0.442, 'g_kl': 0.039, 'g_elbo': 0.48, 'x_rna_nll': 0.489, 'x_rna_kl': 0.007, 'x_rna_elbo': 0.495, 'x_at

2023-05-05 22:37:19,063 ignite.handlers.early_stopping.EarlyStopping INFO: EarlyStopping: Stop training


[INFO] EarlyStopping: Restoring checkpoint "187"...
[INFO] EarlyStopping: Restoring checkpoint "187"...


  self.pid = os.fork()
  scib.cl.opt_louvain(adata,label_key='cell_type',cluster_key='cluster_key',plot=False,inplace=True,force=True,verbose = False)
 36%|███▋      | 4/11 [7:33:23<12:43:08, 6541.20s/it]

[INFO] fit_SCGLUE: Pretraining SCGLUE model...




[INFO] check_graph: Checking variable coverage...
[INFO] check_graph: Checking edge attributes...
[INFO] check_graph: Checking self-loops...
[INFO] check_graph: Checking graph symmetry...
[INFO] PairedSCGLUEModel: Setting `graph_batch_size` = 3427
[INFO] PairedSCGLUEModel: Setting `max_epochs` = 504
[INFO] PairedSCGLUEModel: Setting `patience` = 42
[INFO] PairedSCGLUEModel: Setting `reduce_lr_patience` = 21
[INFO] PairedSCGLUETrainer: Using training directory: "glue/pretrain"
[INFO] PairedSCGLUETrainer: [Epoch 10] train={'g_nll': 0.602, 'g_kl': 0.036, 'g_elbo': 0.638, 'x_rna_nll': 0.5, 'x_rna_kl': 0.013, 'x_rna_elbo': 0.513, 'x_atac_nll': 0.578, 'x_atac_kl': 0.007, 'x_atac_elbo': 0.585, 'dsc_loss': 0.678, 'vae_loss': 1.174, 'gen_loss': 1.14, 'joint_cross_loss': 1.078, 'real_cross_loss': 1.1, 'cos_loss': 0.352}, val={'g_nll': 0.6, 'g_kl': 0.037, 'g_elbo': 0.637, 'x_rna_nll': 0.499, 'x_rna_kl': 0.012, 'x_rna_elbo': 0.511, 'x_atac_nll': 0.58, 'x_atac_kl': 0.006, 'x_atac_elbo': 0.586, 'dsc

2023-05-05 23:13:26,799 ignite.handlers.early_stopping.EarlyStopping INFO: EarlyStopping: Stop training


[INFO] EarlyStopping: Restoring checkpoint "145"...
[INFO] EarlyStopping: Restoring checkpoint "145"...


  self.pid = os.fork()


[INFO] fit_SCGLUE: Estimating balancing weight...
[INFO] estimate_balancing_weight: Clustering cells...
[INFO] estimate_balancing_weight: Matching clusters...
[INFO] estimate_balancing_weight: Matching array shape = (16, 17)...
[INFO] estimate_balancing_weight: Estimating balancing weight...
[INFO] fit_SCGLUE: Fine-tuning SCGLUE model...




[INFO] check_graph: Checking variable coverage...
[INFO] check_graph: Checking edge attributes...
[INFO] check_graph: Checking self-loops...
[INFO] check_graph: Checking graph symmetry...
[INFO] PairedSCGLUEModel: Setting `graph_batch_size` = 3427
[INFO] PairedSCGLUEModel: Setting `align_burnin` = 84
[INFO] PairedSCGLUEModel: Setting `max_epochs` = 504
[INFO] PairedSCGLUEModel: Setting `patience` = 42
[INFO] PairedSCGLUEModel: Setting `reduce_lr_patience` = 21
[INFO] PairedSCGLUETrainer: Using training directory: "glue/fine-tune"
[INFO] PairedSCGLUETrainer: [Epoch 10] train={'g_nll': 0.445, 'g_kl': 0.039, 'g_elbo': 0.483, 'x_rna_nll': 0.479, 'x_rna_kl': 0.01, 'x_rna_elbo': 0.489, 'x_atac_nll': 0.56, 'x_atac_kl': 0.004, 'x_atac_elbo': 0.564, 'dsc_loss': 0.69, 'vae_loss': 1.124, 'gen_loss': 1.089, 'joint_cross_loss': 1.044, 'real_cross_loss': 1.068, 'cos_loss': 0.441}, val={'g_nll': 0.443, 'g_kl': 0.039, 'g_elbo': 0.481, 'x_rna_nll': 0.487, 'x_rna_kl': 0.01, 'x_rna_elbo': 0.497, 'x_atac_

Engine run is terminating due to exception: 


[INFO] PairedSCGLUETrainer: Stopping training due to user interrupt...


  self.pid = os.fork()
  scib.cl.opt_louvain(adata,label_key='cell_type',cluster_key='cluster_key',plot=False,inplace=True,force=True,verbose = False)
 45%|████▌     | 5/11 [8:35:41<9:13:01, 5530.24s/it] 

[INFO] fit_SCGLUE: Pretraining SCGLUE model...




[INFO] check_graph: Checking variable coverage...
[INFO] check_graph: Checking edge attributes...
[INFO] check_graph: Checking self-loops...
[INFO] check_graph: Checking graph symmetry...
[INFO] PairedSCGLUEModel: Setting `graph_batch_size` = 4560
[INFO] PairedSCGLUEModel: Setting `max_epochs` = 790
[INFO] PairedSCGLUEModel: Setting `patience` = 66
[INFO] PairedSCGLUEModel: Setting `reduce_lr_patience` = 33
[INFO] PairedSCGLUETrainer: Using training directory: "glue/pretrain"
[INFO] PairedSCGLUETrainer: [Epoch 10] train={'g_nll': 0.613, 'g_kl': 0.024, 'g_elbo': 0.637, 'x_rna_nll': 0.446, 'x_rna_kl': 0.014, 'x_rna_elbo': 0.459, 'x_atac_nll': 0.638, 'x_atac_kl': 0.009, 'x_atac_elbo': 0.647, 'dsc_loss': 0.681, 'vae_loss': 1.182, 'gen_loss': 1.148, 'joint_cross_loss': 1.081, 'real_cross_loss': 1.095, 'cos_loss': 0.303}, val={'g_nll': 0.613, 'g_kl': 0.024, 'g_elbo': 0.637, 'x_rna_nll': 0.451, 'x_rna_kl': 0.012, 'x_rna_elbo': 0.463, 'x_atac_nll': 0.671, 'x_atac_kl': 0.009, 'x_atac_elbo': 0.6

Engine run is terminating due to exception: 


[INFO] PairedSCGLUETrainer: Stopping training due to user interrupt...


  self.pid = os.fork()
 45%|████▌     | 5/11 [8:38:51<10:22:37, 6226.27s/it]

KeyboardInterrupt



In [26]:
# Rebuild the metrics table from all rows collected so far; the column names
# follow the order of the values in each `results` row.
result_columns = [
    "donor",
    "foscttm",
    "label_transfer_accuracy",
    "alignment_score",
    "rmse",
    "nmi",
    "asw_label",
    "ari",
    "asw_batch",
    "graph_connectivity",
]
df_results = pd.DataFrame(results, columns=result_columns)

In [28]:
# Overwrite the saved metrics table with the current rows (no index column).
results_csv = 'output/scglue_2.csv'
df_results.to_csv(results_csv, index=False)

In [29]:
# Column names, in the same order as the values appended to `results` below.
result_columns = [
    "donor", "foscttm", "label_transfer_accuracy", "alignment_score", "rmse",
    "nmi", "asw_label", "ari", "asw_batch", "graph_connectivity",
]

# Train and evaluate SCGLUE for the listed donors, saving the results table
# after each one so completed work survives an interruption.
for i in tqdm(['s3d3', 's3d6']):
    # --- Load both modalities and the guidance graph for this donor ---
    donor_dir = f'data/{i}'
    atac = sc.read(f'{donor_dir}/atac-glue.h5ad')
    rna = sc.read(f'{donor_dir}/rna-glue.h5ad')
    guidance = nx.read_graphml(f'{donor_dir}/guidance.graphml.gz')

    # --- Configure datasets and restrict the graph to highly variable features ---
    scglue.models.configure_dataset(
        rna, "NB", use_highly_variable=True,
        use_layer="counts", use_rep="X_pca", use_obs_names=True,
    )
    scglue.models.configure_dataset(
        atac, "NB", use_highly_variable=True,
        use_layer="counts", use_rep="X_lsi_red", use_obs_names=True,
    )
    hv_rna_features = rna.var.query("highly_variable").index
    hv_atac_features = atac.var.query("highly_variable").index
    guidance_hvf = guidance.subgraph(chain(hv_rna_features, hv_atac_features)).copy()

    # --- Train ---
    glue = scglue.models.fit_SCGLUE(
        {"rna": rna, "atac": atac},
        guidance_hvf,
        model=scglue.models.PairedSCGLUEModel,
        fit_kws={"directory": "glue"},
    )

    # --- Embed and combine ---
    rna.obsm["X_glue"] = glue.encode_data("rna", rna)
    atac.obsm["X_glue"] = glue.encode_data("atac", atac)
    adata = sc.concat(
        [rna, atac],
        join='outer',
        label='batch',
        index_unique='-',
        keys=["RNA", "ATAC"],
    )

    # Evaluation metrics
    rna_latent = rna.obsm["X_glue"]
    atac_latent = atac.obsm["X_glue"]
    f = fcts.foscttm(rna_latent, atac_latent)
    lta = fcts.transfer_accuracy(
        rna_latent, atac_latent, rna.obs['cell_type'], atac.obs['cell_type']
    )
    alg_score = fcts.alignment_score(rna_latent, atac_latent)
    rmse = fcts.calculate_rmse(rna_latent, atac_latent)

    # Bio-conservation metrics (neighbour graph must exist before clustering)
    sc.pp.neighbors(adata, use_rep='X_glue')
    scib.cl.opt_louvain(
        adata,
        label_key='cell_type',
        cluster_key='cluster_key',
        plot=False,
        inplace=True,
        force=True,
        verbose=False,
    )
    nmi = scib.me.nmi(adata, cluster_key='cluster_key', label_key='cell_type')
    asw_label = scib.me.silhouette(adata, label_key='cell_type', embed='X_glue')
    ari = scib.me.ari(adata, cluster_key='cluster_key', label_key='cell_type')

    # Batch correction metrics
    asw_batch = scib.me.silhouette_batch(
        adata,
        batch_key='batch',
        label_key='cell_type',
        embed='X_glue',
        verbose=False,
    )
    graph_connectivity = scib.me.graph_connectivity(adata, label_key='cell_type')

    # Record this donor and immediately rewrite the CSV with all rows so far.
    results.append([i, f, lta, alg_score, rmse, nmi, asw_label, ari, asw_batch, graph_connectivity])
    df_results = pd.DataFrame(results, columns=result_columns)
    df_results.to_csv('output/scglue_2.csv', index=False)

  0%|          | 0/2 [00:00<?, ?it/s]

[INFO] fit_SCGLUE: Pretraining SCGLUE model...




[INFO] check_graph: Checking variable coverage...
[INFO] check_graph: Checking edge attributes...
[INFO] check_graph: Checking self-loops...
[INFO] check_graph: Checking graph symmetry...
[INFO] PairedSCGLUEModel: Setting `graph_batch_size` = 4560
[INFO] PairedSCGLUEModel: Setting `max_epochs` = 790
[INFO] PairedSCGLUEModel: Setting `patience` = 66
[INFO] PairedSCGLUEModel: Setting `reduce_lr_patience` = 33
[INFO] PairedSCGLUETrainer: Using training directory: "glue/pretrain"
[INFO] PairedSCGLUETrainer: [Epoch 10] train={'g_nll': 0.613, 'g_kl': 0.024, 'g_elbo': 0.637, 'x_rna_nll': 0.446, 'x_rna_kl': 0.014, 'x_rna_elbo': 0.459, 'x_atac_nll': 0.638, 'x_atac_kl': 0.009, 'x_atac_elbo': 0.647, 'dsc_loss': 0.681, 'vae_loss': 1.182, 'gen_loss': 1.148, 'joint_cross_loss': 1.081, 'real_cross_loss': 1.095, 'cos_loss': 0.304}, val={'g_nll': 0.613, 'g_kl': 0.024, 'g_elbo': 0.637, 'x_rna_nll': 0.451, 'x_rna_kl': 0.012, 'x_rna_elbo': 0.463, 'x_atac_nll': 0.671, 'x_atac_kl': 0.009, 'x_atac_elbo': 0.6

2023-05-06 00:12:08,611 ignite.handlers.early_stopping.EarlyStopping INFO: EarlyStopping: Stop training


[INFO] EarlyStopping: No usable checkpoint found. Skipping checkpoint restoration.
[INFO] EarlyStopping: No usable checkpoint found. Skipping checkpoint restoration.


  self.pid = os.fork()


[INFO] fit_SCGLUE: Estimating balancing weight...
[INFO] estimate_balancing_weight: Clustering cells...
[INFO] estimate_balancing_weight: Matching clusters...
[INFO] estimate_balancing_weight: Matching array shape = (18, 17)...
[INFO] estimate_balancing_weight: Estimating balancing weight...
[INFO] fit_SCGLUE: Fine-tuning SCGLUE model...




[INFO] check_graph: Checking variable coverage...
[INFO] check_graph: Checking edge attributes...
[INFO] check_graph: Checking self-loops...
[INFO] check_graph: Checking graph symmetry...
[INFO] PairedSCGLUEModel: Setting `graph_batch_size` = 4560
[INFO] PairedSCGLUEModel: Setting `align_burnin` = 132
[INFO] PairedSCGLUEModel: Setting `max_epochs` = 790
[INFO] PairedSCGLUEModel: Setting `patience` = 66
[INFO] PairedSCGLUEModel: Setting `reduce_lr_patience` = 33
[INFO] PairedSCGLUETrainer: Using training directory: "glue/fine-tune"
[INFO] PairedSCGLUETrainer: [Epoch 10] train={'g_nll': 0.453, 'g_kl': 0.033, 'g_elbo': 0.485, 'x_rna_nll': 0.422, 'x_rna_kl': 0.009, 'x_rna_elbo': 0.431, 'x_atac_nll': 0.62, 'x_atac_kl': 0.004, 'x_atac_elbo': 0.624, 'dsc_loss': 0.693, 'vae_loss': 1.126, 'gen_loss': 1.091, 'joint_cross_loss': 1.044, 'real_cross_loss': 1.059, 'cos_loss': 0.434}, val={'g_nll': 0.452, 'g_kl': 0.032, 'g_elbo': 0.484, 'x_rna_nll': 0.417, 'x_rna_kl': 0.009, 'x_rna_elbo': 0.426, 'x_a

2023-05-06 00:51:01,646 ignite.handlers.early_stopping.EarlyStopping INFO: EarlyStopping: Stop training


[INFO] EarlyStopping: Restoring checkpoint "287"...
[INFO] EarlyStopping: Restoring checkpoint "287"...


  self.pid = os.fork()
  scib.cl.opt_louvain(adata,label_key='cell_type',cluster_key='cluster_key',plot=False,inplace=True,force=True,verbose = False)
 50%|█████     | 1/2 [1:06:37<1:06:37, 3997.47s/it]

[INFO] fit_SCGLUE: Pretraining SCGLUE model...




[INFO] check_graph: Checking variable coverage...
[INFO] check_graph: Checking edge attributes...
[INFO] check_graph: Checking self-loops...
[INFO] check_graph: Checking graph symmetry...
[INFO] PairedSCGLUEModel: Setting `graph_batch_size` = 3398
[INFO] PairedSCGLUEModel: Setting `max_epochs` = 2033
[INFO] PairedSCGLUEModel: Setting `patience` = 170
[INFO] PairedSCGLUEModel: Setting `reduce_lr_patience` = 85
[INFO] PairedSCGLUETrainer: Using training directory: "glue/pretrain"
[INFO] PairedSCGLUETrainer: [Epoch 10] train={'g_nll': 0.583, 'g_kl': 0.019, 'g_elbo': 0.602, 'x_rna_nll': 0.535, 'x_rna_kl': 0.018, 'x_rna_elbo': 0.554, 'x_atac_nll': 0.598, 'x_atac_kl': 0.014, 'x_atac_elbo': 0.612, 'dsc_loss': 0.649, 'vae_loss': 1.244, 'gen_loss': 1.211, 'joint_cross_loss': 1.13, 'real_cross_loss': 1.158, 'cos_loss': 0.403}, val={'g_nll': 0.584, 'g_kl': 0.019, 'g_elbo': 0.603, 'x_rna_nll': 0.519, 'x_rna_kl': 0.017, 'x_rna_elbo': 0.536, 'x_atac_nll': 0.613, 'x_atac_kl': 0.014, 'x_atac_elbo': 0.

2023-05-06 02:17:32,799 ignite.handlers.early_stopping.EarlyStopping INFO: EarlyStopping: Stop training


[INFO] EarlyStopping: Restoring checkpoint "370"...
[INFO] EarlyStopping: Restoring checkpoint "370"...


  self.pid = os.fork()


[INFO] fit_SCGLUE: Estimating balancing weight...
[INFO] estimate_balancing_weight: Clustering cells...
[INFO] estimate_balancing_weight: Matching clusters...
[INFO] estimate_balancing_weight: Matching array shape = (15, 13)...
[INFO] estimate_balancing_weight: Estimating balancing weight...
[INFO] fit_SCGLUE: Fine-tuning SCGLUE model...




[INFO] check_graph: Checking variable coverage...
[INFO] check_graph: Checking edge attributes...
[INFO] check_graph: Checking self-loops...
[INFO] check_graph: Checking graph symmetry...
[INFO] PairedSCGLUEModel: Setting `graph_batch_size` = 3398
[INFO] PairedSCGLUEModel: Setting `align_burnin` = 339
[INFO] PairedSCGLUEModel: Setting `max_epochs` = 2033
[INFO] PairedSCGLUEModel: Setting `patience` = 170
[INFO] PairedSCGLUEModel: Setting `reduce_lr_patience` = 85
[INFO] PairedSCGLUETrainer: Using training directory: "glue/fine-tune"
[INFO] PairedSCGLUETrainer: [Epoch 10] train={'g_nll': 0.446, 'g_kl': 0.04, 'g_elbo': 0.486, 'x_rna_nll': 0.477, 'x_rna_kl': 0.01, 'x_rna_elbo': 0.487, 'x_atac_nll': 0.551, 'x_atac_kl': 0.004, 'x_atac_elbo': 0.555, 'dsc_loss': 0.697, 'vae_loss': 1.112, 'gen_loss': 1.078, 'joint_cross_loss': 1.035, 'real_cross_loss': 1.068, 'cos_loss': 0.48}, val={'g_nll': 0.442, 'g_kl': 0.04, 'g_elbo': 0.482, 'x_rna_nll': 0.489, 'x_rna_kl': 0.01, 'x_rna_elbo': 0.499, 'x_ata

2023-05-06 04:33:54,239 ignite.handlers.early_stopping.EarlyStopping INFO: EarlyStopping: Stop training


[INFO] EarlyStopping: Restoring checkpoint "664"...
[INFO] EarlyStopping: Restoring checkpoint "664"...


  self.pid = os.fork()
  scib.cl.opt_louvain(adata,label_key='cell_type',cluster_key='cluster_key',plot=False,inplace=True,force=True,verbose = False)
100%|██████████| 2/2 [4:49:05<00:00, 8672.54s/it]  


In [30]:
# Turn the metric rows accumulated in `results` into a table (one row per
# donor, one column per metric) and write it to disk without the index column.
df_results = pd.DataFrame(
    data=results,
    columns=[
        "donor",
        "foscttm",
        "label_transfer_accuracy",
        "alignment_score",
        "rmse",
        "nmi",
        "asw_label",
        "ari",
        "asw_batch",
        "graph_connectivity",
    ],
)
df_results.to_csv(path_or_buf='output/scglue_2.csv', index=False)

In [31]:
# Display the metrics table built from `results` (rich DataFrame repr).
df_results

Unnamed: 0,donor,foscttm,label_transfer_accuracy,alignment_score,rmse,nmi,asw_label,ari,asw_batch,graph_connectivity
0,s1d2,0.0422,0.842285,0.956476,0.169282,0.700974,0.585814,0.648648,0.910727,0.951778
1,s1d3,0.0326,0.843421,0.968863,0.149477,0.718436,0.57009,0.513514,0.937366,0.972907
2,s2d1,0.0815,0.86019,0.900669,0.168181,0.724143,0.582817,0.735471,0.888634,0.905477
3,s2d4,0.0582,0.872689,0.931155,0.125049,0.751659,0.614832,0.707645,0.900886,0.92751
4,s2d5,0.1006,0.902758,0.874909,0.138881,0.773474,0.620914,0.81402,0.868497,0.923431
5,s3d10,0.0426,0.869046,0.931326,0.134223,0.79579,0.625322,0.720174,0.935331,0.96739
6,s3d3,0.0356,0.851561,0.971671,0.172021,0.779735,0.590462,0.64208,0.958223,0.97258
7,s3d6,0.0859,0.814175,0.970419,0.260126,0.640087,0.538489,0.57251,0.92065,0.944417


In [None]:
# Benchmark SCGLUE on further donors. For every donor the loop loads the
# preprocessed RNA/ATAC AnnData objects and the guidance graph, trains a paired
# SCGLUE model, embeds both modalities, scores the joint embedding and appends
# one row to the notebook-level `results` list (created in an earlier cell).
#
# The loop variable and per-donor objects (`i`, `rna`, `atac`, `glue`, `adata`,
# metric values) are intentionally left as notebook globals so that later cells
# can keep inspecting the most recently processed donor.

# Column order must match the row appended to `results` at the end of each
# iteration; hoisted out of the loop because it never changes.
result_columns = [
    "donor", "foscttm", "label_transfer_accuracy", "alignment_score", "rmse",
    "nmi", "asw_label", "ari", "asw_batch", "graph_connectivity",
]

for i in tqdm(['s4d8', 's4d9']):
    # Training a single donor can take more than an hour, and a re-run of this
    # cell (e.g. after an interrupted run) would otherwise retrain the donor and
    # append a second, duplicate row for it. Skip donors that already have a row.
    # NOTE: skipped donors leave `rna`/`atac`/`glue`/`adata` untouched; remove the
    # donor's row from `results` first if a recomputation is actually wanted.
    if any(row[0] == i for row in results):
        tqdm.write(f"Skipping donor {i}: already present in results")
        continue

    atac = sc.read(f'data/{i}/atac-glue.h5ad')
    rna = sc.read(f'data/{i}/rna-glue.h5ad')
    guidance = nx.read_graphml(f'data/{i}/guidance.graphml.gz')

    # Same dataset configuration as the single-donor cells at the top of the
    # notebook: raw counts layer with an NB likelihood, PCA / LSI as input
    # representation, highly variable features only.
    scglue.models.configure_dataset(
        rna, "NB", use_highly_variable=True,
        use_layer="counts", use_rep="X_pca",
        use_obs_names=True
    )
    scglue.models.configure_dataset(
        atac, "NB", use_highly_variable=True,
        use_layer="counts", use_rep="X_lsi_red",
        use_obs_names=True
    )

    # Restrict the guidance graph to the highly variable features of both
    # modalities; `.copy()` detaches the subgraph view from the full graph.
    guidance_hvf = guidance.subgraph(chain(
        rna.var.query("highly_variable").index,
        atac.var.query("highly_variable").index
    )).copy()

    # Checkpoints/logs go to the shared "glue" directory for every donor.
    glue = scglue.models.fit_SCGLUE(
        {"rna": rna, "atac": atac}, guidance_hvf,
        model=scglue.models.PairedSCGLUEModel,
        fit_kws={"directory": "glue"}
    )

    # Embed each modality with the trained model and stack both into one
    # AnnData whose obs["batch"] records the modality ("RNA" / "ATAC").
    rna.obsm["X_glue"] = glue.encode_data("rna", rna)
    atac.obsm["X_glue"] = glue.encode_data("atac", atac)
    adata = sc.concat(
        [rna, atac], join='outer', label='batch',
        index_unique='-', keys=["RNA", "ATAC"]
    )

    # Evaluation metrics
    f = fcts.foscttm(rna.obsm["X_glue"], atac.obsm["X_glue"])
    lta = fcts.transfer_accuracy(
        rna.obsm["X_glue"], atac.obsm["X_glue"],
        rna.obs['cell_type'], atac.obs['cell_type']
    )
    alg_score = fcts.alignment_score(rna.obsm["X_glue"], atac.obsm["X_glue"])
    rmse = fcts.calculate_rmse(rna.obsm["X_glue"], atac.obsm["X_glue"])

    # Bio-conservation metrics
    # The neighbour graph on the joint embedding is computed before the
    # clustering call, exactly as in the original statement order.
    sc.pp.neighbors(adata, use_rep='X_glue')
    scib.cl.opt_louvain(
        adata, label_key='cell_type', cluster_key='cluster_key',
        plot=False, inplace=True, force=True, verbose=False
    )
    nmi = scib.me.nmi(adata, cluster_key='cluster_key', label_key='cell_type')
    asw_label = scib.me.silhouette(adata, label_key='cell_type', embed='X_glue')
    ari = scib.me.ari(adata, cluster_key='cluster_key', label_key='cell_type')

    # Batch correction metrics
    asw_batch = scib.me.silhouette_batch(
        adata, batch_key='batch', label_key='cell_type',
        embed='X_glue', verbose=False
    )
    graph_connectivity = scib.me.graph_connectivity(adata, label_key='cell_type')

    results.append([i, f, lta, alg_score, rmse, nmi, asw_label, ari, asw_batch, graph_connectivity])

    # Rebuild and save the table after every donor so that an interruption
    # during a later donor does not lose the rows computed so far.
    df_results = pd.DataFrame(results, columns=result_columns)
    df_results.to_csv('output/scglue_2.csv', index=False)

  0%|          | 0/2 [00:00<?, ?it/s]

[INFO] fit_SCGLUE: Pretraining SCGLUE model...




[INFO] check_graph: Checking variable coverage...
[INFO] check_graph: Checking edge attributes...
[INFO] check_graph: Checking self-loops...
[INFO] check_graph: Checking graph symmetry...
[INFO] PairedSCGLUEModel: Setting `graph_batch_size` = 4337
[INFO] PairedSCGLUEModel: Setting `max_epochs` = 346
[INFO] PairedSCGLUEModel: Setting `patience` = 29
[INFO] PairedSCGLUEModel: Setting `reduce_lr_patience` = 15
[INFO] PairedSCGLUETrainer: Using training directory: "glue/pretrain"
[INFO] PairedSCGLUETrainer: [Epoch 10] train={'g_nll': 0.604, 'g_kl': 0.036, 'g_elbo': 0.64, 'x_rna_nll': 0.401, 'x_rna_kl': 0.008, 'x_rna_elbo': 0.408, 'x_atac_nll': 0.605, 'x_atac_kl': 0.004, 'x_atac_elbo': 0.609, 'dsc_loss': 0.691, 'vae_loss': 1.091, 'gen_loss': 1.057, 'joint_cross_loss': 1.004, 'real_cross_loss': 1.014, 'cos_loss': 0.408}, val={'g_nll': 0.598, 'g_kl': 0.036, 'g_elbo': 0.634, 'x_rna_nll': 0.405, 'x_rna_kl': 0.007, 'x_rna_elbo': 0.412, 'x_atac_nll': 0.605, 'x_atac_kl': 0.004, 'x_atac_elbo': 0.60

2023-05-06 12:50:08,987 ignite.handlers.early_stopping.EarlyStopping INFO: EarlyStopping: Stop training


[INFO] EarlyStopping: Restoring checkpoint "155"...
[INFO] EarlyStopping: Restoring checkpoint "155"...


  self.pid = os.fork()


[INFO] fit_SCGLUE: Estimating balancing weight...
[INFO] estimate_balancing_weight: Clustering cells...
[INFO] estimate_balancing_weight: Matching clusters...
[INFO] estimate_balancing_weight: Matching array shape = (16, 18)...
[INFO] estimate_balancing_weight: Estimating balancing weight...
[INFO] fit_SCGLUE: Fine-tuning SCGLUE model...




[INFO] check_graph: Checking variable coverage...
[INFO] check_graph: Checking edge attributes...
[INFO] check_graph: Checking self-loops...
[INFO] check_graph: Checking graph symmetry...
[INFO] PairedSCGLUEModel: Setting `graph_batch_size` = 4337
[INFO] PairedSCGLUEModel: Setting `align_burnin` = 58
[INFO] PairedSCGLUEModel: Setting `max_epochs` = 346
[INFO] PairedSCGLUEModel: Setting `patience` = 29
[INFO] PairedSCGLUEModel: Setting `reduce_lr_patience` = 15
[INFO] PairedSCGLUETrainer: Using training directory: "glue/fine-tune"
[INFO] PairedSCGLUETrainer: [Epoch 10] train={'g_nll': 0.437, 'g_kl': 0.027, 'g_elbo': 0.465, 'x_rna_nll': 0.392, 'x_rna_kl': 0.006, 'x_rna_elbo': 0.399, 'x_atac_nll': 0.595, 'x_atac_kl': 0.003, 'x_atac_elbo': 0.598, 'dsc_loss': 0.692, 'vae_loss': 1.064, 'gen_loss': 1.03, 'joint_cross_loss': 0.989, 'real_cross_loss': 0.998, 'cos_loss': 0.448}, val={'g_nll': 0.433, 'g_kl': 0.027, 'g_elbo': 0.461, 'x_rna_nll': 0.392, 'x_rna_kl': 0.006, 'x_rna_elbo': 0.398, 'x_at

2023-05-06 13:25:46,661 ignite.handlers.early_stopping.EarlyStopping INFO: EarlyStopping: Stop training


[INFO] EarlyStopping: Restoring checkpoint "94"...
[INFO] EarlyStopping: Restoring checkpoint "94"...


  self.pid = os.fork()
  scib.cl.opt_louvain(adata,label_key='cell_type',cluster_key='cluster_key',plot=False,inplace=True,force=True,verbose = False)
 50%|█████     | 1/2 [1:21:51<1:21:51, 4911.48s/it]

[INFO] fit_SCGLUE: Pretraining SCGLUE model...




[INFO] check_graph: Checking variable coverage...
[INFO] check_graph: Checking edge attributes...
[INFO] check_graph: Checking self-loops...
[INFO] check_graph: Checking graph symmetry...
[INFO] PairedSCGLUEModel: Setting `graph_batch_size` = 4737
[INFO] PairedSCGLUEModel: Setting `max_epochs` = 790
[INFO] PairedSCGLUEModel: Setting `patience` = 66
[INFO] PairedSCGLUEModel: Setting `reduce_lr_patience` = 33
[INFO] PairedSCGLUETrainer: Using training directory: "glue/pretrain"
[INFO] PairedSCGLUETrainer: [Epoch 10] train={'g_nll': 0.624, 'g_kl': 0.024, 'g_elbo': 0.648, 'x_rna_nll': 0.395, 'x_rna_kl': 0.011, 'x_rna_elbo': 0.406, 'x_atac_nll': 0.626, 'x_atac_kl': 0.009, 'x_atac_elbo': 0.635, 'dsc_loss': 0.692, 'vae_loss': 1.113, 'gen_loss': 1.079, 'joint_cross_loss': 1.017, 'real_cross_loss': 1.027, 'cos_loss': 0.304}, val={'g_nll': 0.626, 'g_kl': 0.025, 'g_elbo': 0.651, 'x_rna_nll': 0.397, 'x_rna_kl': 0.01, 'x_rna_elbo': 0.407, 'x_atac_nll': 0.637, 'x_atac_kl': 0.008, 'x_atac_elbo': 0.64

In [None]:
# Rebuild the metrics table from the current contents of `results` and
# overwrite the CSV on disk (index column omitted).
df_results = pd.DataFrame(
    data=results,
    columns=[
        "donor",
        "foscttm",
        "label_transfer_accuracy",
        "alignment_score",
        "rmse",
        "nmi",
        "asw_label",
        "ari",
        "asw_batch",
        "graph_connectivity",
    ],
)
df_results.to_csv(path_or_buf='output/scglue_2.csv', index=False)

In [37]:
# Display the metrics table built from `results` (rich DataFrame repr).
df_results

Unnamed: 0,donor,foscttm,label_transfer_accuracy,alignment_score,rmse,nmi,asw_label,ari,asw_batch,graph_connectivity
0,s1d2,0.0422,0.842285,0.956476,0.169282,0.700974,0.585814,0.648648,0.910727,0.951778
1,s1d3,0.0326,0.843421,0.968863,0.149477,0.718436,0.57009,0.513514,0.937366,0.972907
2,s2d1,0.0815,0.86019,0.900669,0.168181,0.724143,0.582817,0.735471,0.888634,0.905477
3,s2d4,0.0582,0.872689,0.931155,0.125049,0.751659,0.614832,0.707645,0.900886,0.92751
4,s2d5,0.1006,0.902758,0.874909,0.138881,0.773474,0.620914,0.81402,0.868497,0.923431
5,s3d10,0.0426,0.869046,0.931326,0.134223,0.79579,0.625322,0.720174,0.935331,0.96739
6,s3d3,0.0356,0.851561,0.971671,0.172021,0.779735,0.590462,0.64208,0.958223,0.97258
7,s3d6,0.0859,0.814175,0.970419,0.260126,0.640087,0.538489,0.57251,0.92065,0.944417
8,s4d8,0.0535,0.878595,0.94437,0.130804,0.717416,0.58816,0.572616,0.932714,0.982795
9,s4d9,0.0424,0.825896,0.963974,0.141999,0.765939,0.585352,0.707864,0.942531,0.964065
