### Data integration algorithms comparison

In [None]:
import scanpy as sc
import hdf5plugin
import matplotlib.pyplot as plt
from matplotlib.pyplot import rc_context
import scanpy.external as sce
import numpy as np
import scanorama
import scib
import os
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Verbose scanpy logging for the rest of the notebook.
sc.settings.verbosity = 3
# Default figure look: small, white background, framed axes.
# Individual plots below override the size through rc_context.
sc.settings.set_figure_params(
    dpi=80,
    facecolor='white',
    figsize=(3, 3),
    frameon=True,
)
# Print the header with the versions of the loaded libraries.
sc.logging.print_header()
# Applied after set_figure_params so this rcParams entry is the one in effect.
plt.rcParams['axes.unicode_minus'] = False

In [None]:
# Accession number of the dataset (a GEO series ID, judging by the GSE prefix).
# It is interpolated into the input .h5ad file name when the data is read.
DATASET_ACCESSION_NUMBER = "GSE181279"

### Read merged and preprocessed data and create working copies (BBKNN and Scanorama each get their own copy; Harmony runs on the original object)

In [None]:
# Load the merged, preprocessed AnnData that belongs to the chosen accession.
merged_h5ad_path = f"./datasets/{DATASET_ACCESSION_NUMBER}_merged_qcdr.h5ad"
adata = sc.read(merged_h5ad_path)
# Trailing expression: shows the object summary as the cell output.
adata

In [None]:
# Independent copy of the merged data intended for the Scanorama run.
# NOTE(review): adata_sc is assigned again from adata.copy() in the Scanorama
# section further down, so this first copy looks redundant - confirm before removing.
adata_sc = adata.copy()
adata_sc

In [None]:
# Independent copy of the merged data for BBKNN, so that its results do not
# end up on the shared `adata` object.
adata_bbknn = adata.copy()
adata_bbknn

### Data integration using BBKNN

In [None]:
# Batch-balanced k-nearest-neighbours integration, with batches taken from the
# `batch` column of .obs.
# NOTE(review): presumably relies on a PCA already stored in the preprocessed
# input file - confirm against the preprocessing notebook.
sce.pp.bbknn(adata_bbknn, batch_key='batch')  # running bbknn 1.3.6

In [None]:
# Show the object summary to see what the BBKNN step added.
adata_bbknn

In [None]:
# Compute a t-SNE embedding for the BBKNN object.
# NOTE(review): BBKNN corrects the neighbour graph only, while sc.tl.tsne builds
# its embedding from a representation (use_rep / PCA) and not from that graph.
# This t-SNE therefore most likely shows the uncorrected data; a graph-based
# embedding such as sc.tl.umap would reflect the BBKNN result. Confirm against
# the scanpy and bbknn documentation before comparing these plots.
sc.tl.tsne(adata_bbknn)

In [None]:
# Show the object summary again, now after the t-SNE step.
adata_bbknn

### Visualize BBKNN results

In [None]:
# t-SNE of the BBKNN object, coloured by the `batch_name` column (6x6 figure).
with rc_context({'figure.figsize': (6, 6)}):
    sc.pl.tsne(
        adata_bbknn,
        color=['batch_name'],
        title=' ',
        legend_fontsize=10,
    )

In [None]:
# t-SNE of the BBKNN object, coloured by the `disease_type` column (6x6 figure).
with rc_context({'figure.figsize': (6, 6)}):
    sc.pl.tsne(
        adata_bbknn,
        color=['disease_type'],
        title=' ',
        legend_fontsize=10,
    )

### Data integration with scanorama

In [None]:
# Split the merged object into one independent AnnData per batch, keyed by the
# batch label.
batches = adata.obs['batch'].cat.categories.tolist()
# .copy() materialises every subset: plain slicing returns views of `adata`,
# which would only be turned into real objects implicitly (with a warning) once
# Scanorama writes its embedding into .obsm.
alldata = {
    batch: adata[adata.obs['batch'] == batch, ].copy()
    for batch in batches
}

alldata

In [None]:
# Scanorama expects a list of AnnData objects, one per batch.
adatas = [*alldata.values()]

# Integrate in place: every element of `adatas` receives an `X_scanorama`
# entry in .obsm, reduced to 50 dimensions.
scanorama.integrate_scanpy(adatas, dimred=50)

In [None]:
# Collect the per-batch Scanorama embeddings into one matrix whose rows follow
# the row order of `adata`.
# Plain concatenation would give batch-sorted rows, which only line up with
# `adata` when its cells happen to be stored batch by batch. Each batch's rows
# are therefore written to the positions selected by the same mask that was
# used to split the data, so the within-batch order is preserved.
# `alldata` (batch label -> AnnData) and `adatas` share the same order.
n_dims = adatas[0].obsm['X_scanorama'].shape[1]
all_s = np.zeros((adata.n_obs, n_dims))
assigned = np.zeros(adata.n_obs, dtype=bool)
for batch, ad in zip(alldata, adatas):
    in_batch = (adata.obs['batch'] == batch).to_numpy()
    all_s[in_batch] = ad.obsm['X_scanorama']
    assigned |= in_batch
print(all_s.shape)

# Fail loudly instead of leaving all-zero rows for cells that were in no batch.
if not assigned.all():
    raise ValueError(
        f"{int((~assigned).sum())} cells have no Scanorama embedding; "
        "check adata.obs['batch'] for missing values"
    )

# add to the AnnData object, create a new object first
adata_sc = adata.copy()
adata_sc.obsm["Scanorama"] = all_s

In [None]:
# Show the object summary; .obsm should now list the "Scanorama" embedding.
adata_sc

In [None]:
# Neighbour graph and t-SNE, both computed on the Scanorama embedding
# (no UMAP is computed in this cell).
sc.pp.neighbors(adata_sc, use_rep="Scanorama")
sc.tl.tsne(adata_sc, use_rep="Scanorama")

### Visualize results with SCANORAMA

In [None]:
# t-SNE of the Scanorama object, coloured by the `batch_name` column (6x6 figure).
with rc_context({'figure.figsize': (6, 6)}):
    sc.pl.tsne(
        adata_sc,
        color=['batch_name'],
        title=' ',
        legend_fontsize=10,
    )

In [None]:
# t-SNE of the Scanorama object, coloured by the `disease_type` column (6x6 figure).
with rc_context({'figure.figsize': (6, 6)}):
    sc.pl.tsne(
        adata_sc,
        color=['disease_type'],
        title=' ',
        legend_fontsize=10,
    )

### Data integration with Harmonypy

In [None]:
# Harmony integration over the `batch` column. Unlike the BBKNN and Scanorama
# runs, this works directly on the original `adata` object rather than a copy.
sce.pp.harmony_integrate(adata, key='batch')

In [None]:
# Neighbour graph and t-SNE, both computed on the Harmony-adjusted PCA.
sc.pp.neighbors(adata, use_rep="X_pca_harmony")
sc.tl.tsne(adata, use_rep="X_pca_harmony")

### Visualize results with Harmonypy

In [None]:
# t-SNE of the Harmony-integrated object, coloured by the `batch_name` column
# (6x6 figure).
with rc_context({'figure.figsize': (6, 6)}):
    sc.pl.tsne(
        adata,
        color=['batch_name'],
        title=' ',
        legend_fontsize=10,
    )

In [None]:
# t-SNE of the Harmony-integrated object, coloured by the `disease_type` column
# (6x6 figure).
with rc_context({'figure.figsize': (6, 6)}):
    sc.pl.tsne(
        adata,
        color=['disease_type'],
        title=' ',
        legend_fontsize=10,
    )