## Environment setup

In [1]:
# NOTE(review): `x` is not read by any cell visible in this notebook; it looks
# like a leftover scratch cell. Confirm nothing else uses it before deleting.
x = 1

In [2]:
import warnings
# Ignore every Python warning for the rest of the session. This is placed before
# the imports below, so it is already in effect while those packages load.
warnings.simplefilter(action='ignore')

# NOTE(review): np, pd, os, scipy, muon, sns, plt and `r` are not referenced in
# the cells visible here; confirm whether they are still needed.
import scanpy as sc
import anndata as ad
import numpy as np
import pandas as pd
import os
import scipy
import scipy.io
import rpy2.rinterface_lib.callbacks
import anndata2ri
import logging
import muon

import seaborn as sns
import matplotlib.pyplot as plt

from rpy2.robjects import r
from rpy2.robjects import pandas2ri

# Only let rpy2's callback logger emit messages at ERROR level or above.
rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)

# Switch on the pandas and AnnData converters for rpy2; the `%%R -i adata_`
# cells further down rely on them to hand AnnData objects over to R.
pandas2ri.activate()
anndata2ri.activate()

# Register the `%%R` cell magic used by the R cells of this notebook.
%load_ext rpy2.ipython

In [3]:
%%R
# Raise the `future.globals.maxSize` option to 32000 MiB, written out in bytes.
options(future.globals.maxSize = 32000 * 1024 * 1024)

## Load datasets

In [None]:
# Read the processed multiome BMMC dataset; the path is relative to the
# notebook's working directory. The trailing expression displays its summary.
multiom = ad.read_h5ad(
    filename="GSE194122_openproblems_neurips2021_multiome_BMMC_processed.h5ad"
)
multiom

In [None]:
# bridge
# Keep only the 'site3_donor7_multiome' sample of the multiome dataset.
multiom_bridge = multiom[multiom.obs['Samplename'].isin(['site3_donor7_multiome'])].copy()

# `cite` was not assigned by any earlier cell, so this cell raised NameError on a
# fresh kernel. Load the processed CITE-seq dataset here (the same file that the
# ADT query section reads) before taking the 'site3_donor7_cite' sample from it.
cite = ad.read_h5ad("GSE194122_openproblems_neurips2021_cite_BMMC_processed.h5ad")
cite_bridge = cite[cite.obs['Samplename'].isin(['site3_donor7_cite'])].copy()

# Split the multiome bridge by feature type: ATAC features and GEX features.
atac_bridge = multiom_bridge[:, multiom_bridge.var['feature_types'] == 'ATAC'].copy()
rna_multiom_bridge = multiom_bridge[:, multiom_bridge.var['feature_types'] == 'GEX'].copy()

# Split the CITE-seq bridge by feature type: ADT features and GEX features.
adt_bridge = cite_bridge[:, cite_bridge.var['feature_types'] == 'ADT'].copy()
rna_cite_bridge = cite_bridge[:, cite_bridge.var['feature_types'] == 'GEX'].copy()

In [36]:
# rna
adata_ = ad.AnnData(rna_multiom_bridge.layers['counts'].A)
adata_.obs_names = rna_multiom_bridge.obs_names
adata_.var_names = rna_multiom_bridge.var_names
adata_.obs['celltype'] = rna_multiom_bridge.obs['cell_type']
adata_.obs['batch'] = rna_multiom_bridge.obs['Samplename']

In [37]:
%%R -i adata_
rna = as.Seurat(adata_, data=NULL, counts='X')
multiom <- rna
multiom <- RenameAssays(object = multiom, originalexp = "RNA") 
multiom[["ATAC"]] <- CreateAssayObject(counts = atac@assays$originalexp@data)

In [38]:
%%R
# Print the summary of the combined bridge object (assays, feature and cell counts).
multiom

An object of class Seurat 
129921 features across 1771 samples within 2 assays 
Active assay: RNA (13431 features, 0 variable features)
 2 layers present: counts, data
 1 other assay present: ATAC


In [45]:
%%R
# Serialize the combined bridge object to disk in the working directory.
saveRDS(object = multiom, file = "multiom_bridge.rds")

In [40]:
# Read the ATAC query data from the working directory.
# NOTE(review): no cell visible in this notebook writes "atac_query_s4d9_np.h5ad";
# presumably it is produced elsewhere. Record its provenance here.
atac_query = sc.read(filename="atac_query_s4d9_np.h5ad")

In [42]:
# atac
# Build a minimal AnnData holding only the ATAC query counts, their cell/feature
# names and two obs columns, so that only this is passed to R.
# `.toarray()` replaces the `.A` shorthand, which newer SciPy releases no longer
# provide on sparse containers.
adata_ = ad.AnnData(atac_query.layers['counts'].toarray())
adata_.obs_names = atac_query.obs_names
adata_.var_names = atac_query.var_names
adata_.obs['celltype'] = atac_query.obs['cell_type']
adata_.obs['batch'] = atac_query.obs['Samplename']

In [43]:
%%R -i adata_
# Turn the imported object into a Seurat object, taking counts from its 'X'
# assay and passing NULL for the data assay.
atac <- as.Seurat(adata_, counts = 'X', data = NULL)

In [44]:
%%R
# Serialize the ATAC query object to disk in the working directory.
saveRDS(object = atac, file = "atac_query.rds")

### Setup ADT query

In [41]:
# Read the processed CITE-seq BMMC dataset, then keep only the
# 'site4_donor9_cite' sample as an independent copy and display its summary.
cite_query = ad.read_h5ad(
    filename="GSE194122_openproblems_neurips2021_cite_BMMC_processed.h5ad"
)
cite_query = cite_query[
    cite_query.obs['Samplename'].isin(['site4_donor9_cite'])
].copy()
cite_query

AnnData object with n_obs × n_vars = 7365 × 14087
    obs: 'GEX_n_genes_by_counts', 'GEX_pct_counts_mt', 'GEX_size_factors', 'GEX_phase', 'ADT_n_antibodies_by_counts', 'ADT_total_counts', 'ADT_iso_count', 'cell_type', 'batch', 'ADT_pseudotime_order', 'GEX_pseudotime_order', 'Samplename', 'Site', 'DonorNumber', 'Modality', 'VendorLot', 'DonorID', 'DonorAge', 'DonorBMI', 'DonorBloodType', 'DonorRace', 'Ethnicity', 'DonorGender', 'QCMeds', 'DonorSmoker', 'is_train'
    var: 'feature_types', 'gene_id'
    uns: 'dataset_id', 'genome', 'organism'
    obsm: 'ADT_X_pca', 'ADT_X_umap', 'ADT_isotype_controls', 'GEX_X_pca', 'GEX_X_umap'
    layers: 'counts'

In [42]:
# Restrict the query sample to the features whose type is 'ADT' and display the result.
adt_query = cite_query[:, cite_query.var['feature_types'].eq('ADT')].copy()
adt_query

AnnData object with n_obs × n_vars = 7365 × 134
    obs: 'GEX_n_genes_by_counts', 'GEX_pct_counts_mt', 'GEX_size_factors', 'GEX_phase', 'ADT_n_antibodies_by_counts', 'ADT_total_counts', 'ADT_iso_count', 'cell_type', 'batch', 'ADT_pseudotime_order', 'GEX_pseudotime_order', 'Samplename', 'Site', 'DonorNumber', 'Modality', 'VendorLot', 'DonorID', 'DonorAge', 'DonorBMI', 'DonorBloodType', 'DonorRace', 'Ethnicity', 'DonorGender', 'QCMeds', 'DonorSmoker', 'is_train'
    var: 'feature_types', 'gene_id'
    uns: 'dataset_id', 'genome', 'organism'
    obsm: 'ADT_X_pca', 'ADT_X_umap', 'ADT_isotype_controls', 'GEX_X_pca', 'GEX_X_umap'
    layers: 'counts'

In [22]:
# Build a minimal AnnData holding only the ADT query counts, their cell/feature
# names and two obs columns, so that only this is passed to R.
# `.toarray()` replaces the `.A` shorthand, which newer SciPy releases no longer
# provide on sparse containers.
adata_ = ad.AnnData(adt_query.layers['counts'].toarray())
adata_.obs_names = adt_query.obs_names
adata_.var_names = adt_query.var_names
adata_.obs['celltype'] = adt_query.obs['cell_type']
adata_.obs['batch'] = adt_query.obs['Samplename']

In [23]:
%%R -i adata_
# Turn the imported object into a Seurat object, taking counts from its 'X'
# assay and passing NULL for the data assay.
query <- as.Seurat(adata_, counts = 'X', data = NULL)

In [24]:
%%R
# Print the summary of the ADT query object (assay, feature and cell counts).
query

An object of class Seurat 
134 features across 7365 samples within 1 assay 
Active assay: originalexp (134 features, 0 variable features)
 2 layers present: counts, data


In [25]:
%%R
# Serialize the ADT query object to disk in the working directory.
saveRDS(object = query, file = "adt_query.rds")