In [1]:
import argparse
import os
import pathlib
import subprocess
import sys
import time

import anndata
import numpy as np
import pandas as pd
import scanpy as sc
import torch
# import yaml
from sklearn.preprocessing import normalize

In [3]:
# RNA preprocessing: reduce the RNA count matrix with PCA and load the
# annotation labels of both modalities.
adata_rna = sc.read("/data1/xcx/unpaired-data/uterus/RNA/highvar_counts_rna.mtx", cache = True)

# PCA can produce at most min(n_obs, n_vars) - 1 components, so cap the request
# by BOTH matrix dimensions (the previous bound only looked at shape[0] and
# allowed a value equal to it). For matrices larger than 100 x 100 this is
# still 100 components.
n_pcs = min(100, min(adata_rna.shape) - 1)
sc.tl.pca(
    adata_rna, n_comps=n_pcs,
    use_highly_variable=False, svd_solver="auto"
)

# From here on `counts_rna` is the PCA embedding stored by scanpy in
# `.obsm["X_pca"]` (one row per observation), not the raw count matrix.
counts_rna = adata_rna.obsm["X_pca"]

# Labels are read from the annotated .h5ad files; note the two datasets use
# different column names ("cell_type" vs "celltype").
label_rna = sc.read("/data1/xcx/unpaired-data/uterus/RNA/Uterus_Wang_2020_selected.h5ad", cache = True).obs["cell_type"].values
label_atac = sc.read("/data1/xcx/unpaired-data/uterus/ATAC/uterus_Zhang2021/adata_anno_selected.h5ad", cache = True).obs["celltype"].values

In [4]:
# ATAC preprocessing: load the count matrix densely, binarise it, then reduce
# it with the project's LSI helper.
atac_adata = sc.read("/data1/xcx/unpaired-data/uterus/ATAC/uterus_Zhang2021/highvar_counts_atac.mtx", cache = True)
counts_atac = atac_adata.X.toarray()

from test_unioncom_acc import lsi_ATAC

# Entries below 1 become 0, everything else becomes 1.
below_one = counts_atac < 1
counts_atac = np.where(below_one, 0, 1)

# Number of LSI components: 101, capped by the first matrix dimension.
n_lsi = min(101, counts_atac.shape[0])
counts_atac = lsi_ATAC(counts_atac, k = n_lsi)

In [5]:
# L2-normalise both embeddings (sklearn's `normalize` works per row by
# default); X is the RNA side, Y the ATAC side.
X, Y = (
    normalize(embedding, norm="l2")
    for embedding in (counts_rna, counts_atac)
)

In [7]:
# Build the Gram (linear-kernel) matrices of the two embeddings and write
# them to `output_dir` as K1.csv / K2.csv.
output_dir = "/home/xcx/results/uterus/3-MMD-MA/"
# Create the results directory up front so `to_csv` cannot fail on a fresh
# machine where it does not exist yet.
os.makedirs(output_dir, exist_ok=True)

K1 = X @ X.T
K2 = Y @ Y.T

# Same destinations as before, now derived from `output_dir` instead of being
# hard-coded twice. `df` is kept as a notebook-level name for compatibility.
for kernel, file_name in ((K1, "K1.csv"), (K2, "K2.csv")):
    df = pd.DataFrame(data=kernel)
    df.to_csv(os.path.join(output_dir, file_name), index=False)

In [8]:
# Sanity check: show the shape of each kernel matrix, one per line.
print(K1.shape, K2.shape, sep="\n")

(8237, 8237)
(8314, 8314)


In [31]:
import datetime


def elapsed_seconds(start, end):
    """Return the whole number of seconds elapsed between two datetimes.

    Uses ``timedelta.total_seconds()`` so that intervals of one day or longer
    are reported in full; ``timedelta.seconds`` only holds the sub-day
    remainder and silently drops whole days.
    """
    return int((end - start).total_seconds())


starttime = datetime.datetime.now()

# NOTE: MMD-MA is NOT launched by this cell; both invocations below are
# commented out, so the elapsed time printed here does not include the
# optimisation itself.
#
# # run in terminal
# python /home/xcx/MYBenchmark-codes/3-MMD-MA/demos/manifoldAlignDistortionPen_mmd_multipleStarts_uterus.py /home/xcx/results/uterus/3-MMD-MA/K1.csv /home/xcx/results/uterus/3-MMD-MA/K2.csv --l1 1e-5 --l2 1e-5 --p 5 --bandwidth 0.5 --seed 50
#
# Sketch of launching it from Python instead (untested; note that it points at
# a different script and at K1.npy/K2.npy rather than the CSV files used in
# the terminal command above):
# subprocess.call([
#     "python", "/home/xcx/MYBenchmark-codes/3-MMD-MA/demos/manifoldAlignDistortionPen_mmd_multipleStarts.py",
#     output_dir + "K1.npy", output_dir + "K2.npy",
#     "--l1", "1e-5", "--l2", "1e-5", "--p", "5", "--bandwidth", "0.5", "--seed", "50",
# ])

print("MMD-MA finished...")

# long running
endtime = datetime.datetime.now()
print(elapsed_seconds(starttime, endtime))

MMD-MA finished...


In [9]:
import pandas as pd


def load_kernel(csv_path):
    """Read a kernel matrix saved as a headed, index-free CSV into an ndarray."""
    return pd.read_csv(csv_path, header=0, index_col=None).to_numpy()


# Kernel matrices are re-read from disk rather than taken from kernel state.
K1 = load_kernel("/home/xcx/results/uterus/3-MMD-MA/K1.csv")
K2 = load_kernel("/home/xcx/results/uterus/3-MMD-MA/K2.csv")

# Coefficient matrices read from the MMD-MA training output directory.
alpha = np.loadtxt("/home/xcx/MYBenchmark-codes/3-MMD-MA/demos/train_uterus/alpha_hat_50_10000.txt")
beta = np.loadtxt("/home/xcx/MYBenchmark-codes/3-MMD-MA/demos/train_uterus/beta_hat_50_10000.txt")

# Latent embeddings: each kernel multiplied by its coefficient matrix.
rna_latent = np.matmul(K1, alpha)
atac_latent = np.matmul(K2, beta)
 
# #long running
# endtime = datetime.datetime.now()
# print((endtime-starttime).seconds)

In [10]:
# Write both latent embeddings to CSV (no index column). `df` ends up holding
# the ATAC frame, exactly as before.
for latent, csv_path in (
    (rna_latent, "/home/xcx/results/uterus/3-MMD-MA/z_rna.csv"),
    (atac_latent, "/home/xcx/results/uterus/3-MMD-MA/z_atac.csv"),
):
    df = pd.DataFrame(data=latent)
    df.to_csv(csv_path, index=False)