In [25]:
import os
os.chdir("/root/data/DBP_sa_bc/")
from os.path import join as pj
import argparse
import sys
sys.path.append("modules")
import utils
import numpy as np
import anndata as ad
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
# import scipy
import pandas as pd
import re

In [26]:
# Run options. Every flag has a default so the cell executes unchanged inside
# an interactive kernel; the values feed the result paths and the method
# dispatch further down.
parser = argparse.ArgumentParser()
parser.add_argument('--task', type=str, default='lung_ts')
parser.add_argument('--experiment', type=str, default='e52')
parser.add_argument('--model', type=str, default='default')
parser.add_argument('--init_model', type=str, default='sp_latest')
parser.add_argument('--method', type=str, default='scETM')
# Integer default given as an int (was the string '50', which only worked
# because argparse runs string defaults through `type`).
parser.add_argument('--K', type=int, default=50)
# parse_known_args() tolerates the extra argv entries an interactive kernel
# passes in; switch to parser.parse_args() when running as a plain script.
o, _ = parser.parse_known_args()

In [27]:
# Resolve the directories this run reads from / writes to.
K = o.K
break_index_dir = pj("result", o.task, o.experiment, o.model, "predict", o.init_model)

# Methods whose name contains "DBP_sa_bc" get an extra experiment/init_model
# level in their comparison directory; every other method uses a flat one.
result_dir = (
    pj("result", "comparison", o.task, o.method, o.experiment, o.init_model)
    if "DBP_sa_bc" in o.method
    else pj("result", "comparison", o.task, o.method)
)

# The data config is keyed by the task name with these suffixes stripped.
cfg_task = re.sub("_atlas|_generalize|_transfer|_ref_.*", "", o.task)
data_config = utils.load_toml("configs/data.toml")[cfg_task]
vars(o).update(data_config)  # expose every data setting as an attribute of `o`

# Model settings: start from the "default" table, then overlay the selected one.
model_config = utils.load_toml("configs/model.toml")["default"]
if o.model != "default":
    model_config.update(utils.load_toml("configs/model.toml")[o.model])
vars(o).update(model_config)

# Only the first two values returned by gen_all_batch_ids are kept.
o.s_joint, o.combs, *_ = utils.gen_all_batch_ids(o.s_joint, o.combs)
# output_dir = pj("result", "analysis", o.task, o.method, o.experiment, "fa")


In [28]:
# Load labels
# Column positions inside each "label/meta.csv" that hold the label sets, per
# task. The first entry becomes `labels1`; the second (if any) `labels2`.
LABEL_COLUMNS = {
    "wnn_rna": (10, 11),
    "lung_ts": (13, 14),
    "ga": (4,),
}

if o.task not in LABEL_COLUMNS:
    # Fail here rather than later with a NameError on `labels1` (or, worse,
    # silently reusing labels left in the kernel from a previous run).
    raise ValueError(
        "No label columns configured for task {!r}; known tasks: {}".format(
            o.task, sorted(LABEL_COLUMNS)
        )
    )

label_column_ids = LABEL_COLUMNS[o.task]
collected_labels = [[] for _ in label_column_ids]
for raw_data_dir in o.raw_data_dirs:
    # Transpose once per file so each label set can be read as one sequence.
    meta_columns = utils.transpose_list(
        utils.load_csv(pj(raw_data_dir, "label", "meta.csv"))
    )
    for bucket, column_id in zip(collected_labels, label_column_ids):
        bucket.extend(meta_columns[column_id][1:])  # [1:] skips the first (header) entry

labels1 = np.array(collected_labels[0])
print(np.unique(labels1))
if len(collected_labels) > 1:
    # Only tasks with a second configured column define `labels2`.
    labels2 = np.array(collected_labels[1])
    print(np.unique(labels2))

# Load index
index = np.loadtxt(pj(break_index_dir, "break_index.csv"), delimiter=",", dtype=int)

['Alveolar_Type1' 'Alveolar_Type2' 'B_cell_mature' 'B_cell_naive' 'Basal'
 'Blood_vessel' 'Ciliated' 'DC_1' 'DC_2' 'DC_Monocyte_Dividing'
 'DC_activated' 'DC_plasmacytoid' 'Fibroblast' 'Lymph_vessel'
 'Macrophage_Dividing' 'Macrophage_MARCOneg' 'Macrophage_MARCOpos'
 'Mast_cells' 'Monocyte' 'Muscle_cells' 'NK' 'NK_Dividing' 'Plasma_cells'
 'Secretory_club' 'T_CD4' 'T_CD8_CytT' 'T_cells_Dividing' 'T_regulatory']
['Alveolar' 'B' 'Basal' 'Blood' 'Ciliated' 'DC' 'Fibroblast' 'Lymph'
 'Macrophage' 'Mast' 'Monocyte' 'Muscle' 'NK' 'Plasma' 'Secretory' 'T']


In [29]:
# Step 1: obtain the cell-by-factor matrix for the selected method.
if o.method == "DBP_sa_bc":
    # Load predicted latent variables
    o.mods = ["rna"]
    o.pred_dir = pj("result", o.task, o.experiment, o.model, "predict", o.init_model)
    pred = utils.load_predicted(o, input=False)

    # Break: weight the first `dim_c` latent dimensions by `w`, reorder the
    # columns with the loaded break index, and keep the first K of them.
    w = pred["w"]["joint"]
    c = pred["z"]["joint"][:, :o.dim_c]*w
    c_ord = c[:, index]
    c_bre = c_ord[:, :K]
    embedding = c_bre
elif o.method in ["mofa", "liger", "LDVAE", "scETM"]:
    z = np.array(utils.load_csv(pj(result_dir, "embeddings.csv")))
    if o.method in ["mofa", "liger"]:
        # These files carry an extra first row and first column
        # (presumably a header and a cell index -- TODO confirm).
        z = z[1:, 1:]
    z = z.astype(np.float32)
    embedding = z
else:
    # Previously an unknown method fell through silently and the notebook
    # failed later on an undefined `proportion`.
    raise ValueError("Unsupported method: {!r}".format(o.method))

# Step 2 (shared by all methods): average every factor within each label.
F = ["F{}".format(i+1) for i in range(embedding.shape[1])]
dfz = pd.DataFrame(embedding)
dfz.index = labels1
dfz.columns = F
mean_values = dfz.groupby(dfz.index).mean()
proportion = mean_values
   


def scale_data(data):
    """Scale an array by its largest absolute value.

    Parameters
    ----------
    data : array-like
        Numeric values of any shape; converted to a float ndarray.

    Returns
    -------
    numpy.ndarray
        A new float array of the same shape whose entries lie in [-1, 1].
        All zeros when every input value is zero; an empty array when the
        input is empty (the original raised on empty input).
    """
    data = np.asarray(data, dtype=float)
    if data.size == 0:
        # np.max has no identity for an empty array; nothing to scale anyway.
        return np.zeros_like(data)
    abs_max = np.max(np.abs(data))
    if abs_max == 0:
        # Avoid 0/0: an all-zero input maps to all zeros.
        return np.zeros_like(data)
    return data / abs_max


# Rescale the per-label means by the global absolute maximum and keep only
# magnitudes, so every entry of the exported table lies in [0, 1].
proportion = pd.DataFrame(
    np.abs(scale_data(proportion.values)),
    # rename() returns a new Index, so mean_values' own index is left untouched.
    index=mean_values.index.rename("Cell_type"),
    columns=F,
)

# Create the output directory if it is missing so the export cannot fail on it.
os.makedirs(result_dir, exist_ok=True)
proportion.to_csv(pj(result_dir, "proportion.csv"))