In [2]:
import os

# Base directory of the project checkout; the notebook reads from
# f"{root}/data/..." and writes to f"{root}/result/...".
# Set the RATDECONV_ROOT environment variable to run on another machine
# without editing this cell; the original location is kept as the default.
root = os.environ.get("RATDECONV_ROOT", "C:/github/2023/RatDeconvolution")
# Directory appended to sys.path so that the `deconv` package can be imported.
# Override with RATDECONV_PACKAGE; the original location is kept as the default.
path_package = os.environ.get("RATDECONV_PACKAGE", "D:/Gdrive/publication/ratdeconv")

## Deconvolution

In [3]:
import sys
import warnings

import pandas as pd
import numpy as np
import scipy.stats as st
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn import metrics

sys.path.append(path_package)
from deconv.deconv.deconv import Deconvolution

# Notebook-wide figure defaults: 22 pt text in the sans-serif family.
plt.rcParams.update({"font.size": 22, "font.family": "sans-serif"})
# Install an "ignore" filter for all warnings from here on.
warnings.simplefilter("ignore")

In [4]:
def annotation_sample(df, df_b):
    """Build one "<compound>_<dose>_<period>" label per column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels are looked up in the index of ``df_b``.
    df_b : pandas.DataFrame
        Metadata table indexed by those labels; it must provide the columns
        ``COMPOUND_NAME``, ``DOSE_LEVEL`` and ``SACRIFICE_PERIOD``.

    Returns
    -------
    list of str
        One label per metadata row selected by ``df.columns``, in that order.
    """
    sample_ids = df.columns.tolist()
    # Select the metadata rows following the column order of ``df``.
    meta = df_b.loc[sample_ids]
    fields = (
        meta[key].tolist()
        for key in ("COMPOUND_NAME", "DOSE_LEVEL", "SACRIFICE_PERIOD")
    )
    return [
        f"{compound}_{dose}_{period}"
        for compound, dose, period in zip(*fields)
    ]

def estimate(df_mix, df_ref, number=150, trimming=True, trans="log2", norm=None):
    """Run the ``Deconvolution`` pipeline on a mixture/reference pair.

    The calls are made in this order:

    1. ``set_data(df_mix, df_ref)`` followed by ``preprocessing_mix``.
    2. ``deg`` with ``method="ttest"`` (plotting enabled).
    3. ``preprocessing_ref`` with ``norm_method_list=["raw"]``.
    4. ``set_data`` again with the original ``df_mix`` and the processed
       reference taken from ``get_data()[1]``, then ``preprocessing_mix``
       once more with ``trimming=False``.
    5. ``fit(method="elasticnet")``.

    Parameters
    ----------
    df_mix
        Mixture data handed to ``Deconvolution.set_data`` (a pandas DataFrame
        in this notebook).
    df_ref
        Reference data handed to ``Deconvolution.set_data``.
    number : int, default 150
        Forwarded as ``number`` to ``deg``.
    trimming : bool, default True
        Forwarded as ``trimming`` to the first ``preprocessing_mix`` call and
        as ``intersection`` to ``deg``.
    trans : str, default "log2"
        Forwarded as ``trans_method`` to every preprocessing call.
    norm : list of str, optional
        Forwarded as ``norm_method_list`` to both ``preprocessing_mix`` calls.
        ``None`` (the default) is treated as ``["quantile"]``.

    Returns
    -------
    The object returned by ``Deconvolution.get_res()`` after fitting.
    """
    if norm is None:
        # Build the default list per call. A list literal in the signature is
        # one shared object, so any in-place change made by the library would
        # leak into every later call of this function.
        norm = ["quantile"]

    dat = Deconvolution()
    dat.set_data(df_mix, df_ref)
    dat.preprocessing_mix(
                        trimming=trimming,batch=False,combat=False,
                        trans_method=trans,
                        norm_method_list=norm)
    dat.deg(method="ttest",
            sep="_",number=number,limit_CV=1.,limit_FC=1.,log2=True,q_limit=0.05,
            intersection=trimming,
            plot=True)
    dat.preprocessing_ref(
            trimming=False,batch=False,combat=False,
            trans_method=trans,
            norm_method_list=["raw"])
    # Keep only the processed reference; the mixture is reloaded from the
    # untouched ``df_mix`` and preprocessed again without trimming.
    df_ref_processed = dat.get_data()[1]
    dat.set_data(df_mix, df_ref_processed)
    dat.preprocessing_mix(
                        trimming=False,batch=False,combat=False,
                        trans_method=trans,
                        norm_method_list=norm)
    dat.fit(method="elasticnet")
    df_res = dat.get_res()
    return df_res


In [6]:
# Read the tggate expression table and the matching sample-information table.
df_target = pd.read_csv(f"{root}/data/tggate_transcriptome.csv", index_col=0)
df_sample = pd.read_csv(f"{root}/data/tggate_sample_information.csv", index_col=0)
# Cast both sets of sample identifiers to str so the column -> metadata lookup
# compares like with like.
df_target.columns = list(map(str, df_target.columns))
df_sample.index = list(map(str, df_sample.index))
# Replace each column label by its "<compound>_<dose>_<sacrifice period>" annotation.
df_target.columns = annotation_sample(df_target, df_sample)
print(df_target.shape)

# Read the raw RNA-seq reference and keep 36 columns in a fixed order:
# B, CD4T, CD8T, NK, Mon, Neu, each with suffixes _1 .. _6.
df_ref = pd.read_csv(f"{root}/data/rnaseq_reference.csv", index_col=0).loc[
    :,
    [
        f"{cell}_{rep}"
        for cell in ("B", "CD4T", "CD8T", "NK", "Mon", "Neu")
        for rep in range(1, 7)
    ],
]

(13323, 645)


In [None]:
# Run the pipeline on the annotated transcriptome with number=200 and write
# the returned table to the result directory.
df_res = estimate(
    df_target,
    df_ref,
    number=200,
    trimming=True,
    trans="log2",
    norm=["quantile"],
)
df_res.to_csv(f"{root}/result/tggate_result.csv")

## Evaluation