In [None]:
import pandas as pd
import torch
import tensorqtl
import os


from tensorqtl import genotypeio, cis, trans, post, susie
# Use the GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Record the library versions and the selected device in the cell output.
print("torch: {} (CUDA {}), device: {}".format(torch.__version__, torch.version.cuda, device))
print("pandas {}".format(pd.__version__))

# Every relative path used after this point is resolved against this directory.
os.chdir("/data/podo/Projects/project_HS/202404-sceQTLv7/")


In [None]:
# Directory (relative to the working directory) whose entries are the per-run inputs.
input_dir = "01_calling_eQTL/01_00_pseudobulk_pipeline/tensorqtl_input/"

# Entry names in sorted order, so positions in the list are stable between runs.
list_files = os.listdir(input_dir)
list_files.sort()

# Same entries, each joined onto the input directory.
list_pathes = []
for file_name in list_files:
    list_pathes.append(os.path.join(input_dir, file_name))

In [None]:
# For each input table in list_pathes, run three tensorQTL analyses on the
# chr1-chr22 phenotypes: cis.map_cis, cis.map_nominal and susie.map.
# Results are written under three output directories, named after the input file.

# Position in list_pathes of the first input to process.
# NOTE(review): entries 0-7 are skipped -- presumably they were already handled
# by an earlier run; confirm, and set to 0 to process every input.
start_index = 8

# One output directory per analysis (relative to the current working directory).
cis_out_dir = "01_calling_eQTL/01_01_tensorqtl_out/"
nominal_out_dir = "01_calling_eQTL/01_02_tensorqtl_nominal/"
susie_out_dir = "01_calling_eQTL/01_03_tensorqtl_susie/"

# Create the output directories up front so that a missing folder cannot abort
# an iteration after the mapping has already been computed.
for out_dir in (cis_out_dir, nominal_out_dir, susie_out_dir):
    os.makedirs(out_dir, exist_ok=True)

# Chromosome labels kept for every analysis below: chr1 ... chr22.
autosomes = ['chr' + str(n) for n in range(1, 23)]

for path in list_pathes[start_index:]:
    # Output label = input file name without directory and extension
    # (independent of how deep the input directory is nested).
    prefix = os.path.splitext(os.path.basename(path))[0]

    # The input table is tab-separated with a 'name' column (setting key)
    # and a 'value' column (setting).
    df_path = pd.read_csv(path, sep="\t").set_index('name')

    # parsing
    geno_prefix = df_path.value['geno_prefix']
    pheno_path = df_path.value['pheno_path']
    covs_path = df_path.value['covs_path']
    interaction_path = df_path.value['interaction_path']

    # load phenotypes and covariates
    phenotype_df, phenotype_pos_df = tensorqtl.read_phenotype_bed(pheno_path)
    # The covariate file is transposed after reading, so its columns become the row index.
    covariates_df = pd.read_csv(covs_path, sep='\t', index_col=0).T
    # Keep the first 15 covariate columns as they are and expand the column at
    # position 15 into indicator columns (comma-separated values); any column
    # after position 15 is not carried over. Everything is cast to float.
    covariates_df = pd.merge(
        covariates_df.iloc[:, :15],
        covariates_df.iloc[:, 15].str.get_dummies(sep=','),
        right_index=True, left_index=True,
    ).astype('float')
    pr = genotypeio.PlinkReader(geno_prefix)
    genotype_df = pr.load_genotypes()
    variant_df = pr.bim.set_index('snp')[['chrom', 'pos']]
    # NOTE(review): interaction_df is loaded but not passed to any call in this loop.
    interaction_df = pd.read_csv(interaction_path, sep='\t', index_col=0).T
    # Prints True when phenotype columns and covariate rows are in the same order.
    print(all(phenotype_df.columns==covariates_df.index))

    # Restrict the phenotypes to chr1-chr22 once and reuse the result for all
    # three analyses instead of rebuilding the filter for every call.
    is_autosomal = phenotype_pos_df['chr'].isin(autosomes)
    phenotype_autosomal_df = phenotype_df.loc[is_autosomal]
    phenotype_pos_autosomal_df = phenotype_pos_df.loc[is_autosomal]

    # significant_stats
    out_prefix = cis_out_dir + prefix
    df_cis = cis.map_cis(genotype_df, variant_df,
                         phenotype_autosomal_df, phenotype_pos_autosomal_df,
                         covariates_df=covariates_df, maf_threshold=0.05)
    #tensorqtl.calculate_qvalues(df_cis, fdr = 0.1)
    df_cis.to_csv(out_prefix + ".map_cis.txt.gz", sep="\t")

    out_prefix = nominal_out_dir + prefix
    print(all(phenotype_df.columns==covariates_df.index))
    print(out_prefix)

    # nominal
    # NOTE(review): no interaction_df is passed here; check whether
    # run_eigenmt=True has any effect without one.
    cis.map_nominal(genotype_df, variant_df,
                    phenotype_autosomal_df, phenotype_pos_autosomal_df,
                    covariates_df=covariates_df, prefix=out_prefix,
                    run_eigenmt=True, maf_threshold=0.05)

    out_prefix = susie_out_dir + prefix

    # susie
    # Stored under its own name so it is not confused with the map_cis result.
    df_susie = susie.map(genotype_df, variant_df,
                         phenotype_autosomal_df, phenotype_pos_autosomal_df,
                         covariates_df=covariates_df, maf_threshold=0.05, max_iter=500)
    df_susie.to_csv(out_prefix + ".susie_mapped.txt.gz", sep="\t")