In [None]:
import os, sys

# Work from the 01_repos folder under $HOME: a later cell reads
# "CardiacMotionGWAS/results/..." relative to this directory.
# Presumably this is also what makes `paths` importable just below - TODO confirm.
repos_root = f"{os.environ['HOME']}/01_repos"
os.chdir(repos_root)

from paths import Paths

from easydict import EasyDict
import matplotlib.pyplot as plt
import numpy as np
import mlflow
import ipywidgets as widgets
from ipywidgets import interact

import numpy as np
import seaborn as sns

import scipy
# scipy.spatial.distance.pdist() # to list possible distance metrics

import pandas as pd
from tqdm import tqdm
import glob

In [None]:
from CardiacMotionGWAS.helpers import EnsembleGWASResults, fetch_loci_mapping
from CardiacMotion.utils.run_helpers import Run, compute_thickness_per_aha

___

# LV wall thickness

In [None]:
# GWAS results object rooted at the "LVWT" folder of the configured results directory.
lvwt_root_dir = f"{Paths.Data.gwas_results}/LVWT"
lvwt_results = EnsembleGWASResults(root_dir=lvwt_root_dir)

In [None]:
# A region is kept when it has either
#   * five or more association rows (non-null `pheno`) with P < 5e-8, or
#   * at least one association with P < 5e-10.
lvwt_assocs = lvwt_results.region_assocs_df

region_counts = lvwt_assocs[lvwt_assocs.P < 5e-8].groupby("region").pheno.count()
region_gw_mt5 = region_counts[region_counts >= 5]

region_sw = lvwt_assocs[lvwt_assocs.P < 5e-10].region.unique()

relevant_regions = set(region_sw) | set(region_gw_mt5.index)

In [None]:
# Keep only the associations that fall in one of the selected regions.
# `Series.isin` performs the membership test in one vectorised call instead of
# invoking a Python lambda once per row.
in_relevant_region = lvwt_results.region_assocs_df.region.isin(relevant_regions)
assocs_for_relevant_regions = lvwt_results.region_assocs_df[in_relevant_region]

# Lead SNP of a region: the row holding the smallest P among all of that
# region's associations, listed in (CHR, BP) order.
lead_snp_df = (
    assocs_for_relevant_regions
    .loc[assocs_for_relevant_regions.groupby("region").P.idxmin()]
    .sort_values(["CHR", "BP"])
)

In [None]:
# Table of LD regions; the code below reads its `chr`, `start`, `stop` and `id` columns.
ld_regions = pd.read_csv("/home/user/01_repos/GWAS_pipeline/data/ld_indep_regions/fourier_ls-all_EUR_hg19_named.bed")

# One "chrNN_start-stop" label per row, then a lookup from region id to that label
# (the label is the stem of the per-region result files read further down).
range_labels = ld_regions.apply(lambda row: f"chr{str(row.chr).zfill(2)}_{row.start}-{row.stop}", axis=1)
region_ranges = dict(zip(ld_regions.id, range_labels))

# Lookup from region id to the identifier of its lead SNP.
region_to_snp = dict(zip(lead_snp_df.region, lead_snp_df.SNP))

In [None]:
# Phenotypes (among the relevant-region associations) whose name contains "thickness".
unique_phenos = assocs_for_relevant_regions.pheno.unique()
thickness_phenos = [name for name in unique_phenos if "thickness" in name]

In [None]:
# For every lead SNP, collect its row from each thickness phenotype's
# per-region result file (phenotypes without a file for that region are skipped).
by_region_dir = "/mnt/data/workshop/workshop-user1/output/GWAS/LVWT/by_region"
dfs = []

for region in tqdm(lead_snp_df.region):
    region_range = region_ranges[region]
    lead_snp = region_to_snp[region]  # same for every phenotype of this region

    # SNP-level information shared by all phenotypes of the region.
    snps_info_df = pd.read_csv(f"{by_region_dir}/snps_info/{region_range}__snps_data.tsv", sep='\t')

    for pheno in thickness_phenos:
        assoc_file = f"{by_region_dir}/{pheno}/{region_range}.tsv"
        if not os.path.exists(assoc_file):
            continue

        assoc_df = pd.read_csv(assoc_file, sep='\t').assign(pheno=pheno, region=region)
        # Column-wise concat aligns the two tables on their row index.
        merged_df = pd.concat([snps_info_df, assoc_df], axis=1)
        dfs.append(merged_df[merged_df.SNP == lead_snp])
            
# Arrange the collected lead-SNP rows as one row per phenotype and one column per region.
thickness_pvals_df = (
    pd.concat(dfs, axis=0)
    .pivot(index="pheno", values="P", columns="region")
    .reset_index()
)

# Decode the phenotype name into descriptive fields.
# The segment is extracted with pandas' own `.str.extract`, so this cell does not
# need the `re` module (which this notebook imports only in a later cell and would
# therefore be undefined here on a fresh kernel). The pattern keeps its leading
# greedy ".*", so the captured group is the same one `re.match(...).group(1)` yields.
pheno_names = thickness_pvals_df.pheno
thickness_pvals_df["segment"] = pheno_names.str.lower().str.extract(r".*(aha..|average|avg).*", expand=False)

# Fail with a readable message instead of an AttributeError on a None match.
unparsed_phenos = pheno_names[thickness_pvals_df["segment"].isna()]
assert unparsed_phenos.empty, f"No segment found in phenotype name(s): {list(unparsed_phenos)}"

thickness_pvals_df["phase"] = pheno_names.apply(lambda pheno: "ED" if "ED" in pheno else "ES")
thickness_pvals_df["magnitude"] = "thickness"
thickness_pvals_df["rel_or_abs"] = pheno_names.apply(lambda pheno: "relative" if "rel" in pheno else "absolute")

# Replace the raw phenotype name by the decoded fields as row index.
thickness_pvals_df = (
    thickness_pvals_df
    .sort_values(["magnitude", "phase", "rel_or_abs"])
    .drop("pheno", axis=1)
    .set_index(["segment", "phase", "magnitude", "rel_or_abs"])
)

# Columns are region ids at this point; relabel each with the region's lead SNP.
thickness_pvals_df.columns = [ region_to_snp[x] for x in thickness_pvals_df.columns ]

In [None]:
# Distinct lead-SNP identifiers, one entry per SNP regardless of how many regions share it.
lead_snps = set(lead_snp_df["SNP"])

In [None]:
# NOTE(review): disabled cell, kept for reference only. When enabled it reads every
# "*thickening*tsv" result file, keeps the rows whose SNP is in `lead_snps`, tags
# them with the phenotype taken from the file name, and writes the result to kk.csv.
# Presumably that output is where the thickening CSV loaded in the next cell came
# from - TODO confirm, or delete this cell.
#
# files = sorted(glob.glob("/mnt/data/workshop/workshop-user1/output/GWAS/LVWT/*thickening*tsv"))
# thickening_phenos = [ x.split("__")[1][:-4] for x in files ]
#
# thickening_dfs = []
# 
# for i, file in tqdm(enumerate(files)):
#     pheno = thickening_phenos[i]
#     df = pd.read_csv(file, sep="\t")
#     df = df.loc[df.SNP.apply(lambda x: x in lead_snps)]
#     df = df.assign(pheno=pheno)
#     thickening_dfs.append(df)
#     
# thickening_df = pd.concat(thickening_dfs)
# thickening_df.to_csv("kk.csv")

In [None]:
# Long-format thickening P-values (one row per phenotype/SNP pair); the path is
# relative to the working directory chosen at the top of the notebook.
thickening_pvals_df = pd.read_csv("CardiacMotionGWAS/results/LV_wall_thickening_pvals.csv")

# Decode the phenotype name. The segment is extracted with pandas' `.str.extract`,
# so this cell does not need the `re` module (which this notebook imports only in
# a later cell and would therefore be undefined here on a fresh kernel). The
# pattern keeps its leading greedy ".*", so the captured group is the same one
# `re.match(...).group(1)` yields.
thickening_pvals_df["segment"] = thickening_pvals_df.pheno.str.extract(r".*(aha..).*", expand=False)

# Fail with a readable message instead of an AttributeError on a None match.
unparsed_phenos = thickening_pvals_df.pheno[thickening_pvals_df["segment"].isna()]
assert unparsed_phenos.empty, f"No segment found in phenotype name(s): {list(unparsed_phenos.unique())}"

thickening_pvals_df["magnitude"] = "thickening"
thickening_pvals_df["rel_or_abs"] = thickening_pvals_df.pheno.apply(lambda pheno: "relative" if "rel" in pheno else "absolute")
thickening_pvals_df["phase"] = None  # no phase is assigned to thickening rows

thickening_pvals_df = thickening_pvals_df.sort_values(["magnitude", "phase", "rel_or_abs"])
thickening_pvals_df = thickening_pvals_df.drop("Unnamed: 0", axis=1)  # index column saved with the CSV

# Wide format: one row per (segment, phase, magnitude, rel_or_abs), one column per SNP.
thickening_pvals_df = thickening_pvals_df.pivot(index=["segment", "phase", "magnitude", "rel_or_abs"], values="P", columns="SNP").reset_index()

# Preview only: the result is not assigned here.
thickening_pvals_df.set_index(["segment", "phase", "magnitude", "rel_or_abs"])

In [None]:
import re

In [None]:
# Move the four descriptive columns into the row index, leaving only the P-value columns.
# Not re-runnable on its own: a second run finds no such columns and raises KeyError.
descriptor_cols = ["segment", "phase", "magnitude", "rel_or_abs"]
thickening_pvals_df = thickening_pvals_df.set_index(keys=descriptor_cols)

In [None]:
# Stack the thickness rows on top of the thickening rows.
frames_to_stack = [thickness_pvals_df, thickening_pvals_df]
pvals = pd.concat(frames_to_stack)


In [None]:
# Persist the combined P-value table (row index included, pandas' default).
lvwt_pvals_csv = "/home/user/01_repos/CardiacMotionGWAS/results/LVWT_pvals.csv"
pvals.to_csv(lvwt_pvals_csv)

In [None]:
# Reload the saved table and index it by (magnitude, rel_or_abs, phase, segment).
pvals_df = (
    pd.read_csv("/home/user/01_repos/CardiacMotionGWAS/results/LVWT_pvals.csv")
    .set_index(["magnitude", "rel_or_abs", "phase", "segment"])
)

In [None]:
# -log10(P) for every cell of the table: larger values mean smaller P-values.
log_pvals = np.negative(np.log10(pvals_df))

In [None]:
# After transposing, columns are keyed by (magnitude, rel_or_abs, phase, segment);
# a three-element key selects every segment of the absolute ED thickness rows.
log_pvals.T[("thickness", "absolute", "ED")]

In [None]:
# `pvals` is built from two frames that are already indexed by
# (segment, phase, magnitude, rel_or_abs), so those names are index levels, not
# columns; calling `set_index` on them again raises KeyError. Display the table as is.
pvals

In [None]:
def _relevant_assocs_and_log10p(results_subdir):
    """Load one GWAS result set and summarise it for the relevant regions.

    Parameters
    ----------
    results_subdir : str
        Name of the folder under ``Paths.Data.gwas_results`` that holds the results.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The region-level associations restricted to ``relevant_regions``, and
        their ``-log10(P)`` values pivoted to one row per region and one column
        per phenotype.
    """
    region_assocs = EnsembleGWASResults(root_dir=f"{Paths.Data.gwas_results}/{results_subdir}").region_assocs_df
    # Vectorised membership test instead of a per-row Python lambda.
    region_assocs = region_assocs[region_assocs.region.isin(relevant_regions)]
    log10p = -np.log10(region_assocs.pivot(index="region", values="P", columns="pheno"))
    return region_assocs, log10p


# Same processing for the absolute and the relative wall-thickening runs.
abs_thickening_results, abs_wt_log10p_df = _relevant_assocs_and_log10p("absolute_wall_thickening")

rel_thickening_results, rel_wt_log10p_df = _relevant_assocs_and_log10p("relative_wall_thickening")

# NOTE(review): these two paths are not referenced anywhere in this cell.
REL_THICKENING_FILE = "/home/user/01_repos/CardiacMotionGWAS/results/log10p_rel_thickening.csv"
ABS_THICKENING_FILE = "/home/user/01_repos/CardiacMotionGWAS/results/log10p_abs_thickening.csv"

# Per region, the largest -log10(P) across phenotypes, for each of the two runs.
best_abs_log10p = abs_wt_log10p_df.max(axis=1).rename("abs_wall_thickening").to_frame()
best_rel_log10p = rel_wt_log10p_df.max(axis=1).rename("rel_wall_thickening").to_frame()

# Index-on-index merge: only regions present in both runs are kept.
best_logp_thickening_df = pd.merge(
    best_abs_log10p,
    best_rel_log10p,
    left_index=True,
    right_index=True,
)

# NOTE(review): the output path has no file extension - confirm this is intended.
best_logp_thickening_df.to_csv(f"{Paths.Repos.CARDIAC_MOTION_GWAS}/results/log10p_wall_thickening")