### Preliminaries

In [None]:
import os

# Resolve the home directory portably: os.environ["HOME"] raises KeyError when
# HOME is not set, whereas expanduser("~") falls back to the platform lookup.
HOME = os.path.expanduser("~")

# Local checkouts of the two repositories this notebook switches between.
CARDIAC_MOTION_GWAS_REPO = f"{HOME}/01_repos/CardiacMotionGWAS"
CARDIAC_MOTION_REPO = f"{HOME}/01_repos/CardiacMotion"
# MLRUNS_DIR = f"{CARDIAC_COMA_REPO}/mlruns"

# Start in the CardiacMotion checkout; later cells change directory again.
os.chdir(CARDIAC_MOTION_REPO)

In [None]:
import mlflow
from mlflow.tracking import MlflowClient

import os, sys

import torch
import torch.nn.functional as F

from config.cli_args import overwrite_config_items
from config.load_config import load_yaml_config, to_dict

import ipywidgets as widgets
from ipywidgets import interact
from IPython.display import Image

import pandas as pd
import shlex
from subprocess import check_output

import pickle as pkl
import pytorch_lightning as pl

from argparse import Namespace
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
from IPython import embed
# Put '..' first on the module search path. The entry is relative, so it is
# resolved against whatever the working directory is when an import happens.
sys.path.insert(0, '..')

# import model.Model3D
# from utils.helpers import get_coma_args, get_lightning_module, get_datamodule
from copy import deepcopy
from pprint import pprint

from typing import List
from tqdm import tqdm

import pandas as pd

import pyvista as pv
from ipywidgets import interact, interactive, fixed, interact_manual

In [None]:
# Switch to the GWAS repository checkout before importing from it.
# NOTE(review): the `data.gwas_details` import below presumably relies on the
# working directory being importable — confirm.
os.chdir(CARDIAC_MOTION_GWAS_REPO)

import glob
import os
import numpy as np
import pandas as pd
from IPython import embed
import functools

# GWAS_dict is iterated below as {file name: info}, where info provides the
# "prefix", "phenotype" and "columns" entries.
from data.gwas_details import GWAS_dict
# Directory (relative to the working directory) holding the raw GWAS files.
GWAS_PATH = "data/other_gwas/original_files"

In [None]:
# Path templates, filled in with str.format(prefix=..., phenotype=...).
# All paths here are relative to the current working directory.
gwas_harmonized_pattern = "data/other_gwas/preprocessed_files/{prefix}__{phenotype}.tsv"
gwas_selected_snps_pattern = "data/other_gwas/preprocessed_files/{prefix}__{phenotype}__selected_snps.tsv"
# CSV read below into gwas_loci_summary_across_runs_df.
COMA_GWAS_SUMMARY = "results/gwas_loci_summary_across_runs.csv"
# CSV to which the -log10(p) table of the selected SNPs is written and re-read.
LOGP_PATH = "results/log10p_for_selected_snps_across_gwas.csv"

In [None]:
from loci_mapping import LOCUS_TO_REGION, REGION_TO_LOCUS, LOCI_TO_DROP

### GWAS file - reduction

In [None]:
# Reduce every raw GWAS file to the columns CHR, BP, SNP and P and store the
# result as a tab-separated file.
for raw_name, info in GWAS_dict.items():
    prefix = info["prefix"]
    phenotype = info["phenotype"].replace(" ", "_")

    # Write to the path the selected-SNP filtering step reads from
    # (gwas_harmonized_pattern) instead of the current working directory.
    o_filename = gwas_harmonized_pattern.format(prefix=prefix, phenotype=phenotype)
    os.makedirs(os.path.dirname(o_filename), exist_ok=True)

    # info["columns"] is applied as a column rename; it is presumably the
    # mapping from this file's own headers to CHR/BP/SNP/P — confirm.
    df = pd.read_csv(os.path.join(GWAS_PATH, raw_name), sep="\t").rename(info["columns"], axis=1)
    df = df[["CHR", "BP", "SNP", "P"]]
    df.to_csv(o_filename, sep='\t', header=True, index=False)

___

### Filter GWAS files for specific SNPs

#### Find significant SNPs across COMA runs.

In [None]:
# NOTE(review): in the visible notebook this name is first assigned in a later
# cell (the pd.read_csv(COMA_GWAS_SUMMARY) one). Run top-to-bottom on a fresh
# kernel, this display would raise NameError — confirm the intended cell order.
gwas_loci_summary_across_runs_df

In [None]:
# Work from the GWAS repository checkout; the "results/..." paths are relative.
# CARDIAC_MOTION_GWAS_REPO is the constant defined in the setup cell; the name
# CARDIAC_GWAS_REPO is not defined anywhere in the visible notebook.
os.chdir(CARDIAC_MOTION_GWAS_REPO)

In [None]:
gwas_loci_summary_across_runs_df = pd.read_csv(COMA_GWAS_SUMMARY)

# Boolean mask selecting, within each region, the row(s) with the smallest P
# (rows tied for the minimum are all kept).
# The aggregation is given by name: passing the builtin `min` to
# groupby(...).transform is deprecated in recent pandas releases.
min_p_per_region = gwas_loci_summary_across_runs_df.groupby(["region"])["P"].transform("min")
idx = min_p_per_region == gwas_loci_summary_across_runs_df["P"]

# One best association per region (barring ties), ordered by region.
best_association_per_region = gwas_loci_summary_across_runs_df[idx].sort_values("region")
# SNP identifiers of those best associations; used to filter the other GWAS.
best_snps = set(best_association_per_region.SNP)

In [None]:
# Attach a locus name to every row: the REGION_TO_LOCUS entry for its region,
# or the region identifier itself when the mapping has no entry for it.
best_association_per_region['locus_name'] = [
    REGION_TO_LOCUS.get(region_id, region_id)
    for region_id in best_association_per_region["region"]
]
best_association_per_region

In [None]:
# Work from the CardiacMotion checkout; the "mlruns/..." figure paths used by
# the plot viewer below are relative. CARDIAC_COMA_REPO is not defined in the
# visible notebook (it only appears in a commented-out line), so the defined
# CARDIAC_MOTION_REPO constant is used.
os.chdir(CARDIAC_MOTION_REPO)

In [None]:
# Regions left out of the locus selector. They are excluded by region name
# rather than by their formatted "<region> (<P>)" label, so the cell keeps
# working when a region's P-value (and therefore its label) changes.
EXCLUDED_REGIONS = {"chr6_79"}

# Selector options: label "<region> (<P in scientific notation>)" -> region,
# in order of increasing P.
regions = {
    f"{row.region} ({row.P:.1e})": row.region
    for _, row in best_association_per_region.sort_values("P").iterrows()
    if row.region not in EXCLUDED_REGIONS
}

@interact
def manhattan(
    region=widgets.Select(options=regions, description="Locus: \n",),
    PC_adjusted=False
):
    """Show the Manhattan plot and QQ plot of the best association of a region.

    Args:
        region: Region identifier; looked up in the "region" column of
            ``best_association_per_region``.
        PC_adjusted: When true, the figures are read from the run's
            ``GWAS_adj_10PCs`` artifact folder, otherwise from ``GWAS``.
    """
    from PIL import Image

    # Row of the selected region; provides the run id and the phenotype name.
    assoc = best_association_per_region.set_index("region").loc[region]
    run_id = assoc.run
    # The last four characters of the phenotype name identify the variable.
    z_variable = assoc.pheno[-4:]

    if not PC_adjusted:
        manhattan_file = f"mlruns/1/{run_id}/artifacts/GWAS/figures/{z_variable}__manhattan.png"
        qq_file = f"mlruns/1/{run_id}/artifacts/GWAS/figures/{z_variable}__QQ-plot.png"
    else:
        manhattan_file = f"mlruns/1/{run_id}/artifacts/GWAS_adj_10PCs/figures/GWAS__{z_variable}__1_{run_id}__manhattan.png"
        qq_file = f"mlruns/1/{run_id}/artifacts/GWAS_adj_10PCs/figures/GWAS__{z_variable}__1_{run_id}__QQ-plot.png"

    print(run_id, z_variable)
    # Manhattan plot first, then the QQ plot.
    for figure_path in (manhattan_file, qq_file):
        display(Image.open(figure_path))

#### Filter GWAS files for selected SNPs

In [None]:
# For every harmonized GWAS file, keep only the rows whose SNP is in best_snps
# and write them to the matching "selected SNPs" file.
for info in GWAS_dict.values():

    prefix = info["prefix"]
    phenotype = info["phenotype"].replace(" ", "_")

    o_filename = gwas_harmonized_pattern.format(prefix=prefix, phenotype=phenotype)
    ofile_selected = gwas_selected_snps_pattern.format(prefix=prefix, phenotype=phenotype)

    df = pd.read_csv(o_filename, sep="\t")
    # Vectorized membership test; a row-by-row apply is needlessly slow on
    # genome-wide tables and misbehaves on an empty frame.
    keep = df["SNP"].isin(best_snps)

    print(ofile_selected)
    df[keep].to_csv(ofile_selected, sep="\t", index=False, header=True)

In [None]:
# Collect, per GWAS, the p-values of the selected SNPs as a single column named
# "<prefix>__<phenotype>" and indexed by SNP.
pp = []

for info in GWAS_dict.values():

    prefix = info["prefix"]
    phenotype = info["phenotype"].replace(" ", "_")

    ofile_selected_snps = gwas_selected_snps_pattern.format(prefix=prefix, phenotype=phenotype)

    # Read this GWAS's own selected-SNP file. Reading `ofile_selected` here
    # would reuse a path left over from an earlier cell and load the same
    # file on every iteration.
    df = pd.read_csv(ofile_selected_snps, sep="\t")
    df = df[["SNP", "P"]]
    df = df.rename({"P": f"{prefix}__{phenotype}"}, axis=1)

    pp.append(df.set_index("SNP"))

# Inner join on SNP: only SNPs present in every GWAS remain.
snps_across_gwas = functools.reduce(lambda df1, df2: pd.merge(df1, df2, on='SNP'), pp)

# WHAT'S HAPPENING WITH THIS SNP?
# errors="ignore": the SNP may not survive the inner join, in which case there
# is nothing to drop.
snps_across_gwas = snps_across_gwas.drop("rs533885", errors="ignore")
log10p_gwas_df = (-np.log10(snps_across_gwas))
log10p_gwas_df.to_csv(LOGP_PATH, index=True)

In [None]:
# Bare expression: shows the table of best associations as the cell output.
best_association_per_region

In [None]:
# -log10 of the P column of best_association_per_region, as a one-column frame
# ("-log10(p)") that keeps the same index. Computed on the whole column at
# once instead of applying a Python function row by row.
logp = (-np.log10(best_association_per_region["P"])).to_frame("-log10(p)")

In [None]:
# Identifying columns of each best association, side by side with its
# -log10(p) value (the two frames are aligned on their index).
z_assoc = pd.concat(
    objs=[
        best_association_per_region.loc[:, ["SNP", "region", "locus_name"]],
        logp,
    ],
    axis="columns",
)

In [None]:
# Back to the GWAS repository checkout; the "results/..." paths are relative.
# CARDIAC_MOTION_GWAS_REPO is the constant defined in the setup cell; the name
# CARDIAC_GWAS_REPO is not defined anywhere in the visible notebook.
os.chdir(CARDIAC_MOTION_GWAS_REPO)

In [None]:
# Re-read the -log10(p) table from disk. Unlike the frame that was written,
# the SNP identifiers come back as an ordinary "SNP" column, not as the index.
log10p_gwas_df = pd.read_csv(LOGP_PATH)
log10p_gwas_df

In [None]:
# Join the best associations with the -log10(p) values of the other GWAS,
# matching z_assoc's SNP column against the SNP index of the reloaded table,
# and order the rows by decreasing -log10(p).
gwas_logp_by_snp = log10p_gwas_df.set_index("SNP")
assoc_df = z_assoc.merge(gwas_logp_by_snp, left_on="SNP", right_index=True)
assoc_df = assoc_df.sort_values(by="-log10(p)", ascending=False)

# Remove the row(s) of SNP rs10872167; SNP is the index only for this drop.
assoc_df = assoc_df.set_index("SNP")
assoc_df = assoc_df.drop("rs10872167")
assoc_df = assoc_df.reset_index()

# Row labels: the locus name, or the region where the locus is "Unnamed".
has_locus_name = assoc_df.locus_name != "Unnamed"
row_labels = assoc_df.locus_name.where(has_locus_name, assoc_df.region)
assoc_df = assoc_df.set_index(row_labels)

In [None]:
# Write the combined association table, index included, to a CSV file
# (path relative to the current working directory).
assoc_df.to_csv("results/log_p_assoc.csv")