In [1]:
import os
import pandas as pd
from typing import Dict

import matplotlib.pyplot as plt
from ipywidgets import interact
import ipywidgets as widgets

CARDIAC_COMA_REPO = f"{os.environ['HOME']}/01_repos/CardiacCOMA"

def get_cardiac_indices():
        
    timeframes = [str(i).zfill(3) for i in range(1, 50+1)]
    datafolder = f"{CARDIAC_COMA_REPO}/data/cardio/cardiac_indices"
    
    dfs = []
    for timeframe in timeframes:        
        df = pd.concat([
          pd.read_csv(f"{datafolder}/G{i}/LVRV_time{timeframe}.csv", index_col="case_id") 
          for i in range(1,5)
        ])
        df = df.assign(timeframe=timeframe)
        dfs.append(df)
        
    df = pd.concat(dfs)
    df = df.reset_index().set_index(['case_id', 'timeframe'])
    
    return df

def get_end_systolic_timeframes() -> Dict:
    
    END_SYS_TIMEFRAMES = f"{CARDIAC_COMA_REPO}/data/cardio/end_systole_timeframes.csv"
    
    dd = [ (x.id, str(x.end_systole_index).zfill(3)) for i, x in pd.read_csv(END_SYS_TIMEFRAMES).iterrows() ]
    
    return dd


def get_end_diastolic_timeframes() -> Dict:
    
    END_SYS_TIMEFRAMES = f"{CARDIAC_COMA_REPO}/data/cardio/end_systole_timeframes.csv"
    
    dd = [ (x.id, "001") for i, x in pd.read_csv(END_SYS_TIMEFRAMES).iterrows() ]
    
    return dd

In [2]:
volume_df = get_cardiac_indices()
volume_df = volume_df.sort_index()

In [3]:
@interact
def show_volume_curve(id=widgets.Select(options=volume_df.index.get_level_values("case_id").unique()[:50])):
    
    kk = volume_df[volume_df.index.get_level_values("case_id") == id].iloc[:,0]
    plt.plot(kk.to_numpy());

interactive(children=(Select(description='id', options=(1000215, 1000336, 1000363, 1000380, 1000407, 1000434, …

In [4]:
region_assocs_df = pd.read_csv("/home/rodrigo/01_repos/CardiacMotionGWAS/results/all_associations_best_per_region.csv")

In [5]:
exp_id = "3"

In [6]:
def fetch_loci_mapping():
    
    import requests
    from io import StringIO
    # https://docs.google.com/spreadsheets/d/1LbILFyaTHeRPit8v3gwx2Db4uS1Hnx6dibeGHK9zXcU/edit?usp=sharing
    # LINK = 'https://docs.google.com/spreadsheet/ccc?key=1LbILFyaTHeRPit8v3gwx2Db4uS1Hnx6dibeGHK9zXcU&output=csv'
    LINK = 'https://docs.google.com/spreadsheet/ccc?key=1XvVDFZSvcWWyVaLaQuTpglOqrCGB6Kdf6c78JJxymYw&output=csv'
    response = requests.get(LINK)
    assert response.status_code == 200, 'Wrong status code'
    loci_mapping_df = pd.read_csv(
        StringIO(response.content.decode()),
        sep=","
    ).set_index("region")
    
    return loci_mapping_df

In [7]:
loci_df = fetch_loci_mapping()

In [8]:
dynamic_loci = loci_df[~loci_df.only_dynamic.isnull()].index

In [9]:
@interact
def show_volume_curve(region=widgets.Select(options=dynamic_loci), which_extreme=widgets.Checkbox()):
    
    assocs_for_region = region_assocs_df[(region_assocs_df.region == region) & (region_assocs_df.expid == "X3")].sort_values("P")
    runid, zvar = assocs_for_region.run.iloc[0], assocs_for_region.pheno.iloc[0]
    
    MLFLOW_TRACKING_URI = f"{os.environ['HOME']}/01_repos/CardiacMotion/mlruns/"
    z_df = pd.read_csv(f"{MLFLOW_TRACKING_URI}/{exp_id}/{runid}/artifacts/latent_vector.csv")
    z_df = z_df.set_index("ID")
    z_df = z_df[zvar]
    # print()
    z_df = z_df.drop(set(ids_feos).intersection(z_df.index))
    
    ids_top = z_df[z_df > z_df.quantile(0.99)].index.to_list()
    ids_bottom = z_df[z_df < z_df.quantile(0.01)].index.to_list()
    
    if which_extreme:
        ids = set(ids_bottom).intersection(volume_df.index.get_level_values("case_id"))
    else:
        ids = set(ids_top).intersection(volume_df.index.get_level_values("case_id"))
    
    for id in list(ids)[:50]:        
        kk = volume_df[volume_df.index.get_level_values("case_id") == id].iloc[:,0]
        if len(kk) != 50:
            continue
        plt.plot(kk.to_numpy()/kk.iloc[0]);

interactive(children=(Select(description='region', options=('chr1_5', 'chr1_107', 'chr1_118', 'chr3_28', 'chr3…

In [13]:
import numpy as np

In [18]:
ids = [x[:7] for x in os.listdir("/home/rodrigo/01_repos/CardiacMotion/data/thicknesses/")]

In [19]:
np.load("/home/rodrigo/01_repos/CardiacMotion/data/thicknesses/1000215_thickness_per_aha.npy")

(50, 17)

In [46]:
aha_thickness_lst = []
for id in ids:
    try:
      aha_thickness = np.load(f"/home/rodrigo/01_repos/CardiacMotion/data/thicknesses/{id}_thickness_per_aha.npy")
      aha_thickness = pd.DataFrame(aha_thickness).reset_index().rename({"index": "timeframe"}, axis=1).assign(subject_id=lambda x: id).set_index(["subject_id", "timeframe"])
      aha_thickness_lst.append(aha_thickness)
    except FileNotFoundError:
        pass

In [47]:
aha_thickness_df = pd.concat(aha_thickness_lst)

In [54]:
aha_thickness_df.

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
subject_id,timeframe,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
4305589,0,4.408707,5.438991,8.699946,7.293068,4.805453,6.093532,5.871246,4.729550,6.745433,8.318595,5.252271,6.557948,6.608489,6.003170,6.852224,6.570515,6.849149
4305589,1,4.299850,5.370710,8.821410,7.479640,4.696481,5.865532,5.677747,4.626704,6.669958,8.265118,5.033722,6.199977,6.473427,5.954152,6.574781,6.215128,6.607017
4305589,2,4.185653,5.260313,8.659714,7.411335,4.606374,5.709787,5.712825,4.566908,6.688507,8.259472,5.020565,6.261362,6.561679,6.113524,6.588739,6.208053,6.716912
4305589,3,4.311679,5.399271,8.566120,7.247109,4.773534,5.928116,5.899192,4.715051,6.801800,8.272139,5.190833,6.580440,6.720671,6.209472,6.800464,6.528518,6.945863
4305589,4,4.534400,5.689459,8.626673,7.271320,5.060733,6.214196,6.005643,4.783818,6.817751,8.378364,5.389844,6.830173,6.822239,6.110757,6.872331,6.755327,6.924328
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5733702,45,5.599268,6.528843,9.096674,7.552526,4.893432,6.700122,6.748923,5.108354,7.415311,9.160182,6.052199,7.843151,7.210152,7.068819,7.331299,7.152646,7.195482
5733702,46,5.551963,6.478180,8.907543,7.374985,4.865488,6.677384,6.734124,5.135115,7.328893,9.098469,6.005125,7.784189,7.268034,7.127250,7.376049,7.189196,7.331838
5733702,47,5.329280,6.220796,8.583465,7.197787,4.719181,6.499273,6.527290,4.994314,7.088156,8.934456,5.818436,7.513484,7.146656,7.018718,7.242469,7.023096,7.261381
5733702,48,5.206666,6.084897,8.369320,7.041084,4.628309,6.391046,6.376649,4.872652,6.970689,8.814599,5.697511,7.340452,6.985889,6.905425,7.172629,6.883384,7.139301


In [49]:
aha_thickness_df.to_csv("aha_thickness.csv", index=True)

In [74]:
aha_thickness_df.index.get_level_values("subject_id") == 

Index(['4305589', '4305589', '4305589', '4305589', '4305589', '4305589',
       '4305589', '4305589', '4305589', '4305589',
       ...
       '5733702', '5733702', '5733702', '5733702', '5733702', '5733702',
       '5733702', '5733702', '5733702', '5733702'],
      dtype='object', name='subject_id', length=3000000)

In [82]:
@interact
def show_thickness_curve(
    region=widgets.Select(options=dynamic_loci), which_extreme=widgets.Checkbox(),
    aha_segment=widgets.IntSlider(min=0, max=17)
):
    
    assocs_for_region = region_assocs_df[(region_assocs_df.region == region) & (region_assocs_df.expid == "X3")].sort_values("P")
    runid, zvar = assocs_for_region.run.iloc[0], assocs_for_region.pheno.iloc[0]
    
    MLFLOW_TRACKING_URI = f"{os.environ['HOME']}/01_repos/CardiacMotion/mlruns/"
    z_df = pd.read_csv(f"{MLFLOW_TRACKING_URI}/{exp_id}/{runid}/artifacts/latent_vector.csv")
    z_df = z_df.set_index("ID")
    z_df = z_df[zvar]
    # print()
    z_df = z_df.drop(set(ids_feos).intersection(z_df.index))
    
    ids_top = z_df[z_df > z_df.quantile(0.99)].index.to_list()
    ids_bottom = z_df[z_df < z_df.quantile(0.01)].index.to_list()
    
    if which_extreme:        
        ids = set(ids_bottom).intersection(aha_thickness_df.index.get_level_values("subject_id").unique().astype(int))
    else:
        ids = set(ids_top).intersection(aha_thickness_df.index.get_level_values("subject_id").unique().astype(int))
    
    # volume_df[volume_df.index.get_level_values("case_id") == id].iloc[:,0]
    for id in list(ids)[:10]: 
        # print(aha_thickness_df.index.get_level_values("subject_id") == str(id))
        kk = aha_thickness_df[aha_thickness_df.index.get_level_values("subject_id") == str(id)].iloc[:,aha_segment]
        # print(kk)
        if len(kk) != 50:
            continue
        plt.plot(kk.to_numpy()/kk.iloc[0]);
        plt.ylim([0.9, 3])

interactive(children=(Select(description='region', options=('chr1_5', 'chr1_107', 'chr1_118', 'chr3_28', 'chr3…

In [11]:
normalized_vols = volume_df.groupby("case_id").transform(lambda x: x/x.iloc[0])
t_above_1 = (normalized_vols["LVEDV (mL)"] > 1).groupby("case_id").aggregate(sum)
ids_feos = t_above_1[t_above_1 > 5].index
ids_feos

Int64Index([1000590, 1000892, 1001629, 1003065, 1003814, 1005995, 1006929,
            1007469, 1013023, 1013345,
            ...
            4986456, 4987449, 4987912, 4988345, 4988483, 4989669, 4989728,
            4990164, 4992639, 4994894],
           dtype='int64', name='case_id', length=3530)

In [21]:
@interact
def show_volume_curve(id=widgets.Select(options=ids_feos)):
    
    kk = volume_df[volume_df.index.get_level_values("case_id") == id].iloc[:,0]
    plt.plot(kk.to_numpy());

interactive(children=(Select(description='id', options=(1017591, 1098773, 1165317, 1191279, 1328149, 1452949, …