# Cardiac index generation

For functional PCs, look at the `fPCA.ipynb` notebook.

In [2]:
import os

In [3]:
# Switch the working directory so that the relative paths used in later cells
# (e.g. "CardiacMotionGWAS//data/..." and "data/...") are resolved from here.
# NOTE(review): presumably `from paths import Paths` below also relies on this
# directory being the working directory -- confirm.
os.chdir("/home/user/01_repos/")

In [4]:
from paths import Paths

import os
import numpy as np
import pandas as pd
from typing import Dict

import matplotlib.pyplot as plt
from ipywidgets import interact
import ipywidgets as widgets

import glob
import re
from tqdm import tqdm

In [5]:
def get_id_from_path(file, cardiac_indices_regex=f"{Paths.Data.meshes_results}/(.*)/cardiac_indices.csv"):
    """Extract the subject ID from the path of a ``cardiac_indices.csv`` file.

    Args:
        file: Path of a per-subject ``cardiac_indices.csv`` file.
        cardiac_indices_regex: Regular expression whose first capture group
            is the subject ID. It is matched from the start of ``file``.

    Returns:
        The text captured by the first group of ``cardiac_indices_regex``.

    Raises:
        ValueError: If ``file`` does not match ``cardiac_indices_regex``.
    """
    match = re.match(cardiac_indices_regex, file)
    if match is None:
        # Fail with the offending path instead of "'NoneType' has no attribute 'group'".
        raise ValueError(
            f"{file!r} does not match the expected pattern {cardiac_indices_regex!r}"
        )
    return match.group(1)

def collect_cardiac_indices(output_file="data/cardiac_indices_mine_{n}.csv"):
    """Gather every per-subject ``cardiac_indices.csv`` into one table.

    Files are looked up as ``{Paths.Data.meshes_results}/*/cardiac_indices.csv``;
    the directory name is used as the subject ``ID``. Zeros in the measurement
    columns (``LVV``, ``LVM``, ``RVV``) are replaced by NaN, and ``BVV`` is
    added as ``LVV + RVV``.

    Only the measurement columns have zeros replaced, so a ``timeframe`` equal
    to 0 is kept as 0 instead of being turned into NaN.

    Args:
        output_file: Destination CSV path. ``{n}`` is substituted with the
            number of files that were read.

    Returns:
        DataFrame with columns ``ID``, ``timeframe``, ``LVV``, ``LVM``,
        ``RVV`` and ``BVV`` (also written to ``output_file`` without index).

    Raises:
        ValueError: If no file matches the search pattern.
    """
    cardiac_indices_filepattern = f"{Paths.Data.meshes_results}/*/cardiac_indices.csv"
    measurement_columns = ["LVV", "LVM", "RVV"]

    files = sorted(glob.glob(cardiac_indices_filepattern))
    if not files:
        raise ValueError(f"No files match {cardiac_indices_filepattern!r}")

    dfs = [
        pd.read_csv(file).assign(ID=get_id_from_path(file))
        for file in tqdm(files)
    ]

    all_indices = pd.concat(dfs)[["ID", "timeframe", *measurement_columns]]
    # A per-column mapping restricts the replacement to the measurements.
    all_indices = all_indices.replace(
        {column: 0 for column in measurement_columns}, np.nan
    )
    # assign() returns a new frame, avoiding assignment on a column slice.
    all_indices = all_indices.assign(BVV=all_indices.LVV + all_indices.RVV)
    all_indices.to_csv(output_file.format(n=len(files)), index=False)

    return all_indices

In [8]:
# all_indices_df = collect_cardiac_indices()
# Load a previously collected table instead of re-scanning the per-subject files.
all_indices_df = pd.read_csv("CardiacMotionGWAS//data/cardiac_indices_mine_61390.csv")
# Index by (subject, timeframe); later cells select rows through these two levels.
all_indices_df = all_indices_df.set_index(["ID", "timeframe"])
# Display 20 randomly chosen rows as a quick sanity check.
all_indices_df.sample(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,LVV,LVM,RVV,BVV
ID,timeframe,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4486766,49,159832.0,147338.1,129115.0,288947.0
2188256,24,35975.0,78185.1,29299.0,65274.0
1006232,41,95970.0,119732.55,88840.0,184810.0
2259628,37,75496.0,77101.5,67620.0,143116.0
4438731,48,142265.0,143371.2,97843.0,240108.0
5040312,5,161649.0,145102.65,140660.0,302309.0
3089523,21,40549.0,90871.2,25891.0,66440.0
3346456,5,,,,
1554777,42,118097.0,105279.3,112867.0,230964.0
1935798,29,122639.0,122006.85,126037.0,248676.0


### Discard subjects with "bad" curves

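Discard subjects with any of the following:
- more than 5 timeframes in which LVV exceeds 1.1 times its value at the first timeframe (the presumed ED);
- an implausible ES timeframe, i.e. the minimum of BVV falls before timeframe 10 or after timeframe 35;
- at least one row containing a negative value.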

In [10]:
# Express every curve relative to its own first row (per subject and column).
normalized_vols = all_indices_df.groupby("ID").transform(lambda x: x / x.iloc[0])
# Per subject, count the timeframes whose LVV exceeds 1.1 times the first value.
# The groupby's own .sum() is used because passing the builtin `sum` to
# aggregate() is deprecated in recent pandas releases.
t_above_1 = (normalized_vols["LVV"] > 1.1).groupby("ID").sum()
# Subjects with more than 5 such timeframes are flagged as bad.
bad_ids = set(t_above_1[t_above_1 > 5].index)

def ff(x):
    """Return ``x[1]``, or ``None`` when ``x`` has no such element.

    Used below to pull the timeframe out of ``(ID, timeframe)`` index labels
    while tolerating entries that are not tuples (e.g. NaN).

    Args:
        x: Any object; typically an ``(ID, timeframe)`` tuple.

    Returns:
        The element at position/key 1, or ``None`` if ``x`` is not
        subscriptable or has no element 1.
    """
    try:
        return x[1]
    except (TypeError, IndexError, KeyError):
        # Not subscriptable, too short, or no key 1.
        return None

# For every subject and column, the index label (ID, timeframe) of the minimum.
end_systole_df = all_indices_df.groupby("ID").idxmin(axis=0)
# End systole is taken as the timeframe at which BVV is minimal.
es_phase = end_systole_df["BVV"].apply(ff)
es_phase = es_phase[es_phase.notna()]

# Flag subjects whose ES timeframe lies outside [10, 35].
implausible_es = es_phase[(es_phase < 10) | (es_phase > 35)]
bad_ids = bad_ids | set(implausible_es.index)

# Flag subjects that have a negative value in any column of any row.
has_negative_value = (all_indices_df < 0).any(axis=1)
ids_with_negatives = all_indices_df[has_negative_value].index.get_level_values("ID").unique()
bad_ids = bad_ids | set(ids_with_negatives)

In [12]:
# Number of subjects flagged by the criteria above.
len(bad_ids)

1015

#### Examine volume curves for bad IDs

In [None]:
# Subjects offered in the selector. Alternatives kept for quick switching:
# options = all_indices_df.index.get_level_values("ID").unique()[:50]
# options = ids
# bad_ids is a set; sort it so the selector lists IDs in a stable order.
options = sorted(bad_ids)
ids_w = widgets.Select(options=options)

@interact
def show_volume_curve(id=ids_w):
    """Plot the BVV curve of the subject selected in the widget.

    Args:
        id: Subject ID, matched against the "ID" level of ``all_indices_df``.
    """
    BVV = all_indices_df[all_indices_df.index.get_level_values("ID") == id]["BVV"]
    plt.plot(BVV.to_numpy())
    # Explicit show(), as in the other interactive plots of this notebook.
    plt.show()

In [5]:
# Keep only the rows of subjects that were not flagged as bad.
# Index.isin performs the membership test in one vectorised call instead of
# calling a Python lambda once per row.
all_indices_df = all_indices_df[~all_indices_df.index.get_level_values("ID").isin(bad_ids)]

In [6]:
# Per subject, the (ID, timeframe) label at which BVV reaches its minimum.
es_labels = all_indices_df.groupby("ID").idxmin(axis=0)["BVV"]
# Keep only proper tuple labels; anything else (presumably NaN for subjects
# without usable BVV values) is skipped.
end_systole_indices = list(filter(lambda label: isinstance(label, tuple), es_labels))

In [None]:
def _column_per_subject(rows, source, target):
    """Return column ``source`` of ``rows`` indexed by ID only, renamed to ``target``."""
    return rows[[source]].droplevel("timeframe").rename(columns={source: target})


# End diastole is read from timeframe 1.
ed_rows = all_indices_df[all_indices_df.index.get_level_values("timeframe") == 1]
lvedv = _column_per_subject(ed_rows, "LVV", "LVEDV")
rvedv = _column_per_subject(ed_rows, "RVV", "RVEDV")

# End systole is read from the per-subject timeframe of minimal BVV.
es_rows = all_indices_df.loc[end_systole_indices]
lvesv = _column_per_subject(es_rows, "LVV", "LVESV")
rvesv = _column_per_subject(es_rows, "RVV", "RVESV")

In [None]:
# LV mass per subject: value at timeframe 1 and mean over all timeframes.
lvm = all_indices_df.reset_index()[["ID", "timeframe", "LVM"]]

lvm_first_frame = lvm[lvm.timeframe == 1]
lvm_ed = lvm_first_frame.set_index("ID")[["LVM"]].rename(columns={"LVM": "LVM_ED"})

lvm_mean = lvm.groupby("ID")[["LVM"]].mean().rename(columns={"LVM": "LVM_MEAN"})

In [None]:
# One row per subject with ED/ES volumes and LV mass side by side.
cardiac_indices = pd.concat([lvedv, lvesv, rvedv, rvesv, lvm_ed, lvm_mean], axis=1)

# Stroke volume = ED volume - ES volume.
cardiac_indices["LVSV"] = cardiac_indices["LVEDV"] - cardiac_indices["LVESV"]
cardiac_indices["RVSV"] = cardiac_indices["RVEDV"] - cardiac_indices["RVESV"]

# Ejection fraction = stroke volume / ED volume (a fraction, not a percentage).
cardiac_indices["LVEF"] = cardiac_indices["LVSV"] / cardiac_indices["LVEDV"]
cardiac_indices["RVEF"] = cardiac_indices["RVSV"] / cardiac_indices["RVEDV"]

cardiac_indices.to_csv("data/cardiac_indices_ed_and_es.csv")

___

# Find FWHM of volume curves

In [None]:
def get_index_at_zero_height(indices, values):
    """Locate the first zero crossing of ``values`` by linear interpolation.

    For each ``index`` in ``indices`` (in iteration order) the pair
    ``values[index]``, ``values[index + 1]`` is inspected. The first pair that
    touches or straddles zero determines the result.

    Args:
        indices: Iterable of integer lookups into ``values``.
        values: Subscriptable container of numbers (list, array, Series).

    Returns:
        The fractional index of the crossing as a float, or ``None`` when no
        inspected pair touches or crosses zero. Pairs whose second element
        lies outside ``values`` are skipped.
    """
    for index in indices:
        try:
            current, following = values[index], values[index + 1]
        except (KeyError, IndexError):
            # index + 1 falls outside the data: nothing to interpolate against.
            continue

        if current == 0:
            # The sample itself sits on the zero line.
            return float(index)

        if current * following <= 0:
            # Opposite signs (or the next sample is exactly zero):
            # interpolate linearly between the two samples.
            return index + abs(current) / (abs(following) + abs(current))

    return None
    

def find_half_peak_width_with_midpoint(y_values):
    """Measure the width at half depth of the dip in a curve.

    The "peak" is the *minimum* of ``y_values``; callers interested in a
    maximum pass the negated curve. The half height lies midway between the
    maximum and the minimum of the curve. The curve is searched for its first
    crossing of that level before the minimum and its first crossing from the
    minimum onwards.

    The search runs on sample positions, so it never reads past the last
    sample. Results are then mapped onto the index of ``y_values`` so they can
    be drawn directly on a plot that uses that index as x axis. NaN samples
    are ignored when locating the minimum and the half height.

    Args:
        y_values: pandas Series with a numeric index.

    Returns:
        Tuple ``(width, left_index, right_index, midpoint, peak_index)``:
        the two half-height crossings, their distance and centre (all on the
        index axis of ``y_values``) and the index label of the minimum.

    Raises:
        ValueError: If the curve does not cross its half height on both sides
            of the minimum, or if it contains no finite value.
    """
    x_values = np.asarray(y_values.index, dtype=float)
    samples = np.asarray(y_values, dtype=float)

    # Position of the extremum (the minimum of the curve).
    peak_position = int(np.nanargmin(samples))

    # Level halfway between the highest and the lowest sample.
    highest = np.nanmax(samples)
    lowest = np.nanmin(samples)
    half_height = highest - (highest - lowest) / 2

    # Crossings of the half height are zero crossings of the shifted curve.
    centered = samples - half_height
    left_position = get_index_at_zero_height(range(0, peak_position), centered)
    right_position = get_index_at_zero_height(
        range(peak_position, len(samples) - 1), centered
    )
    if left_position is None or right_position is None:
        raise ValueError(
            "curve does not cross its half height on both sides of the minimum"
        )

    # Translate fractional positions into coordinates on the series' index.
    positions = np.arange(len(samples))
    left_index = float(np.interp(left_position, positions, x_values))
    right_index = float(np.interp(right_position, positions, x_values))

    width = right_index - left_index
    midpoint = (right_index + left_index) / 2
    peak_index = y_values.index[peak_position]

    return width, left_index, right_index, midpoint, peak_index


In [None]:
# Distinct subject IDs present in the "ID" level of the table.
subject_ids = all_indices_df.index.get_level_values("ID").unique()

In [None]:
# subject ID -> (width, left crossing, right crossing, midpoint) of the LVV curve
indices_dict = {}
# Subjects for which no width could be determined.
fwhm_failed_ids = []

# A single groupby pass yields each subject's curve without rescanning the
# whole table once per subject; sort=False keeps the order of first appearance.
for subject_id, subject_lvv in tqdm(all_indices_df["LVV"].groupby(level="ID", sort=False)):

    # Index the curve by timeframe only.
    subject_lvv = subject_lvv.droplevel("ID")

    try:
        # The helper returns five values; the last one (location of the
        # minimum) is not stored. Unpacking only four made every call fail.
        width, left_t, right_t, midpoint, _ = find_half_peak_width_with_midpoint(subject_lvv)
    except (ValueError, TypeError, KeyError, IndexError):
        # No usable half-height crossings for this curve: record and move on.
        fwhm_failed_ids.append(subject_id)
        continue

    indices_dict[subject_id] = (width, left_t, right_t, midpoint)

    # plt.plot(subject_lvv.index, subject_lvv)
    # plt.axvline(x=left_t, color='red', linestyle='--')
    # plt.axvline(x=midpoint, color='red', linestyle='--')
    # plt.axvline(x=right_t, color='red', linestyle='--')
    # plt.show()

In [None]:
# One row per subject, one column per stored quantity.
fwhm_df = pd.DataFrame(indices_dict).T
fwhm_df = fwhm_df.set_axis(
    ["FWHM", "left_half_height_index", "right_half_height_index", "midpoint"],
    axis=1,
)

In [None]:
# Persist only the width and the midpoint; the two crossing columns are not written.
fwhm_df[["FWHM", "midpoint"]].to_csv(f"/home/user/01_repos/CardiacMotionGWAS/data/LVV_FWHM.csv")

In [None]:
@interact
def show_curves_with_vl(subject_id=widgets.Select(options=fwhm_df.index)):
    """Plot a subject's LVV curve with its half-height crossings and midpoint.

    Args:
        subject_id: Subject chosen in the selector; must be a row of ``fwhm_df``.
    """
    belongs_to_subject = all_indices_df.index.get_level_values("ID") == subject_id
    lvv = all_indices_df[belongs_to_subject].reset_index().set_index("timeframe").LVV

    markers = fwhm_df.loc[subject_id]

    plt.plot(lvv.index, lvv)
    # Dashed red lines: left crossing, midpoint, right crossing.
    for column in ("left_half_height_index", "midpoint", "right_half_height_index"):
        plt.axvline(x=markers[column], color='red', linestyle='--')
    plt.show()

# Generate local indices

Read data

In [13]:
# Thickness table indexed by (subject, timeframe); zeros are treated as missing.
aha_thickness_df = pd.read_csv("aha_thickness.csv").set_index(["subject_id", "timeframe"]).replace(0, np.nan)
cardiac_indices = pd.read_csv("data/cardiac_indices_ed_and_es.csv").rename({"ID": "subject_id"}, axis=1).set_index("subject_id")

# Cube roots of the ED/ES volumes, used below to scale thicknesses.
# rename_axis() names the index on a new object; assigning to `.index.name`
# would modify an Index object that may be shared with `cardiac_indices`.
cbrt_lvedv_df = (cardiac_indices.LVEDV**(1/3)).rename_axis("ID")
cbrt_lvesv_df = (cardiac_indices.LVESV**(1/3)).rename_axis("ID")

### Wall thickness

In [14]:
# (subject, timeframe) pairs that are both rows of the thickness table and
# end-systolic labels found from the volume curves.
# NOTE(review): ED is read at timeframe 1 from the volume table but at
# timeframe 0 from the thickness table -- confirm both tables number their
# timeframes the same way, otherwise these pairs are shifted by one frame.
common_indices = set(aha_thickness_df.index).intersection(end_systole_indices)
# Drop the pairs belonging to subjects flagged as bad.
common_indices = [ (subject, phase) for subject, phase in common_indices if subject not in bad_ids ]

In [16]:
# Thickness per AHA segment at each subject's end-systolic timeframe.
thickness_ES = aha_thickness_df.loc[common_indices]
thickness_ES.columns = [f"thickness_ES_AHA{str(x).zfill(2)}" for x in thickness_ES.columns]
# Drop incomplete rows, then index by subject only (named "ID").
thickness_ES = thickness_ES.dropna().droplevel("timeframe").rename_axis("ID")
# Attach the cube root of LVESV (column "LVESV") as the scaling factor.
thickness_ES = thickness_ES.merge(cbrt_lvesv_df, left_index=True, right_index=True)

# Relative thickness = thickness / cbrt(LVESV), computed for the whole frame
# at once instead of row by row.
rel_thickness_ES = thickness_ES.div(thickness_ES["LVESV"], axis=0)
rel_thickness_ES = rel_thickness_ES.drop("LVESV", axis=1)
rel_thickness_ES.columns = [ f"rel_{col}" for col in rel_thickness_ES.columns ]

thickness_ES = thickness_ES.drop("LVESV", axis=1)

In [17]:
# Thickness per AHA segment at timeframe 0 (used as end diastole).
is_first_timeframe = aha_thickness_df.index.get_level_values("timeframe") == 0
thickness_ED = aha_thickness_df[is_first_timeframe].droplevel("timeframe")

# Drop incomplete rows and index by subject (named "ID").
thickness_ED = thickness_ED.dropna().rename_axis("ID")
thickness_ED.columns = [f"thickness_ED_AHA{str(x).zfill(2)}" for x in thickness_ED.columns]
# Attach the cube root of LVEDV (column "LVEDV") as the scaling factor.
thickness_ED = thickness_ED.merge(cbrt_lvedv_df, left_index=True, right_index=True)

# Relative thickness = thickness / cbrt(LVEDV), computed for the whole frame
# at once instead of row by row.
rel_thickness_ED = thickness_ED.div(thickness_ED["LVEDV"], axis=0)
rel_thickness_ED = rel_thickness_ED.drop("LVEDV", axis=1)
rel_thickness_ED.columns = [ f"rel_{col}" for col in rel_thickness_ED.columns ]

thickness_ED = thickness_ED.drop("LVEDV", axis=1)

In [23]:
# Mean over the per-segment columns only (their names contain "_AHA").
# Restricting the columns keeps the cell idempotent: if it is run again, the
# previously added average is not folded into the new mean.
thickness_ED["avg_thickness_ED"] = thickness_ED.filter(like="_AHA").mean(axis=1)
thickness_ES["avg_thickness_ES"] = thickness_ES.filter(like="_AHA").mean(axis=1)
rel_thickness_ED["avg_rel_thickness_ED"] = rel_thickness_ED.filter(like="_AHA").mean(axis=1)
rel_thickness_ES["avg_rel_thickness_ES"] = rel_thickness_ES.filter(like="_AHA").mean(axis=1)

In [25]:
# Write the absolute and relative thickness tables (ED and ES) to the data
# folder under Paths.Repos.CARDIAC_MOTION_GWAS, index included.
thickness_ED.to_csv(f"{Paths.Repos.CARDIAC_MOTION_GWAS}/data/thickness_end_diastole.csv")
rel_thickness_ED.to_csv(f"{Paths.Repos.CARDIAC_MOTION_GWAS}/data/relative_thickness_end_diastole.csv")
thickness_ES.to_csv(f"{Paths.Repos.CARDIAC_MOTION_GWAS}/data/thickness_end_systole.csv")
rel_thickness_ES.to_csv(f"{Paths.Repos.CARDIAC_MOTION_GWAS}/data/relative_thickness_end_systole.csv")

### Wall thickening

In [None]:
import pickle as pkl

In [None]:
# ids = [x[:7] for x in os.listdir(f"{Paths.Repos.CARDIAC_MOTION}/notebooks/thicknesses/")]
# aha_thickness_lst = []
# 
# for id in tqdm(ids):
#     try:
#       aha_thickness = np.load(f"{Paths.Repos.CARDIAC_MOTION}/notebooks/thicknesses/{id}_thickness_per_aha.npy")
#       aha_thickness = pd.DataFrame(aha_thickness).reset_index().rename({"index": "timeframe"}, axis=1).assign(subject_id=lambda x: id).set_index(["subject_id", "timeframe"])
#       aha_thickness_lst.append(aha_thickness)
#     except FileNotFoundError:
#         pass
#     
# aha_thickness_df = pd.concat(aha_thickness_lst)
# aha_thickness_df.columns = aha_thickness_df.columns + 1
# aha_thickness_df.to_csv("aha_thickness.csv", index=True)

In [None]:
# Thickening per segment = maximum minus minimum thickness over all timeframes.
thickness_by_subject = aha_thickness_df.groupby(level="subject_id")
aha_thickening_df = thickness_by_subject.max() - thickness_by_subject.min()

In [None]:
# Thickening scaled by the cube root of LVEDV. The division is done for the
# whole frame at once instead of row by row; as before, the "LVEDV" column is
# kept in the result (it becomes 1 after the division).
thickening_with_scale = aha_thickening_df.merge(
    cbrt_lvedv_df, left_index=True, right_index=True
)
thickening_rltv2vol = thickening_with_scale.div(thickening_with_scale["LVEDV"], axis=0)

In [None]:
# Compare the two thickening measures for the first AHA segment.
# NOTE: `relative_wall_thickening` is built in a cell further down; run that
# cell first. That cell renames its columns, so the first segment is selected
# by position rather than by the label "1".
relative_segment_1 = relative_wall_thickening.iloc[:, [0]].set_axis(["1_x"], axis=1)
scaled_segment_1 = thickening_rltv2vol[["1"]].set_axis(["1_y"], axis=1)
kk = relative_segment_1.merge(scaled_segment_1, left_index=True, right_index=True)
plt.scatter(kk["1_x"], kk["1_y"], alpha=0.1, s=0.2)
plt.xlim((1.5, 3))
plt.ylim((0.05, 0.2))

In [None]:
# Relative thickening per segment = maximum / minimum thickness over all timeframes.
thickness_by_subject = aha_thickness_df.groupby(level="subject_id")
relative_wall_thickening = (thickness_by_subject.max() / thickness_by_subject.min()).dropna()

# Keep subjects whose first segment thickens by more than 10 %.
thickens_enough = relative_wall_thickening["1"] > 1.1
relative_wall_thickening = relative_wall_thickening[thickens_enough]

relative_wall_thickening.columns = [ f"relative_wall_thickness_aha{str(i).zfill(2)}" for i in range(1,18) ]
relative_wall_thickening.to_csv(f"{Paths.Repos.CARDIAC_MOTION}/data/relative_wall_thickening.csv")

In [None]:
# Distribution of the relative thickening of the first AHA segment. The
# column is selected by position because the defining cell renames the
# columns, so the label "1" no longer exists.
plt.hist(relative_wall_thickening.iloc[:, 0], bins=100);

In [None]:
# Absolute thickening per segment = maximum minus minimum thickness over all timeframes.
thickness_by_subject = aha_thickness_df.groupby(level="subject_id")
absolute_wall_thickening = thickness_by_subject.max() - thickness_by_subject.min()

# Restrict to the subjects retained in the relative-thickening table.
absolute_wall_thickening = absolute_wall_thickening.loc[relative_wall_thickening.index]

absolute_wall_thickening.columns = [ f"absolute_wall_thickness_aha{str(i).zfill(2)}" for i in range(1,18) ]
absolute_wall_thickening.to_csv(f"{Paths.Repos.CARDIAC_MOTION}/data/absolute_wall_thickening.csv")

In [None]:
@interact
def show_curve_per_segment(
    subject_id=4932980,#aha_thickness_df.index.get_level_values("subject_id").unique()[:20],
    aha_segment=widgets.IntSlider(min=1,max=17)
):
    """Plot one segment's thickness curve with its half-height markers.

    Args:
        subject_id: Subject whose curve is shown.
        aha_segment: AHA segment number; its string form is the column name.
    """
    values = aha_thickness_df.loc[subject_id, str(aha_segment)]
    plt.plot(values.index, values)

    # The helper looks for a minimum, so the thickness curve is negated.
    _, l_t, r_t, _, peak_index = find_half_peak_width_with_midpoint(-values)

    # Dashed red lines: left crossing, extremum, right crossing.
    for marker in (l_t, peak_index, r_t):
        plt.axvline(x=marker, color='red', linestyle='--')
    plt.show()