In [1]:
import os, sys
HOME = os.environ["HOME"]
CARDIAC_COMA_REPO = "{HOME}/01_repos/CardiacCOMA/"

In [2]:
import mlflow

import torch
import torch.nn.functional as F

os.chdir(CARDIAC_COMA_REPO)
from config.load_config import load_yaml_config, to_dict

import ipywidgets as widgets
from ipywidgets import interact
from IPython.display import Image
from mlflow.tracking import MlflowClient

import pickle as pkl
import pytorch_lightning as pl

from argparse import Namespace
import matplotlib.pyplot as plt

#import surgeon_pytorch
#from surgeon_pytorch import Inspect, get_layers

import numpy as np
import pandas as pd
from IPython import embed
sys.path.insert(0, '..')

import model.Model3D
from utils.helpers import get_coma_args, get_lightning_module, get_datamodule
from copy import deepcopy
from pprint import pprint

from copy import deepcopy
from typing import List

FileNotFoundError: [Errno 2] No such file or directory: '{HOME}/01_repos/CardiacCOMA/'

In [None]:
from mlflow_helpers import \
    list_artifacts,\
    get_significant_loci,\
    get_metrics_cols, \
    get_params_cols, \
    get_runs_df, \
    get_good_runs,\
    summarize_loci_across_runs,\
    get_model_pretrained_weights

In [None]:
TRACKING_URI = f"file://{CARDIAC_COMA_REPO}/mlruns"
mlflow.set_tracking_uri(TRACKING_URI)

In [None]:
client = MlflowClient()

### Retrieve cardiac indices

In [None]:
timeframe = "1".zfill(3)
datafolder = "data/cardio/cardiac_indices"

In [None]:
df = pd.concat([pd.read_csv(f"{datafolder}/G{i}/LVRV_time{timeframe}.csv", index_col="case_id") for i in range(1,5)])

In [None]:
df

### Compute sphericity

In [None]:
import scipy
import math
import pandas as pd
import os
import shlex
from subprocess import check_output

from scipy.spatial import ConvexHull

import numpy as np
import pickle as pkl

import pyvista as pv
from ipywidgets import interact, interactive, fixed, interact_manual

In [None]:
# meshes = pkl.load(open("data/cardio/LV_meshes_at_ED_35k.pkl", "rb"))
# convhull = {}
# for i, (id, mesh) in enumerate(meshes.items()):
#     if i % 1000 == 0:
#         print(i)
#     convhull[id] = ConvexHull(mesh)

In [None]:
def sphericity_index(convex_hull):
    return((36 * math.pi * convex_hull.volume**2)**(1./3) / convex_hull.area)

In [None]:
sph_df = pd.read_csv("data/cardio/sphericity.csv")

In [None]:
df.index = df.index.astype(str)
sph_df = sph_df.set_index("id")
sph_df.index = sph_df.index.astype(str)

In [None]:
cardiac_indices_df = df.merge(sph_df, left_index=True, right_index=True)

In [None]:
cardiac_indices_df

In [None]:
def experiment_selection_widget():
    options = [exp.name for exp in mlflow.list_experiments()]

    experiment_w = widgets.Select(
      options=options,
      value="Cardiac - ED"
    )
    
    return experiment_w

exp_w = experiment_selection_widget()

@interact
def get_runs(exp_name=exp_w):  
  try:
    exp_id = mlflow.get_experiment_by_name(exp_name).experiment_id
    runs_df = get_runs_df(exp_name=exp_name, only_finished=True)
    metrics, params = get_metrics_cols(runs_df), get_params_cols(runs_df)  
    # display(runs_df.loc[:, [*metrics, *params]].drop("params.platform", axis=1).head(10))
    return runs_df
  except:
    pass

In [None]:
runs_df = get_runs_df(exp_name=exp_w.value, only_finished=True)

In [None]:
z_paths = runs_df.reset_index().apply(
    lambda row: ((row.experiment_id, row.run_id), row.artifact_uri.replace("file://", "") + f'''/output/z_adj_{row.experiment_id}_{row.run_id}.tsv'''), axis=1
)
z_paths = dict(z_paths.values.tolist())

In [None]:
z_dfs = {}

for run_id, z_path in z_paths.items():
    try:
        z_dfs[run_id] = pd.read_csv(z_path, sep="\t").set_index("ID")
    except FileNotFoundError:
        pass

In [None]:
def change_col_names(exper_id, run_id, kk):
    kk.columns = [f"{exper_id}_{run_id[:5]}_{col}" for col in kk.columns]
    return kk

In [None]:
z_dfs_renamed = [change_col_names(expid, runid, z_df) for (expid, runid), z_df in z_dfs.items()]

In [None]:
z_all_df = pd.concat(z_dfs_renamed, axis=1)

In [None]:
z_corr_df = z_all_df.corr().abs()

# Correlation between latent variables

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
corr_lst = np.triu(z_corr_df.to_numpy()).flatten()

In [None]:
plt.hist(corr_lst[~(corr_lst == 1.) & ~(corr_lst == 0.)], bins=20);

In [None]:
z_all_df = z_all_df.set_index(z_all_df.index.astype(str))

In [None]:
z_all_df.index

In [None]:
common_subjects = list(set(cardiac_indices_df.drop_duplicates().index).intersection(set(z_all_df.drop_duplicates().index)))

In [None]:
len(common_subjects)

In [None]:
ccii = cardiac_indices_df.loc[sorted(common_subjects)].drop_duplicates()

In [None]:
zz.shape

In [None]:
zz["1_e6490_z000"].corr(ccii.LVSph)

In [None]:
ccii.corrwith(zz[0], axis=1)

In [None]:
zz = z_all_df.loc[sorted(common_subjects)].drop_duplicates()

In [None]:
ccii.isna().count()

In [None]:
ccii.to_csv("lvedv_lvm_rvedv_lvsph.csv", index=True, index_label="ID")

In [None]:
corr_matrix = pd.concat([zz, ccii], axis=1).corr() #.index.str.startswith("LV")

In [None]:
z_corr = zz.corr()

In [None]:
corr_z_vs_indices = corr_matrix[ccii.columns]

In [None]:
corr_z_vs_indices.to_csv("data/cardio/corr_z_vs_indices.csv", index=True, index_label="phenotype")

In [None]:
len([ind for ind in cardiac_indices_df.index if ind in z_all_df.index])

In [None]:
cardiac_indices_df.index

In [None]:
pd.read_csv("data/cardio/corr_z_vs_indices.csv")