In [1]:
import os, sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import mlflow

import ipywidgets as widgets
from ipywidgets import interact

import seaborn as sns
from scipy.cluster import hierarchy

import datetime
import pytz

from easydict import EasyDict

In [2]:
import cardio_mesh

from cardiac_motion import MLFLOW_URI
from cardiac_motion.utils.run_helpers import get_runs, Run
from cardiac_motion.config.Constants import SYNTHETIC_DATASET_PARAMS_MLFLOW

import cardiac_motion_upe
from cardiac_motion_upe.run_helpers import EnsembleGWASResults

In [3]:
from scipy.spatial import ConvexHull

def compute_convex_hull_volume(meshes):
    
    if isinstance(meshes, list) or len(meshes.shape) == 3:
        return [ compute_convex_hull_volume(mesh) for mesh in meshes ]
    
    elif len(meshes.shape) == 2:
        return ConvexHull(meshes).volume / 1000

In [4]:
runs_df = get_runs().drop(SYNTHETIC_DATASET_PARAMS_MLFLOW, axis=1)

In [5]:
ensemble_results = EnsembleGWASResults(
    root_dir=f"{cardiac_motion_upe.GWAS_RESULTS_DIR}/Unsupervised_spatiotemporal", 
    # top_n_runs_per_chamber=None, from_cached=True
)

2025-03-07 17:47:40 - INFO - Found 737 files under the folder /mnt/data/01_repos/CardiacMotionGWAS/results/gwas/Unsupervised_spatiotemporal/summaries
 62%|██████▏   | 459/737 [00:01<00:00, 377.43it/s]

517c5ae311f04a19ac87cf5629d1b479 not in the dictionary
517c5ae311f04a19ac87cf5629d1b479 not in the dictionary
517c5ae311f04a19ac87cf5629d1b479 not in the dictionary
517c5ae311f04a19ac87cf5629d1b479 not in the dictionary
517c5ae311f04a19ac87cf5629d1b479 not in the dictionary
517c5ae311f04a19ac87cf5629d1b479 not in the dictionary
517c5ae311f04a19ac87cf5629d1b479 not in the dictionary


 73%|███████▎  | 538/737 [00:01<00:00, 384.67it/s]

517c5ae311f04a19ac87cf5629d1b479 not in the dictionary
517c5ae311f04a19ac87cf5629d1b479 not in the dictionary
517c5ae311f04a19ac87cf5629d1b479 not in the dictionary
517c5ae311f04a19ac87cf5629d1b479 not in the dictionary
517c5ae311f04a19ac87cf5629d1b479 not in the dictionary
517c5ae311f04a19ac87cf5629d1b479 not in the dictionary
517c5ae311f04a19ac87cf5629d1b479 not in the dictionary


 89%|████████▉ | 657/737 [00:01<00:00, 386.45it/s]

517c5ae311f04a19ac87cf5629d1b479 not in the dictionary
517c5ae311f04a19ac87cf5629d1b479 not in the dictionary


100%|██████████| 737/737 [00:01<00:00, 372.49it/s]
2025-03-07 17:47:42 - INFO - Collected GWAS summary data for 721 phenotypes.
2025-03-07 17:47:42 - INFO - Concatenating...
2025-03-07 17:47:43 - INFO - Assigning dynamic/static label to phenotypes...
2025-03-07 17:47:44 - ERROR - '4786e2deaa6d4621bfe73b4c69095e33'


In [21]:
# ensemble_results.region_assocs_df.query("chamber == 'LA'").sort_values("P").head(40)

In [8]:
VARIABLE_TYPE = "dynamic"
idx_min_p_per_region = ensemble_results.region_assocs_df[(ensemble_results.region_assocs_df.P < 1e-9) & (ensemble_results.region_assocs_df.variable_type == VARIABLE_TYPE)].groupby(["region", "chamber"]).P.idxmin()

ensemble_results.region_assocs_df.loc[idx_min_p_per_region].to_csv(
    f"{cardiac_motion_upe.BASE_DIR}/results/best_assoc_per_region_only_dynamic.csv"
)

AttributeError: 'DataFrame' object has no attribute 'variable_type'

In [10]:
ensemble_results.region_assocs_df

Unnamed: 0,pheno,region,CHR,SNP,BP,AF,a_0,a_1,BETA,SE,T,P,expid,chamber,run,full_pheno,INFO,msd_static,msd_dynamic,rec_ratio
0,z000,chr1_1,1,rs116720752,1508331.0,0.003064,A,G,-0.549850,0.162570,-3.3822,0.000720,3,RV,4786e2deaa6d4621bfe73b4c69095e33,4786e2deaa6d4621bfe73b4c69095e33_z000,,,,
1,z000,chr1_10,1,rs112403579,12906910.0,0.049464,T,A,0.076013,0.019956,3.8090,0.000140,3,RV,4786e2deaa6d4621bfe73b4c69095e33,4786e2deaa6d4621bfe73b4c69095e33_z000,,,,
2,z000,chr1_100,1,rs773213342,196406419.0,0.871100,AT,A,0.034022,0.010660,3.1916,0.001416,3,RV,4786e2deaa6d4621bfe73b4c69095e33,4786e2deaa6d4621bfe73b4c69095e33_z000,,,,
3,z000,chr1_101,1,rs572048024,199064928.0,0.005106,G,GA,-0.185260,0.050761,-3.6496,0.000263,3,RV,4786e2deaa6d4621bfe73b4c69095e33,4786e2deaa6d4621bfe73b4c69095e33_z000,,,,
4,z000,chr1_102,1,rs72735453,199516164.0,0.039012,T,C,-0.064432,0.018228,-3.5347,0.000409,3,RV,4786e2deaa6d4621bfe73b4c69095e33,4786e2deaa6d4621bfe73b4c69095e33_z000,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1211996,z031,chr9_71,9,rs61502474,137238495.0,0.059853,C,T,-0.055688,0.013941,-3.9946,0.000065,,LA,f345e14868744300b03d222c8dbc38a7,f345e14868744300b03d222c8dbc38a7_z031,0.98913,,,
1211997,z031,chr9_72,9,rs17040188,138310488.0,0.003404,A,G,-0.332520,0.093362,-3.5616,0.000369,,LA,f345e14868744300b03d222c8dbc38a7,f345e14868744300b03d222c8dbc38a7_z031,0.96545,,,
1211998,z031,chr9_73,9,rs11103342,139021644.0,0.177840,T,C,0.028909,0.008611,3.3571,0.000788,,LA,f345e14868744300b03d222c8dbc38a7,f345e14868744300b03d222c8dbc38a7_z031,0.99698,,,
1211999,z031,chr9_8,9,9:7495422_GA_G,7495422.0,0.137710,GA,G,0.039810,0.009701,4.1036,0.000041,,LA,f345e14868744300b03d222c8dbc38a7,f345e14868744300b03d222c8dbc38a7_z031,0.96510,,,


In [18]:
ensemble_results.region_assocs_df = ensemble_results.region_assocs_df.set_index("run")

In [16]:
kk = ensemble_results.region_assocs_df.query("chamber == 'LV' and region == 'chr7_78'").sort_values("P").head(5).set_index("run")

In [29]:
for index in kk.index:
    # ensemble_results.region_assocs_df.loc[index]
    
    runid = index # ensemble_results.region_assocs_df.loc[index, "run"]
    zvar = ensemble_results.region_assocs_df.loc[index, "pheno"]
    
    runinfo = runs_df.loc[runs_df.index.get_level_values("run_id") == runid]
    run = Run(runinfo, load_model=False)
    run.plot_volume_curves(zvar=zvar)

NameError: name 'partition_mapping' is not defined

In [None]:
ensemble_results.region_assocs_df.loc[idx_min_p_per_region].sort_values("P").head(50)

Unnamed: 0,pheno,region,CHR,SNP,BP,AF,a_0,a_1,BETA,SE,...,P,expid,chamber,run,full_pheno,INFO,variable_type,msd_static,msd_dynamic,rec_ratio
138305,z012,chr14_3,14,rs365990,23861811.0,0.36932,A,G,-0.072002,0.00719,...,1.399587e-23,4.0,LV,3dcf6f11e88c4aa886a4e99114bed84f,3dcf6f11e88c4aa886a4e99114bed84f_z012,,dynamic,0.714291,1.144649,0.507603
37289,z011,chr12_17,12,rs4963772,24758480.0,0.15022,G,A,-0.096582,0.009741,...,3.7844260000000004e-23,3.0,RV,63b67e4bc99a4e5cba6cca13be52966e,63b67e4bc99a4e5cba6cca13be52966e_z011,,dynamic,0.811321,1.226442,0.410839
630682,z015,chr12_17,12,rs4963772,24758480.0,0.15022,G,A,-0.091901,0.009715,...,3.250873e-21,7.0,RA,ecf5189409b34a48b80365c8a8c5f6b7,ecf5189409b34a48b80365c8a8c5f6b7_z015,,dynamic,1.732509,2.292956,0.65643
1861070,z021,chr10_74,10,rs17617337,121426884.0,0.21464,C,T,-0.074891,0.007992,...,7.498942e-21,,LV,4478fecff9f840f29797cd28fc84e6e2,4478fecff9f840f29797cd28fc84e6e2_z021,0.9944,dynamic,0.328243,0.936281,inf
2121134,z027,chr6_78,6,rs72967533,118655020.0,0.47741,T,C,-0.061112,0.006606,...,2.328091e-20,,LV,d4a372ea29d8492592fae2971d274753,d4a372ea29d8492592fae2971d274753_z027,0.99207,dynamic,0.35954,0.781835,0.343003
609519,z014,chr3_28,3,rs6801957,38767315.0,0.59657,T,C,0.064239,0.007093,...,1.4060479999999998e-19,7.0,RA,d0f9a5af1a554f86a107319a8ceb19cd,d0f9a5af1a554f86a107319a8ceb19cd_z014,,dynamic,1.77477,2.179437,0.573915
669345,z008,chr12_17,12,rs11047539,24781446.0,0.14961,A,G,-0.08761,0.009734,...,2.344229e-19,8.0,AO,f6549c1a0ef44033a996abc0825f7385,f6549c1a0ef44033a996abc0825f7385_z008,,dynamic,0.809079,1.369405,0.445751
1774849,z019,chr6_79,6,rs4945623,118980556.0,0.45334,G,C,-0.059437,0.006606,...,2.3933159999999996e-19,,LV,177d5968e4ba4aef8bced3e38e110762,177d5968e4ba4aef8bced3e38e110762_z019,0.994,dynamic,0.611305,0.985543,0.425228
63590,z014,chr6_78,6,rs3951016,118559658.0,0.47072,T,A,-0.06232,0.007006,...,6.081349999999999e-19,3.0,RV,84afed1478284b23af12da3e05f7de22,84afed1478284b23af12da3e05f7de22_z014,,dynamic,0.944548,1.369728,0.478763
270948,z014,chr12_17,12,rs4963772,24758480.0,0.15022,G,A,-0.085711,0.009727,...,1.276439e-18,4.0,LV,b0e56885eace49de9ae80f7c81731ba1,b0e56885eace49de9ae80f7c81731ba1_z014,,dynamic,0.566121,0.932896,0.393794


In [None]:
voxelized_shapes = run.load_voxelizations()

In [None]:
filtered_shapes = voxelized_shapes[(voxelized_shapes.index.get_level_values("z_var") == z) & (voxelized_shapes.index.get_level_values("value") == j)]

# Volume curves for LA

In [None]:
runid = ensemble_results.region_assocs_df.iloc[499836].run

In [None]:
runid

'6fa1322269a346f9bf6ecf7ef97f2d53'

In [None]:
runinfo = runs_df.loc[runid]

In [None]:
run = Run(runinfo, load_model=True)

2024-02-26 23:45:35 - INFO - Loaded weights from checkpoint:
 /home/user/01_repos/CardiacMotion/mlruns/6/6fa1322269a346f9bf6ecf7ef97f2d53/artifacts/restored_model_checkpoint/epoch=398-step=292466.ckpt


In [None]:
z_grid = run.generate_synthetic_shapes_z_grid()

In [None]:
assoc_info = ensemble_results.region_assocs_df.loc[
    (ensemble_results.region_assocs_df.chamber == "LA") & 
    (ensemble_results.region_assocs_df.P < 5e-8) & 
    pd.Series(map(lambda x: x in {"chr12_40", "chr16_40"}, ensemble_results.region_assocs_df.region))
].sort_values("P").iloc[0]

In [None]:
ensemble_results.region_assocs_df.loc[
    (ensemble_results.region_assocs_df.chamber == "LA") & 
    (ensemble_results.region_assocs_df.P < 5e-8) & 
    pd.Series(map(lambda x: x in {"chr12_40", "chr16_40"}, ensemble_results.region_assocs_df.region))
].sort_values("P").drop(["pheno", "CHR", "expid", "run"], axis=1)

Unnamed: 0,region,SNP,BP,AF,a_0,a_1,BETA,SE,T,P,chamber,full_pheno,INFO,variable_type,msd_static,msd_dynamic,rec_ratio
499836,chr16_40,rs59686216,75317575.0,0.59976,A,G,-0.058554,0.007156,-8.183,2.844461e-16,LA,6fa1322269a346f9bf6ecf7ef97f2d53_z010,,dynamic,1.165559,1.749322,0.596765
454449,chr16_40,rs4888422,75470576.0,0.61452,A,G,-0.044027,0.007448,-5.911,3.4261e-09,LA,4b20860d732741bd917a15edc7542b7b_z014,,dynamic,1.299022,1.87787,0.601511
526732,chr16_40,rs150284594,75413242.0,0.59195,A,AAG,0.042027,0.007118,5.9041,3.573551e-09,LA,a8742574ffe343969168e820a0d516c8_z010,,dynamic,0.905095,1.421587,0.485721
481099,chr12_40,rs761210718,66400702.0,0.36882,AAG,A,0.040962,0.007227,5.6682,1.45278e-08,LA,5e1b8110328348d8812e8b28b1bee177_z015,,dynamic,1.141208,1.685662,0.5865
447479,chr12_40,rs10878359,66404624.0,0.62803,T,C,-0.040067,0.007171,-5.5875,2.317928e-08,LA,4b20860d732741bd917a15edc7542b7b_z010,,dynamic,1.299022,1.87787,0.601511
531529,chr12_40,rs761210718,66400702.0,0.36882,AAG,A,0.040106,0.007223,5.5524,2.835959e-08,LA,a8742574ffe343969168e820a0d516c8_z013,,dynamic,0.905095,1.421587,0.485721


In [None]:
runs_df = runs_df.set_index("run_id", drop=False)
runinfo = runs_df.loc[assoc_info.run]

In [None]:
shapes = list(run.generate_synthetic_shapes_z_grid().values())

In [None]:
compute_convex_hull_volume(shapes)

[[77.33269002128576,
  77.4657596762874,
  77.29192173080433,
  77.13185485815012,
  76.96495975405101,
  76.27528093666263,
  75.51817697134364,
  74.45198683204048,
  74.78906479051184,
  76.21775988029108,
  79.37476300260607,
  84.82283845606592,
  90.20800719582716,
  94.86371253522965,
  98.17348120622938,
  101.31674194341785,
  104.83643035846386,
  108.02773668842268,
  110.38193774641185,
  111.62868646881299,
  112.09422904571704,
  112.09763828071011,
  111.34655388099071,
  109.32883265664404,
  105.14265702899834,
  99.27641151808639,
  91.86145569050653,
  84.04510805744228,
  77.71977113524758,
  74.47945883990147,
  73.4686828181373,
  73.12313878989693,
  74.17862588308836,
  75.02453954985008,
  76.22067145842564,
  77.6999726479486,
  79.15767444368927,
  80.1541543104549,
  80.99028348665435,
  81.44821813383332,
  81.67868713037896,
  81.37391846334523,
  80.58470237618245,
  79.434620363026,
  78.5769559107733,
  78.03782196651758,
  78.2218434094505,
  78.395156

In [None]:
run.synthetic_shapes[("z015", 0)]

AttributeError: 'Run' object has no attribute 'synthetic_shapes'

In [None]:
time_axis = np.array(range(50))/50
plt.figure(figsize=(8, 6))

volume_for_lambda = {
    _lambda: [ x/1000 for x in compute_convex_hull_volume(run.synthetic_shapes[(assoc_info.pheno, _lambda)]) ]
    for _lambda in range(-3, 4)
}

num_colors = len(volume_for_lambda)
colors = plt.cm.get_cmap('viridis', num_colors)

min_volume = min(min(volume_for_lambda.values()))
max_volume = max(max(volume_for_lambda.values()))

for i, _lambda in enumerate(range(-3, 4)):
    plt.plot(time_axis, volume_for_lambda[_lambda], color=colors(i))
    
plt.text(-0.2, (min_volume+max_volume)/2, 'LA volume (mL)', va='center', rotation='vertical', fontfamily="serif", fontsize="x-large")
plt.text(0.43, min_volume - (max_volume-min_volume)*0.15, 'time', va='center', rotation='horizontal', fontfamily="serif", fontsize="x-large")
plt.show()

NameError: name 'compute_convex_hull_volume' is not defined

<Figure size 800x600 with 0 Axes>

In [None]:
regions = ["chr12_40", "chr1_124", "chr15_35", "chr7_78", "chr16_40", "chr6_7", "chr6_82", "chr12_2", "chr12_17", "chr12_23", "chr3_28", "chr8_82", "chr22_7", "chr20_22"]

@interact
def show_volume_curves_for_locus(region=widgets.Dropdown(options=regions)):
    
    assocs_for_region = ensemble_results.get_results_for_region(region, only_dynamic=True, exp_ids=["4","5"])
    runinfo = assocs_for_region.iloc[0]
    run = Run(runinfo, load_model=True, load_dataloader=False)
    
    global volumes
    volumes = run.plot_volume_curves(zvar=runinfo.pheno)
    thicknesses = run.compute_aha_thickness(zvar=runinfo.pheno, segments=1)
    
    display(
        results.region_assocs_df[
            (results.region_assocs_df.run == runinfo.run_id) & 
            (results.region_assocs_df.pheno == runinfo.pheno) & 
            (results.region_assocs_df.P < 5e-8) & (results.region_assocs_df.variable_type == "dynamic")
        ].sort_values("P")
    )

___

In [None]:
x = next(iter(run.dataloader))['s_t'].to("cuda:0")
z, savg_hat, st_hat = run.model(x[:, range(0, 50, 5), :, :])

In [None]:
subject = 4
thickness     = np.array(compute_thickness_per_aha(x.cpu().detach().numpy()[subject]))
thickness_hat = np.array(compute_thickness_per_aha(st_hat[subject].cpu().detach().numpy()))

In [None]:
@interact
def show_curves(aha_segment=widgets.IntSlider(min=0,max=16)):
    plt.plot(thickness[:, aha_segment], color="blue");
    plt.plot(thickness_hat[:, aha_segment], color="red");

In [None]:
time_axis = list(np.array(range(50))/50)

In [None]:
# results = EnsembleGWASResults(root_dir=f"{Paths.Data.gwas_results}/cardiac_indices_ed_and_es")

In [None]:
# relevant_regions = set(ensemble_results.get_lead_snps().region)
# results_cardiac_indices = results.region_assocs_df[results.region_assocs_df.region.apply(lambda x: x in relevant_regions)]
# results_cardiac_indices = results_cardiac_indices[ results_cardiac_indices.pheno != "MEAN"]
# len(results_cardiac_indices.region.unique())

In [None]:
(-np.log10(results_cardiac_indices.pivot(index="region", values="P", columns="run"))).to_csv(
    "/home/user/01_repos/CardiacMotionGWAS/results/log10p_cardiac_indices_ed_and_es.csv"
)