In [None]:
import os, sys

# Switch the working directory to $HOME/01_repos before the project-local
# imports below run.
# NOTE(review): presumably this is what makes `paths` and `CardiacMotion`
# importable (resolved relative to the current directory) — confirm. It also
# means any relative path used later is resolved against this directory.
os.chdir(f"{os.environ['HOME']}/01_repos")

from paths import Paths
import datetime
import pytz
import pandas as pd

from CardiacMotion.utils.run_helpers import fhm_mesh
from CardiacMotion.utils.run_helpers import Run, get_runs

In [None]:
# Load the table of runs (get_runs() is called with its defaults).
# Later cells rely on three of its columns: start_time (date filter),
# experiment_id and run_id (to locate each run's artifacts on disk).
runs_df = get_runs()

In [None]:
# Cut-off timestamp (midnight UTC, 10 Feb 2024); only runs started after it
# are kept. Timezone-aware so it can be compared with tz-aware start times.
REFERENCE_DATE = datetime.datetime(2024, 2, 10, tzinfo=pytz.utc)

In [None]:
# Keep only the runs whose start_time is strictly later than REFERENCE_DATE.
runs_df = runs_df.loc[runs_df["start_time"] > REFERENCE_DATE]

In [None]:
# Directory under the CardiacMotion repo that holds the per-run outputs.
# It is read below as {MLRUNS_DIR}/<experiment_id>/<run_id>/artifacts/...
MLRUNS_DIR = f"{Paths.Repos.CARDIAC_MOTION}/mlruns"

In [None]:
# Build one wide table with the latent vectors of every selected run:
# rows are subject IDs, columns are "<latent column>_<run_id>".
# Runs without a latent_vector.csv artifact are reported and skipped.
z_dfs = []  # per-run frames; kept separate from the final DataFrame name

# Only experiment_id and run_id are needed, so iterate over those two columns
# directly. Unlike iterrows(), this never funnels values through a per-row
# Series, so the ids keep their original dtype when formatted into the path.
for exp_id, run_id in zip(runs_df["experiment_id"], runs_df["run_id"]):

    latent_vector_file = f"{MLRUNS_DIR}/{exp_id}/{run_id}/artifacts/latent_vector.csv"
    try:
        z_df = pd.read_csv(latent_vector_file)
    except FileNotFoundError:
        # Best effort: a run that did not write this artifact is not an error.
        print(f"File {latent_vector_file} not found. Skipping...")
        continue

    z_df = z_df.set_index("ID")
    # Cast the IDs to int *before* sorting so the order is numeric even if
    # the IDs were parsed as strings.
    z_df.index = z_df.index.astype(int)
    z_df = z_df.sort_index()
    # Suffix every column with the run id so columns from different runs
    # stay distinguishable after the column-wise concatenation.
    z_df.columns = [f"{col}_{run_id}" for col in z_df.columns]
    z_dfs.append(z_df)

if not z_dfs:
    # pd.concat([]) would fail with an opaque "No objects to concatenate";
    # fail with a message that points at the actual cause instead.
    raise ValueError(
        f"No latent_vector.csv was found under {MLRUNS_DIR} for any of the "
        f"{len(runs_df)} selected runs; nothing to merge."
    )

# Column-wise concatenation aligns the frames on the ID index; an ID that is
# missing from a given run gets NaN in that run's columns.
z_df_merged = pd.concat(z_dfs, axis=1)

# Persist the merged table; the index is written as the "ID" column and
# floats are written with 8 decimal places.
z_df_merged.to_csv(
    f"{Paths.Repos.CARDIAC_MOTION_GWAS}/data/latent_vector_all_runs_after_Feb2024.csv",
    index=True, index_label="ID", float_format='%.8f'
)