# In [None]:
# # PTB-XL Feature and Label Merge
# This notebook merges extracted patient-level ECG features with diagnostic labels from PTB-XL.

import pandas as pd

# ## 1. Load Data
# Input CSV locations, given as relative paths: the patient-level feature
# table and the PTB-XL label table.
features_path = "ptbxl_patient_level_features.csv"
labels_path = "ptbxl_database.csv"

# Read both files into DataFrames with pandas' default parsing options.
df_features, df_labels = pd.read_csv(features_path), pd.read_csv(labels_path)

# Report (rows, columns) for each table as a quick sanity check.
print("Features shape:", df_features.shape)
print("Labels shape:", df_labels.shape)

# ## 2. Prepare for Merge
# Normalize filename_hr (the merge key) in both tables.

# Fail early, naming the offending table(s), if the merge key is absent.
# Nothing in this notebook derives filename_hr from another column, so a
# missing key cannot be recovered from here.
_tables_missing_key = [
    table_name
    for table_name, table in (("features", df_features), ("labels", df_labels))
    if "filename_hr" not in table.columns
]
if _tables_missing_key:
    raise ValueError(
        f"filename_hr column missing in {' and '.join(_tables_missing_key)} file"
    )


def _strip_record_extension(paths):
    """Return *paths* without trailing ``.dat``/``.hea`` and outer whitespace.

    Args:
        paths: pandas Series of record path strings.

    Returns:
        A new Series of cleaned paths. Missing values are passed through
        unchanged by the ``.str`` methods.
    """
    # The pattern is escaped and anchored so that only literal extensions at
    # the end of the value are dropped. An unescaped ".dat" is treated as a
    # regex on pandas < 2.0 (where "." matches any character) and, even as a
    # literal, would also delete matches in the middle of a path.
    return (
        paths.str.strip()
        .str.replace(r"(?:\.(?:dat|hea))+$", "", regex=True)
        .str.strip()
    )


# Clean file name formatting
df_features["filename_hr"] = _strip_record_extension(df_features["filename_hr"])
# The labels key is cast to str first (as before), so a non-string column is
# still cleaned instead of failing on the .str accessor.
df_labels["filename_hr"] = _strip_record_extension(
    df_labels["filename_hr"].astype(str)
)

# ## 3. Merge Features with Labels
# Inner join on the shared filename_hr key: only keys present in both tables
# are kept; rows without a counterpart in the other table are dropped.
df_merged = pd.merge(df_features, df_labels, how="inner", on="filename_hr")
print("Merged shape:", df_merged.shape)

# ## 4. Save Merged Dataset
# Write the merged table as CSV, leaving out the DataFrame index column.
output_path = "ptbxl_merged_features.csv"
df_merged.to_csv(path_or_buf=output_path, index=False)
print(f"Merged dataset saved to {output_path}")

# ## 5. Preview Merged Data
# Evaluate to the first five rows (the default for head()); as the final
# expression of a notebook cell this is what gets displayed.
df_merged.head(n=5)
