# In [1]:
import os
import scipy.io as sio
import numpy as np
import pandas as pd

# ============================================================
# PATHS
# ============================================================
# Directory scanned for segmented training recordings; every "*.mat" file in
# it is treated as one segment whose id is the file name without extension.
INPUT_DIR = r"E:\PROJECTS\CARDIAC-PROJECT-UPDATED\DATASET\4-SEGMENTED_DATA\train"
# CSV read with pandas; its "segment_id" and "label" columns are used to map
# each segment to its label.
LABELS_CSV = r"E:\PROJECTS\CARDIAC-PROJECT-UPDATED\DATASET\4-SEGMENTED_DATA\train_segment_labels.csv"

# Destination directory for the original and augmented .mat files.
OUTPUT_DIR = r"E:\PROJECTS\CARDIAC-PROJECT-UPDATED\DATASET\5-AUGMENTED_DATA\train"
# Created up front (including missing parents); no error if it already exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# CSV that receives one (segment_id, label) row per file written to OUTPUT_DIR.
OUT_LABEL_CSV = r"E:\PROJECTS\CARDIAC-PROJECT-UPDATED\DATASET\5-AUGMENTED_DATA\train_augmented_labels.csv"

# ============================================================
# AUGMENTATION FUNCTIONS
# ============================================================
def add_gaussian_noise(signal, noise_level):
    """Return ``signal`` with zero-mean Gaussian noise added to every sample.

    Args:
        signal: NumPy array to perturb; it is not modified in place.
        noise_level: Standard deviation of the noise, in the same units as
            ``signal``.

    Returns:
        A new array with the same shape as ``signal``.
    """
    perturbation = np.random.normal(loc=0, scale=noise_level, size=signal.shape)
    return signal + perturbation

def amplitude_scaling(signal, scale_range):
    """Multiply ``signal`` by a single random gain.

    Args:
        signal: Array of samples to rescale; it is not modified in place.
        scale_range: ``(low, high)`` bounds passed to ``np.random.uniform``
            to draw the gain.

    Returns:
        ``signal`` multiplied by the drawn gain.
    """
    gain = np.random.uniform(*scale_range)
    scaled = signal * gain
    return scaled

def time_shift(signal, max_shift_samples):
    """Shift a 1-D signal by a random number of samples, zero-padding the gap.

    The shift is drawn uniformly from ``[-max_shift_samples,
    max_shift_samples]`` (both ends inclusive). A positive shift delays the
    signal (zeros are inserted at the start and the tail is dropped); a
    negative shift advances it (the head is dropped and zeros are appended).
    The output always has the same length as the input.

    Args:
        signal: 1-D sequence of samples.
        max_shift_samples: Largest shift magnitude, in samples. ``0`` disables
            shifting. Values larger than ``len(signal)`` are clamped to it.

    Returns:
        The shifted signal, or ``signal`` itself when the drawn shift is 0.

    Raises:
        ValueError: If ``max_shift_samples`` is negative.
    """
    if max_shift_samples < 0:
        raise ValueError("max_shift_samples must be non-negative")

    # Clamp so that slicing plus padding can never change the output length.
    limit = min(int(max_shift_samples), len(signal))
    if limit == 0:
        # randint(0, 0) would raise; a zero window simply means "no shift".
        return signal

    # randint's upper bound is exclusive, so +1 keeps the range symmetric.
    shift = np.random.randint(-limit, limit + 1)
    if shift == 0:
        return signal
    if shift > 0:
        return np.pad(signal[:-shift], (shift, 0))
    return np.pad(signal[-shift:], (0, -shift))

# ============================================================
# LOAD LABELS
# ============================================================
# Read the per-segment labels and index them by segment id for direct lookup.
labels_df = pd.read_csv(LABELS_CSV)
segment_ids = labels_df["segment_id"].tolist()
segment_labels = labels_df["label"].tolist()
label_map = dict(zip(segment_ids, segment_labels))

# Accumulates one {"segment_id": ..., "label": ...} record per saved file.
augmented_labels = list()

# ============================================================
# AUGMENTATION LOOP
# ============================================================
# For every segment, write the original plus three augmented variants to
# OUTPUT_DIR and record a label row for each file written.
# Sorted so the processing order (and thus the row order of the label CSV) is
# reproducible; os.listdir returns entries in arbitrary order.
for file in sorted(os.listdir(INPUT_DIR)):
    if not file.endswith(".mat"):
        continue

    # splitext strips only the trailing extension; str.replace would also
    # remove a ".mat" occurring in the middle of the file name.
    segment_id = os.path.splitext(file)[0]
    mat_path = os.path.join(INPUT_DIR, file)

    # Look the label up before doing any I/O so a missing entry fails fast
    # with a message that names the offending segment.
    try:
        label = label_map[segment_id]
    except KeyError:
        raise KeyError(
            f"No label for segment {segment_id!r} in {LABELS_CSV}"
        ) from None

    data = sio.loadmat(mat_path)
    ecg = data["ecg"].squeeze().astype(np.float32)
    pcg = data["pcg"].squeeze().astype(np.float32)
    fs = int(data["fs"][0][0])

    # ECG noise is relative (1 % of the signal's standard deviation); PCG
    # noise uses a fixed standard deviation of 0.02.
    ecg_noise_std = 0.01 * np.std(ecg)
    pcg_noise_std = 0.02

    # Augmentation 1: Gaussian noise
    ecg_n = add_gaussian_noise(ecg, ecg_noise_std)
    pcg_n = add_gaussian_noise(pcg, pcg_noise_std)

    # Augmentation 2: amplitude scaling (independent gains for ECG and PCG)
    ecg_s = amplitude_scaling(ecg, (0.9, 1.1))
    pcg_s = amplitude_scaling(pcg, (0.85, 1.15))

    # Augmentation 3: noise + scaling + PCG time shift.
    # Only the PCG is shifted, so this variant also perturbs the relative
    # timing between the two channels.
    ecg_ns = amplitude_scaling(
        add_gaussian_noise(ecg, ecg_noise_std), (0.9, 1.1)
    )
    pcg_ns = amplitude_scaling(
        add_gaussian_noise(pcg, pcg_noise_std), (0.85, 1.15)
    )
    max_shift = int(0.05 * fs)  # 50 ms
    pcg_ns = time_shift(pcg_ns, max_shift)

    variants = (
        ("_orig", ecg, pcg),
        ("_noise", ecg_n, pcg_n),
        ("_scale", ecg_s, pcg_s),
        ("_mix", ecg_ns, pcg_ns),
    )
    for suffix, ecg_out, pcg_out in variants:
        aug_id = segment_id + suffix
        sio.savemat(
            os.path.join(OUTPUT_DIR, aug_id + ".mat"),
            {
                # Adding float64 noise silently upcasts the float32 signals;
                # cast back so every variant is stored with the same dtype
                # as the "_orig" file.
                "ecg": np.asarray(ecg_out, dtype=np.float32),
                "pcg": np.asarray(pcg_out, dtype=np.float32),
                "fs": fs,
            },
        )
        augmented_labels.append({"segment_id": aug_id, "label": label})

# ============================================================
# SAVE AUGMENTED LABELS
# ============================================================
# Explicit columns keep the header row even when no segment was processed
# (an empty record list would otherwise produce a column-less, empty CSV).
aug_df = pd.DataFrame(augmented_labels, columns=["segment_id", "label"])
aug_df.to_csv(OUT_LABEL_CSV, index=False)

# Summary of the run: number of files written and where the labels went.
print("✅ Augmentation complete")
print(f"Total augmented samples: {len(aug_df)}")
print(f"Labels saved to: {OUT_LABEL_CSV}")


# Output:
# ✅ Augmentation complete
# Total augmented samples: 9084
# Labels saved to: E:\PROJECTS\CARDIAC-PROJECT-UPDATED\DATASET\5-AUGMENTED_DATA\train_augmented_labels.csv
