In [3]:
import os
import numpy as np
from skimage.io import imread, imsave
from skimage.color import rgb2gray
from skimage.filters import threshold_otsu
from skimage.morphology import dilation, opening, square
from skimage.restoration import denoise_bilateral
from skimage.util import img_as_ubyte
from scipy.ndimage import median_filter

# Input folders
# Single project root so the notebook can be moved to another machine by
# setting the ECG_BASE_DIR environment variable instead of editing every path.
# The default reproduces the original hard-coded locations.
BASE_DIR = os.environ.get("ECG_BASE_DIR", r"C:\Users\sihus\OneDrive\Desktop\MP DL")

# Class label -> folder holding that class's raw ECG images.
# Each class lives in <BASE_DIR>/dataset2/<class label>.
CLASS_NAMES = ['Abnormal Heartbeat', 'MI', 'History of MI', 'Normal']
dataset_paths = {
    name: os.path.join(BASE_DIR, "dataset2", name) for name in CLASS_NAMES
}

# Output folder (one sub-folder per class is created when images are saved)
output_root = os.path.join(BASE_DIR, "processed_images")
os.makedirs(output_root, exist_ok=True)

# Preprocessing parameters
CROP_TOP = 300          # rows removed from the top of every image
CROP_BOTTOM = 150       # rows removed from the bottom of every image
DILATION_SIZE = 2       # side of the square footprint used to thicken lines
MEDIAN_KERNEL_SIZE = 3  # window size for median filtering

# Function to preprocess and save ECG images
def convert_image_to_rgb(image_path, class_name, output_root):
    """Binarize one ECG image and save it as a white-on-black RGB PNG.

    Steps: crop CROP_TOP / CROP_BOTTOM rows, convert to grayscale, median
    filter, Otsu threshold (pixels darker than the threshold become
    foreground), morphological opening, dilation, then write the result to
    ``<output_root>/<class_name>/<original stem>_cleaned.png``.

    Args:
        image_path: Path of the image to read.
        class_name: Name of the sub-folder of ``output_root`` to write into.
        output_root: Root folder for processed images.

    Raises:
        ValueError: If the image has too few rows to survive the crop.
        Any exception raised by the image reader/writer is propagated.
    """
    image = imread(image_path)
    base_name = os.path.splitext(os.path.basename(image_path))[0]

    # Crop out top/bottom margins. Only rows are sliced so that 2-D
    # (grayscale) and 3-D (colour) arrays are both handled.
    crop_end = image.shape[0] - CROP_BOTTOM
    if crop_end <= CROP_TOP:
        raise ValueError(
            f"image height {image.shape[0]} is too small for "
            f"CROP_TOP={CROP_TOP} and CROP_BOTTOM={CROP_BOTTOM}"
        )
    image = image[CROP_TOP:crop_end]

    # Convert to grayscale. rgb2gray only accepts 3-channel input on current
    # scikit-image, so other layouts are normalised first.
    if image.ndim == 2:
        gray = image  # already single-channel
    elif image.shape[-1] == 4:
        from skimage.color import rgba2rgb  # local: only needed for RGBA files
        gray = rgb2gray(rgba2rgb(image))  # alpha is blended out before conversion
    elif image.shape[-1] == 3:
        gray = rgb2gray(image)
    else:
        gray = image[..., 0]  # e.g. gray+alpha: keep the luminance channel

    # Median filter to remove noise while preserving edges
    filtered = median_filter(gray, size=MEDIAN_KERNEL_SIZE)

    # Otsu's thresholding
    thresh = threshold_otsu(filtered)
    binary = filtered < thresh  # ECG lines are darker

    # Morphological operations to enhance ECG lines
    binary = opening(binary, square(2))  # remove noise dots
    binary = dilation(binary, square(DILATION_SIZE))  # thicken lines

    # Convert binary to RGB image: signal white, background black.
    # Force a boolean mask so the assignment below is always mask indexing,
    # never integer fancy indexing.
    mask = np.asarray(binary, dtype=bool)
    rgb = np.zeros(mask.shape + (3,), dtype=np.uint8)
    rgb[mask] = 255

    # Save the image
    output_dir = os.path.join(output_root, class_name)
    os.makedirs(output_dir, exist_ok=True)
    imsave(os.path.join(output_dir, base_name + "_cleaned.png"), rgb)

# Loop through dataset
# File extensions (lower-case) treated as input images; '.jpeg' is included so
# that files using the long JPEG suffix are not silently skipped.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

for class_name, class_dir in dataset_paths.items():
    print(f"Processing class: {class_name}")
    failed_count = 0
    # os.listdir order is arbitrary; sort so runs (and error logs) are stable.
    for file in sorted(os.listdir(class_dir)):
        if not file.lower().endswith(IMAGE_EXTENSIONS):
            continue
        full_path = os.path.join(class_dir, file)
        try:
            convert_image_to_rgb(full_path, class_name, output_root)
        except Exception as e:
            # Best effort: report the file and keep going with the rest.
            failed_count += 1
            print(f"Error processing {file} in {class_name}: {e}")
    # Only printed when something went wrong, so a clean run stays quiet.
    if failed_count:
        print(f"{failed_count} image(s) could not be processed in {class_name}")


Processing class: Abnormal Heartbeat
Processing class: MI
Processing class: History of MI
Processing class: Normal


In [5]:
import os
import shutil
import random
from tqdm import tqdm

# Configuration
# NOTE(review): input_dir is resolved relative to the current working
# directory, while the preprocessing cell writes to an absolute output_root;
# confirm both point at the same "processed_images" folder.
input_dir = "processed_images"
output_dir = "split_images_uncropped"
split_ratio = {'train': 0.7, 'val': 0.15, 'test': 0.15}
random.seed(42)

# Only sub-directories of input_dir are classes. Sorting makes the seeded
# shuffles below consume the random stream in the same order on every
# machine, since os.listdir returns entries in arbitrary order.
class_names = sorted(
    name for name in os.listdir(input_dir)
    if os.path.isdir(os.path.join(input_dir, name))
)

# Ensure output directories exist
for split in ['train', 'val', 'test']:
    for class_name in class_names:
        os.makedirs(os.path.join(output_dir, split, class_name), exist_ok=True)

# Process each class
for class_name in class_names:
    class_path = os.path.join(input_dir, class_name)

    # Regular files only (copy2 cannot copy directories), in sorted order so
    # that the shuffle result depends only on the seed and the file names.
    images = sorted(
        name for name in os.listdir(class_path)
        if os.path.isfile(os.path.join(class_path, name))
    )
    random.shuffle(images)

    # Train and val sizes are truncated to whole images; test receives
    # whatever remains, so it absorbs the rounding leftovers.
    total = len(images)
    train_end = int(split_ratio['train'] * total)
    val_end = train_end + int(split_ratio['val'] * total)

    splits = {
        'train': images[:train_end],
        'val': images[train_end:val_end],
        'test': images[val_end:]
    }

    # Copy (not move) so the processed_images folder is left intact.
    for split, split_images in splits.items():
        for img in tqdm(split_images, desc=f"Copying {split} - {class_name}"):
            src = os.path.join(class_path, img)
            dst = os.path.join(output_dir, split, class_name, img)
            shutil.copy2(src, dst)


Copying train - Abnormal Heartbeat: 100%|██████████████████████████████████████████| 163/163 [00:00<00:00, 1039.63it/s]
Copying val - Abnormal Heartbeat: 100%|███████████████████████████████████████████████| 34/34 [00:00<00:00, 879.01it/s]
Copying test - Abnormal Heartbeat: 100%|██████████████████████████████████████████████| 36/36 [00:00<00:00, 773.68it/s]
Copying train - History of MI: 100%|████████████████████████████████████████████████| 120/120 [00:00<00:00, 886.43it/s]
Copying val - History of MI: 100%|████████████████████████████████████████████████████| 25/25 [00:00<00:00, 859.40it/s]
Copying test - History of MI: 100%|███████████████████████████████████████████████████| 27/27 [00:00<00:00, 921.74it/s]
Copying train - MI: 100%|██████████████████████████████████████████████████████████| 167/167 [00:00<00:00, 1089.03it/s]
Copying val - MI: 100%|███████████████████████████████████████████████████████████████| 35/35 [00:00<00:00, 879.88it/s]
Copying test - MI: 100%|████████████████