# ECG Images Preprocessing
Takes a single patient's ECG image and separates it into 12 images, one for each lead,
retrieved from

In [21]:
import os
import re
from skimage.filters import threshold_otsu, gaussian
from skimage import measure, color, morphology, filters,io
from skimage.transform import resize
from skimage.io import imread, imsave
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

## Extracting and Preprocessing Images
    
Extracts the 12 individual leads from an ECG image, preprocesses each one to remove noise
and keep only the ECG signal, and saves the resulting smaller images in a per-class subfolder.

Arguments:
  image_file: Name of the input image file.
  parent_folder: Path to the folder containing the image file.
  output_folder: Path to the folder where the processed lead images will be saved.

In [22]:
def extract_and_preprocess_leads(image_file, parent_folder, output_folder):
    """Crop the 12 leads out of one ECG sheet image and save each cleaned trace.

    The sheet is cut on a fixed 3 x 4 pixel grid (assumes every input image
    shares this layout -- TODO confirm against the dataset).  Each crop is
    converted to grayscale, blurred, Otsu-thresholded, morphologically closed,
    resized to 180 x 230 and reduced to its longest contour, which is drawn on
    a blank image and written to
    ``<output_folder>/<class_label>/<base_filename>_lead_<n>.png`` with ``n``
    running from 1 to 12 in row-major order.

    The class label is the part of the file name that precedes the first
    ``(`` (for example ``MI(12).jpg`` -> ``MI``), with surrounding whitespace
    removed.

    Arguments:
      image_file: Name of the input image file.
      parent_folder: Path to the folder containing the image file.
      output_folder: Path to the folder where the processed lead images will
        be saved (one subfolder per class label is created on demand).

    Raises:
      ValueError: If no class label can be derived from ``image_file``.
    """
    # Read the image
    image = imread(os.path.join(parent_folder, image_file))

    # Crop grid in pixels: three horizontal bands of four leads each.
    # start_offset trims the left edge of every column band.
    start_offset = 30
    row_bands = ((300, 600), (600, 900), (900, 1200))
    column_bands = ((150, 643), (646, 1135), (1140, 1626), (1630, 2125))
    # Row-major order reproduces the Lead_1 .. Lead_12 numbering.
    leads = [
        image[top:bottom, left + start_offset:right]
        for top, bottom in row_bands
        for left, right in column_bands
    ]

    # Extract the original filename without extension
    base_filename = os.path.splitext(image_file)[0]

    # Extract the class label from the base filename.  Stripping avoids a
    # folder such as "Normal " (trailing space) for names like "Normal (1)".
    label_match = re.match(r'([^\(]+)', base_filename)
    class_label = label_match.group(1).strip() if label_match else ''
    if not class_label:
        raise ValueError(
            f"Cannot derive a class label from file name {image_file!r}"
        )

    # Create the class subfolder within the output folder if it doesn't exist
    class_output_folder = os.path.join(output_folder, class_label)
    os.makedirs(class_output_folder, exist_ok=True)

    # 3x3 square structuring element; a plain ones array is the same footprint
    # morphology.square(3) builds, without relying on that deprecated helper.
    footprint = np.ones((3, 3), dtype=np.uint8)

    for lead_number, lead_img in enumerate(leads, start=1):
        # Convert to grayscale
        grayscale = color.rgb2gray(lead_img)
        # Smooth the image
        blurred_image = gaussian(grayscale, sigma=0.7)
        # Thresholding: pixels darker than the Otsu threshold become foreground
        global_thresh = filters.threshold_otsu(blurred_image)
        binary_global = blurred_image < global_thresh
        # Morphological Operations (Connect broken segments)
        binary_global = morphology.closing(binary_global, footprint)
        # Resize
        binary_global = resize(binary_global, (180, 230))

        # Find contours to isolate the ECG signal
        contours = measure.find_contours(binary_global, 0.8)
        # Create a blank image to draw the extracted signal
        extracted_signal = np.zeros_like(binary_global)
        if contours:
            # Keep only the contour(s) with the most points; everything
            # shorter is treated as noise and dropped.
            longest = max(len(contour) for contour in contours)
            for contour in contours:
                if len(contour) == longest:
                    # Contour points are (row, col) floats; truncate to pixels.
                    rows = contour[:, 0].astype(int)
                    cols = contour[:, 1].astype(int)
                    extracted_signal[rows, cols] = 1  # Set pixel to white

        # Create the output filename with the desired convention
        output_filename = f"{base_filename}_lead_{lead_number}.png"
        output_path = os.path.join(class_output_folder, output_filename)
        # Save the extracted signal image
        imsave(output_path, extracted_signal)


In [23]:
'''
input_folder = '.'  # Input folder
output_folder = './processed'  # Desired output folder

# Iterate over each class folder and extract and preprocess the leads
# Uncomment to run

for class_label in ['AB', 'HMI', 'MI', 'Normal']:
    class_path = os.path.join(input_folder, class_label)
    for filename in os.listdir(class_path):
        if filename.endswith(('.png', '.jpg', '.jpeg')):
            extract_and_preprocess_leads(filename, class_path, output_folder)

'''

"\ninput_folder = '.'  # Input folder\noutput_folder = './processed'  # Desired output folder\n\n# Iterate over each class folder and extract and preprocess the leads\n# Uncomment to run\n\nfor class_label in ['AB', 'HMI', 'MI', 'Normal']:\n    class_path = os.path.join(input_folder, class_label)\n    for filename in os.listdir(class_path):\n        if filename.endswith(('.png', '.jpg', '.jpeg')):\n            extract_and_preprocess_leads(filename, class_path, output_folder)\n\n"

In [25]:
def create_datasets(folder, class_labels=('HB', 'MI', 'PMI', 'Normal')):
    """Load every processed lead image under ``folder`` with an integer label.

    ``folder`` must contain one subfolder per entry of ``class_labels``; every
    ``.png`` file inside a subfolder is read as a grayscale image and labelled
    with that subfolder's name.  Labels are then integer-encoded with
    ``LabelEncoder`` and the resulting ``index: label`` mapping is printed.

    Arguments:
      folder: Path to the folder that holds the per-class subfolders.
      class_labels: Names of the class subfolders to read.  Defaults to the
        four classes used by this notebook.

    Returns:
      A tuple ``(images, encoded_labels)``: the images stacked into one NumPy
      array and the matching array of encoded labels.
    """
    image_data = []
    labels = []
    for class_label in class_labels:
        class_folder = os.path.join(folder, class_label)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and therefore any seeded split) reproducible.
        for filename in sorted(os.listdir(class_folder)):
            if filename.endswith('.png'):
                # Load image and convert to grayscale
                img = imread(os.path.join(class_folder, filename), as_gray=True)
                image_data.append(img)
                labels.append(class_label)
    # Encode labels
    label_encoder = LabelEncoder()
    encoded_labels = label_encoder.fit_transform(labels)

    # Print label encoding
    for i, label in enumerate(label_encoder.classes_):
        print(f"{i}: {label}")

    return np.array(image_data), encoded_labels

imagefolder= '../../processed_datasets/ecg_images'
# Load all processed lead images and their encoded labels into memory;
# create_datasets also prints the label encoding. Nothing is written to disk here.
image_data, labels = create_datasets(imagefolder)

0: HB
1: MI
2: Normal
3: PMI


In [26]:
# Persist the full (unsplit) image array and its encoded labels in the
# current working directory.
np.save('image_data.npy', image_data)
np.save('labels.npy', labels) 

In [27]:
# Sanity check: dimensions of the stacked image array.
image_data.shape

(11136, 180, 230)

In [28]:
# Sanity check: there should be exactly one label per image.
labels.shape

(11136,)

In [29]:
# Class balance: how many samples carry each encoded label.
unique_labels, counts = np.unique(labels, return_counts=True)
print("Unique labels:", unique_labels)
print("Counts:", counts)

Unique labels: [0 1 2 3]
Counts: [2796 2868 3408 2064]


In [30]:
# Split data into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    image_data, labels, test_size=0.2, random_state=42
)

# Scale pixel values from the 0-255 range to 0-1.  Both splits must be scaled
# the same way, otherwise a model fitted on X_train sees X_test on a different
# scale.  NOTE(review): if a downstream notebook already divides X_test by 255
# after loading, remove that step there.
X_train = X_train / 255.0
X_test = X_test / 255.0

# Save the training and testing datasets
np.save('X_train.npy', X_train)
np.save('X_test.npy', X_test)
np.save('y_train.npy', y_train)
np.save('y_test.npy', y_test)

In [31]:
# Inspect the pixel range of the full, un-normalised image array
# (image_data itself is not rescaled anywhere in this notebook).
print("Min pixel value:", image_data.min())
print("Max pixel value:", image_data.max())


Min pixel value: 0
Max pixel value: 255


In [33]:
# NOTE(review): the recorded output below (uint8) looks stale -- dividing
# X_train by 255.0 in the split cell yields a float array. Re-run to confirm.
X_train.dtype

dtype('uint8')

In [4]:
'''
def extract_and_stitch_ecg_leads(image_file, parent_folder, output_folder):
    # Read the image
    image = io.imread(os.path.join(parent_folder, image_file))

    # Dividing the ECG leads with offset
    start_offset = 30 

    Lead_1 = image[300:600, 150 + start_offset:643]
    Lead_2 = image[300:600, 646 + start_offset:1135]
    Lead_3 = image[300:600, 1140 + start_offset:1626]
    Lead_4 = image[300:600, 1630 + start_offset:2125]
    Lead_5 = image[600:900, 150 + start_offset:643]
    Lead_6 = image[600:900, 646 + start_offset:1135]
    Lead_7 = image[600:900, 1140 + start_offset:1626]
    Lead_8 = image[600:900, 1630 + start_offset:2125]
    Lead_9 = image[900:1200, 150 + start_offset:643]
    Lead_10 = image[900:1200, 646 + start_offset:1135]
    Lead_11 = image[900:1200, 1140 + start_offset:1626]
    Lead_12 = image[900:1200, 1630 + start_offset:2125]

    Leads=[Lead_1,Lead_2,Lead_3,Lead_4,Lead_5,Lead_6,Lead_7,Lead_8,Lead_9,Lead_10,Lead_11,Lead_12]

    processed_leads = []
    for lead_img in Leads:
        # Convert to grayscale
        grayscale = color.rgb2gray(lead_img)
        # Smooth the image
        blurred_image = filters.gaussian(grayscale, sigma=0.7)
        # Thresholding
        global_thresh = filters.threshold_otsu(blurred_image)
        binary_global = blurred_image < global_thresh 
        # Morphological Operations (Connect broken segments)
        binary_global = morphology.closing(binary_global, morphology.square(3)) 
        # Resize
        binary_global = resize(binary_global, (180,230))

        # Find contours to isolate the ECG signal (optional, if needed)
        contours = measure.find_contours(binary_global, 0.8)
        contours_shape = sorted([x.shape for x in contours])[::-1][0:1]
        extracted_signal = np.zeros_like(binary_global)
        for contour in contours:
            if contour.shape in contours_shape:
                for point in contour:
                    x_coord, y_coord = int(point[1]), int(point[0])
                    extracted_signal[y_coord, x_coord] = 1 

        processed_leads.append(extracted_signal)

    # Stitch the leads together
    top_row = np.concatenate(processed_leads[:4], axis=1)  # Leads 1 to 4
    middle_row = np.concatenate(processed_leads[4:8], axis=1)  # Leads 5 to 8
    bottom_row = np.concatenate(processed_leads[8:], axis=1)  # Leads 9 to 12
    stitched_image = np.concatenate([top_row, middle_row, bottom_row], axis=0)

    # Extract the original filename without extension
    base_filename = os.path.splitext(image_file)[0]

    # Create the output filename 
    output_filename = f"{base_filename}_stitched.png" 
    output_path = os.path.join(output_folder, output_filename)  
    # Save the stitched image
    io.imsave(output_path, stitched_image)

'''


'\ndef extract_and_stitch_ecg_leads(image_file, parent_folder, output_folder):\n    # Read the image\n    image = io.imread(os.path.join(parent_folder, image_file))\n\n    # Dividing the ECG leads with offset\n    start_offset = 30 \n\n    Lead_1 = image[300:600, 150 + start_offset:643]\n    Lead_2 = image[300:600, 646 + start_offset:1135]\n    Lead_3 = image[300:600, 1140 + start_offset:1626]\n    Lead_4 = image[300:600, 1630 + start_offset:2125]\n    Lead_5 = image[600:900, 150 + start_offset:643]\n    Lead_6 = image[600:900, 646 + start_offset:1135]\n    Lead_7 = image[600:900, 1140 + start_offset:1626]\n    Lead_8 = image[600:900, 1630 + start_offset:2125]\n    Lead_9 = image[900:1200, 150 + start_offset:643]\n    Lead_10 = image[900:1200, 646 + start_offset:1135]\n    Lead_11 = image[900:1200, 1140 + start_offset:1626]\n    Lead_12 = image[900:1200, 1630 + start_offset:2125]\n\n    Leads=[Lead_1,Lead_2,Lead_3,Lead_4,Lead_5,Lead_6,Lead_7,Lead_8,Lead_9,Lead_10,Lead_11,Lead_12]\n\n  

In [5]:
'''
for class_label in ['AB', 'HMI', 'MI', 'Normal']:
    class_path = os.path.join(input_folder, class_label)
    for filename in os.listdir(class_path):
        if filename.endswith(('.png', '.jpg', '.jpeg')):
            extract_and_stitch_ecg_leads(filename, class_path, output_folder)

'''

"\nfor class_label in ['AB', 'HMI', 'MI', 'Normal']:\n    class_path = os.path.join(input_folder, class_label)\n    for filename in os.listdir(class_path):\n        if filename.endswith(('.png', '.jpg', '.jpeg')):\n            extract_and_stitch_ecg_leads(filename, class_path, output_folder)\n\n"