<a href="https://colab.research.google.com/github/ranagursoy/ECG-signal/blob/main/ECG-kaggledata.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Data

In [None]:
import pandas as pd
import os

In [None]:
# Mount Google Drive at /content/drive so later cells can read and write under that path.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Tell the Kaggle CLI where to look for its configuration.
# NOTE(review): presumably this Drive folder holds the kaggle.json API token — confirm.
os.environ['KAGGLE_CONFIG_DIR'] = "/content/drive/MyDrive/ColabNotebooks/dosya"

In [None]:
# Download the erhmrai/ecg-image-data dataset archive with the Kaggle CLI.
!kaggle datasets download -d erhmrai/ecg-image-data

In [None]:
# Extract every zip archive in the working directory; the archives are deleted only if unzip succeeds (&&).
!unzip \*.zip  && rm *.zip

# Data Prep

In [None]:
import os

def count_images_in_directory(directory):
    """Count image files directly inside every folder found below *directory*.

    The tree is walked recursively. Each folder encountered is keyed by its
    bare name (not its full path), so two folders with the same name at
    different depths share one key and the one visited last wins.

    Args:
        directory: Root folder to walk.

    Returns:
        dict mapping folder name -> number of entries whose lower-cased name
        ends with .png, .jpg, .jpeg, .bmp or .gif.
    """
    suffixes = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
    counts = {}

    for parent, child_dirs, _ in os.walk(directory):
        for child in child_dirs:
            entries = os.listdir(os.path.join(parent, child))
            counts[child] = sum(1 for entry in entries if entry.lower().endswith(suffixes))

    return counts

def print_image_counts(directory, name):
    """Print the per-folder image counts of *directory* and return their sum.

    Args:
        directory: Root folder handed to count_images_in_directory.
        name: Label used in the printed heading ("<name> Directory:").

    Returns:
        Total number of images summed over all counted folders.
    """
    per_folder = count_images_in_directory(directory)

    print(f"\n{name} Directory:")
    for folder, n_images in per_folder.items():
        print(f"{folder}: {n_images} images")

    return sum(per_folder.values())

# Original train/test folders of the extracted dataset.
train_directory = '/content/ECG_Image_data/train'
test_directory = '/content/ECG_Image_data/test'

train_image_count = print_image_counts(train_directory, "Train")
test_image_count = print_image_counts(test_directory, "Test")

total_images = train_image_count + test_image_count

# When nothing was counted (e.g. the dataset was not extracted to these
# paths) report 0% instead of failing with ZeroDivisionError.
if total_images:
    train_percentage = (train_image_count / total_images) * 100
    test_percentage = (test_image_count / total_images) * 100
else:
    train_percentage = test_percentage = 0.0

print("\nOverall Image Distribution:")
print(f"Train: {train_image_count} images ({train_percentage:.2f}%)")
print(f"Test: {test_image_count} images ({test_percentage:.2f}%)")

In [None]:
import os
import random
import shutil

def split_data(source_directory, train_directory, val_directory, test_directory, train_ratio=0.75, val_ratio=0.15, test_ratio=0.15):
    """Move the files of each class folder into train/val/test class folders.

    Every immediate sub-folder of *source_directory* is treated as one class.
    Its files are shuffled, then the first ``int(n * train_ratio)`` go to
    train, the next ``int(n * val_ratio)`` go to val, and all remaining files
    go to test. Files are moved, not copied.

    Args:
        source_directory: Folder whose immediate sub-folders are the classes.
        train_directory: Destination root for the training files.
        val_directory: Destination root for the validation files.
        test_directory: Destination root for the test files.
        train_ratio: Fraction of each class moved to train.
        val_ratio: Fraction of each class moved to val.
        test_ratio: Accepted for interface compatibility only; it is not
            read, because the test split always receives the remainder
            (note the default ratios add up to 1.05).
    """
    destination_roots = (train_directory, val_directory, test_directory)
    for destination in destination_roots:
        os.makedirs(destination, exist_ok=True)
    resolved_destinations = {os.path.abspath(destination) for destination in destination_roots}

    for subdir in os.listdir(source_directory):
        subdir_path = os.path.join(source_directory, subdir)

        if not os.path.isdir(subdir_path):
            continue
        # The destination roots may live inside source_directory. They are not
        # classes, so skipping them avoids creating bogus empty class folders
        # such as train/train or val/test.
        if os.path.abspath(subdir_path) in resolved_destinations:
            continue

        all_files = [f for f in os.listdir(subdir_path) if os.path.isfile(os.path.join(subdir_path, f))]

        random.shuffle(all_files)
        train_split = int(len(all_files) * train_ratio)
        val_split = int(len(all_files) * val_ratio)

        # (destination root, files assigned to it); test takes whatever is left.
        assignments = (
            (train_directory, all_files[:train_split]),
            (val_directory, all_files[train_split:train_split + val_split]),
            (test_directory, all_files[train_split + val_split:]),
        )

        for destination_root, chosen_files in assignments:
            class_destination = os.path.join(destination_root, subdir)
            os.makedirs(class_destination, exist_ok=True)
            for file in chosen_files:
                shutil.move(os.path.join(subdir_path, file), os.path.join(class_destination, file))


# Root of the extracted dataset and the three split folders used as destinations.
# NOTE(review): the destinations are nested inside source_directory — confirm how
# split_data is expected to treat the already existing train/ and test/ folders.
source_directory = '/content/ECG_Image_data'
train_directory = '/content/ECG_Image_data/train'
val_directory = '/content/ECG_Image_data/val'
test_directory = '/content/ECG_Image_data/test'

# Files are moved (not copied) by split_data.
split_data(source_directory, train_directory, val_directory, test_directory)

In [None]:
# Per-folder image counts (dicts) for each split.
# NOTE(review): the test counts are read from a "new_test" folder that no
# visible cell creates — confirm the path.
train_image_count, test_image_count, val_image_count = map(
    count_images_in_directory,
    (
        "/content/ECG_Image_data/train",
        "/content/ECG_Image_data/new_test",
        "/content/ECG_Image_data/val",
    ),
)

for label, counts in (("Train", train_image_count), ("Test", test_image_count), ("Val", val_image_count)):
    print(f"{label}: {counts}")

In [None]:
import os
import random
import shutil

def _copy_without_overwrite(files, destination_dir):
    """Copy *files* into *destination_dir*, renaming basename clashes within this batch."""
    os.makedirs(destination_dir, exist_ok=True)
    used_names = set()
    for path in files:
        name = os.path.basename(path)
        if name in used_names:
            # Two pooled files (e.g. train/F/F0.png and test/F/F0.png) share a
            # basename; add a numeric suffix so the second does not replace the first.
            stem, ext = os.path.splitext(name)
            suffix = 1
            while f"{stem}_{suffix}{ext}" in used_names:
                suffix += 1
            name = f"{stem}_{suffix}{ext}"
        used_names.add(name)
        shutil.copy(path, os.path.join(destination_dir, name))


def balance_and_split_data(source_directory, train_directory, val_directory, test_directory, max_files_per_class):
    """Pool each class across the source splits and copy a capped, re-split subset.

    Files of every class folder found under ``source_directory/train``,
    ``/val`` and ``/test`` are pooled per class and shuffled. Consecutive
    slices of the shuffled pool are then copied (not moved) to the train, val
    and test destinations, sized by the quotas in *max_files_per_class*.

    Args:
        source_directory: Folder containing the ``train``/``val``/``test``
            split folders. Split folders that do not exist are skipped.
        train_directory: Destination root for the training copies.
        val_directory: Destination root for the validation copies.
        test_directory: Destination root for the test copies.
        max_files_per_class: dict mapping class name to a dict with optional
            ``'train'``, ``'val'`` and ``'test'`` quotas; a missing entry
            counts as 0.

    Notes:
        Classes whose quotas are all zero (or that are absent from
        *max_files_per_class*) get no destination folders. If a class has
        fewer files than its quotas add up to, the later splits receive
        fewer (possibly zero) files.
    """
    # Collect every file of every class across the existing splits.
    pooled = {}
    for split_name in ('train', 'val', 'test'):
        split_dir = os.path.join(source_directory, split_name)
        if not os.path.isdir(split_dir):
            # A missing split contributes no files instead of raising FileNotFoundError.
            continue
        for class_name in os.listdir(split_dir):
            class_dir = os.path.join(split_dir, class_name)
            if class_name == '.ipynb_checkpoints' or not os.path.isdir(class_dir):
                continue
            pooled.setdefault(class_name, []).extend(
                os.path.join(class_dir, f)
                for f in os.listdir(class_dir)
                if os.path.isfile(os.path.join(class_dir, f))
            )

    destinations = (('train', train_directory), ('val', val_directory), ('test', test_directory))

    # Copy at most the requested number of files into each destination split.
    for class_name, files in pooled.items():
        random.shuffle(files)

        quotas = max_files_per_class.get(class_name, {})
        if not any(quotas.get(split_name, 0) for split_name, _ in destinations):
            # Nothing requested for this class: creating empty class folders
            # would make folder-based loaders count a class with no samples.
            continue

        start = 0
        for split_name, destination_root in destinations:
            quota = quotas.get(split_name, 0)
            _copy_without_overwrite(files[start:start + quota], os.path.join(destination_root, class_name))
            start += quota

# Source root and the destination folders of the balanced copy.
source_directory = '/content/ECG_Image_data'
train_directory = '/content/ECG_Image_data_balanced/train'
val_directory = '/content/ECG_Image_data_balanced/val'
test_directory = '/content/ECG_Image_data_balanced/test'

# Per-class caps for each split; every class currently uses the same caps.
max_files_per_class = {
    class_name: {'train': 800, 'val': 200, 'test': 150}
    for class_name in ('F', 'N', 'V', 'S', 'Q', 'M')
}

# Re-balance the data and copy it to the destination folders.
balance_and_split_data(
    source_directory,
    train_directory,
    val_directory,
    test_directory,
    max_files_per_class=max_files_per_class,
)


In [None]:
import os
import shutil

def remove_ipynb_checkpoints(directory):
    """Delete every ``.ipynb_checkpoints`` folder found anywhere below *directory*.

    Each removed path is printed. A *directory* that does not exist results
    in no action, because os.walk then yields nothing.

    Args:
        directory: Root folder to clean.
    """
    for root, dirs, _ in os.walk(directory):
        if '.ipynb_checkpoints' in dirs:
            checkpoint_path = os.path.join(root, '.ipynb_checkpoints')
            shutil.rmtree(checkpoint_path)
            # Prune the entry so os.walk does not try to descend into the
            # folder that was just deleted.
            dirs.remove('.ipynb_checkpoints')
            print(f"Removed: {checkpoint_path}")

# Dataset roots to clean.
source_directory = '/content/ECG_Image_data'
train_directory = '/content/ECG_Image_data_balanced/train'
val_directory = '/content/ECG_Image_data_balanced/val'
test_directory = '/content/ECG_Image_data_balanced/test'

# Delete the .ipynb_checkpoints folders in every dataset root, in this order.
for dataset_root in (source_directory, train_directory, val_directory, test_directory):
    remove_ipynb_checkpoints(dataset_root)


In [None]:
from PIL import Image
import os

def find_corrupted_images(directory):
    """Return the paths of all files below *directory* that Pillow cannot open or verify.

    Every file in the tree is tried, whatever its extension, so a file that
    is not an image at all is reported as well. Each reported path is also
    printed.

    Args:
        directory: Root folder to scan recursively.

    Returns:
        list of file paths for which opening or ``verify()`` raised
        OSError or SyntaxError.
    """
    corrupted_files = []
    for root, _, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            try:
                # The context manager closes the file handle for every file
                # checked, so scanning a large dataset does not leak descriptors.
                with Image.open(file_path) as img:
                    img.verify()  # Verify that it is, in fact, an image
            except (OSError, SyntaxError):
                print(f"Corrupted image detected: {file_path}")
                corrupted_files.append(file_path)
    return corrupted_files

train_dir = '/content/ECG_Image_data_balanced/train'
val_dir = '/content/ECG_Image_data_balanced/val'
test_dir = '/content/ECG_Image_data_balanced/test'

# Check the whole dataset: train first, then val, then test.
corrupted_files = [
    bad_path
    for split_dir in (train_dir, val_dir, test_dir)
    for bad_path in find_corrupted_images(split_dir)
]

# Delete the corrupted files.
for file_path in corrupted_files:
    os.remove(file_path)

print(f"{len(corrupted_files)} corrupted files removed.")


In [None]:
# Per-folder image counts (dicts) of the balanced dataset splits.
train_image_count, test_image_count, val_image_count = map(
    count_images_in_directory,
    (
        "/content/ECG_Image_data_balanced/train",
        "/content/ECG_Image_data_balanced/test",
        "/content/ECG_Image_data_balanced/val",
    ),
)

for label, counts in (("Train", train_image_count), ("Test", test_image_count), ("Val", val_image_count)):
    print(f"{label}: {counts}")

In [None]:
# Totals per split of the balanced dataset (per-folder counts are printed too).
train_image_count = print_image_counts("/content/ECG_Image_data_balanced/train", "Train")
test_image_count = print_image_counts("/content/ECG_Image_data_balanced/test", "Test")
val_image_count = print_image_counts("/content/ECG_Image_data_balanced/val", "Val")

total_images = train_image_count + test_image_count + val_image_count

# When nothing was counted (e.g. the balanced folders were never created)
# report 0% instead of failing with ZeroDivisionError.
if total_images:
    train_percentage = (train_image_count / total_images) * 100
    test_percentage = (test_image_count / total_images) * 100
    val_percentage = (val_image_count / total_images) * 100
else:
    train_percentage = test_percentage = val_percentage = 0.0

print(f"Train: {train_image_count} images ({train_percentage:.2f}%)")
print(f"Test: {test_image_count} images ({test_percentage:.2f}%)")
print(f"Val: {val_image_count} images ({val_percentage:.2f}%)")

In [None]:
from PIL import Image

# Path of the sample image
image_path = '/content/ECG_Image_data_balanced/train/F/F0.png'

# Open the image only long enough to read its size; the context manager
# closes the underlying file handle afterwards.
with Image.open(image_path) as img:
    # Image dimensions as (width, height)
    width, height = img.size

print(f"Resmin genişliği: {width} piksel")
print(f"Resmin yüksekliği: {height} piksel")


# Train & Results

In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from keras.models import Model
from keras.layers import Dense, Dropout, GlobalAveragePooling2D
from keras.applications.vgg16 import VGG16
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
from keras.optimizers import Adam

# Turn on memory growth for every visible GPU. A RuntimeError raised while
# doing so is printed instead of aborting the cell.
gpus = tf.config.list_physical_devices('GPU')
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
    print(e)
else:
    # Only announce the devices when at least one GPU was found.
    if gpus:
        print(f"Using GPU devices: {gpus}")

# ImageDataGenerator for data augmentation and preprocessing
# NOTE(review): horizontal_flip mirrors each ECG trace left-to-right — confirm
# this augmentation is meaningful for these images.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True
)

# Validation and test images are only rescaled, never augmented.
test_val_datagen = ImageDataGenerator(rescale=1./255)

# Directories for training, validation, and test sets
train_dir = '/content/ECG_Image_data_balanced/train'
val_dir = '/content/ECG_Image_data_balanced/val'
test_dir = '/content/ECG_Image_data_balanced/test'

# Keras takes image sizes as (height, width). The images were measured as
# 432 px wide and 288 px tall, so the size is (288, 432); passing (432, 288)
# would resize every image to a swapped, distorted aspect ratio.
IMG_HEIGHT, IMG_WIDTH = 288, 432
BATCH_SIZE = 32

# Flow data from directories
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

val_generator = test_val_datagen.flow_from_directory(
    val_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

test_generator = test_val_datagen.flow_from_directory(
    test_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False  # Important for getting correct labels when predicting
)

# Load VGG16 model (ImageNet weights, no classification head); the input
# shape must match the generators' target size.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(IMG_HEIGHT, IMG_WIDTH, 3))

# Freeze the first 10 layers of VGG16 and keep the remaining layers trainable.
for layer_index, layer in enumerate(base_model.layers):
    layer.trainable = layer_index >= 10

# Add custom layers on top
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.1)(x)
# One softmax output per class folder found in the training directory.
predictions = Dense(train_generator.num_classes, activation='softmax')(x)

# Final model
model = Model(inputs=base_model.input, outputs=predictions)

optimizer = Adam(learning_rate=0.0001)  # A lower learning rate for VGG16

# Compile the model
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Directory for results
results_dir = '/content/drive/MyDrive/ECG-Results/training_results_vgg16_6_classes_1'
os.makedirs(results_dir, exist_ok=True)

# Keep only the weights of the epoch with the lowest validation loss.
checkpoint_path = os.path.join(results_dir, 'best_model_vgg16.keras')
checkpoint = ModelCheckpoint(
    checkpoint_path,
    save_best_only=True,
    monitor='val_loss',
    mode='min'
)

# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, mode='min')

# Learning rate reduction.
# min_lr must lie below the optimizer's starting rate (1e-4): with the former
# min_lr=0.1 the callback could never lower the rate. Its patience is kept
# shorter than the early-stopping patience so that a reduced rate gets a
# chance to help before training is stopped.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-7)

# Train the model
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=50,
    callbacks=[checkpoint, early_stopping, reduce_lr]
)

# Save the training history (one row per epoch)
history_df = pd.DataFrame(history.history)
history_df.to_csv(os.path.join(results_dir, 'training_history_vgg16.csv'))

def _ordered_generator(directory):
    """Return a non-augmented, non-shuffled generator over *directory* for evaluation."""
    return test_val_datagen.flow_from_directory(
        directory,
        # (height, width) taken from the model so the images always match its input.
        target_size=tuple(model.input_shape[1:3]),
        batch_size=32,
        class_mode='categorical',
        shuffle=False
    )


def _evaluate_split(generator, split_title, file_tag):
    """Predict on *generator* and save its confusion matrix and classification report.

    Args:
        generator: Directory iterator created with shuffle=False, so that the
            prediction order matches ``generator.classes``.
        split_title: Text used in the plot title, e.g. "Train Data".
        file_tag: Inserted into the output file names ('train_', 'val_' or ''
            for the test split).

    Returns:
        Tuple ``(true_labels, predicted_labels)`` as integer class indices.
    """
    generator.reset()
    class_names = list(generator.class_indices.keys())
    label_ids = list(range(len(class_names)))

    true_labels = generator.classes
    predicted_labels = np.argmax(model.predict(generator), axis=1)

    # Passing `labels` keeps one row/column per class even when a class is
    # absent from the true or predicted labels; no artificial samples needed.
    matrix = confusion_matrix(true_labels, predicted_labels, labels=label_ids)
    plt.figure(figsize=(10, 8))
    sns.heatmap(matrix, annot=True, fmt="d", cmap='Blues', xticklabels=class_names, yticklabels=class_names)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title(f'Confusion Matrix ({split_title})')
    plt.savefig(os.path.join(results_dir, f'confusion_matrix_{file_tag}vgg16.png'))
    plt.close()

    report = classification_report(
        true_labels,
        predicted_labels,
        labels=label_ids,
        target_names=class_names,
        zero_division=0  # classes that never occur score 0 instead of warning
    )
    with open(os.path.join(results_dir, f'classification_report_{file_tag}vgg16.txt'), 'w') as f:
        f.write(report)

    return true_labels, predicted_labels


# The generators used for fitting shuffle their samples, so predictions made
# with them do not line up with `.classes`. Train and validation data are
# therefore evaluated through fresh, ordered, non-augmented generators.
train_eval_generator = _ordered_generator(train_dir)
val_eval_generator = _ordered_generator(val_dir)

# Evaluate the model on train data
y_train_classes, Y_train_pred_classes = _evaluate_split(train_eval_generator, 'Train Data', 'train_')

# Evaluate the model on validation data
y_val_classes, Y_val_pred_classes = _evaluate_split(val_eval_generator, 'Validation Data', 'val_')

# Evaluate the model on test data (test_generator is already created with shuffle=False)
y_test_classes, Y_pred_classes = _evaluate_split(test_generator, 'Test Data', '')

print(f"Model training and evaluation complete. Results saved in '{results_dir}' directory.")

FileNotFoundError: [Errno 2] No such file or directory: '/content/ECG_Image_data_balanced/train'