# The dataset contains PCG heart signals of 5 classes => {AS, MS, MVP, MR, A}, each representing a type of valvular heart disease

In [4]:
import os
import librosa
import numpy as np
import pywt

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, BatchNormalization, Dense, Dropout, Input, Flatten, GlobalAveragePooling1D

# Preprocessing and DWT

In [5]:
# --- Configuration ---------------------------------------------------------

# Wavelet decomposition settings handed to pywt.wavedec
wavelet = "db4"
level = 5

# Rate (Hz) the recordings are loaded at by librosa
sampling_rate = 4000

# Accumulators filled while loading: one feature array and one class index per recording
X = []
y = []

# Root folder of the dataset; each sub-folder holds the recordings of one class
data_dir = "/kaggle/input/yasheen-khan/PCG_data/"


# Bring every signal to the same length
def pad_or_truncate(signal, target_length=12000):
    """Force a 1-D signal to exactly ``target_length`` samples.

    The default of 12000 samples corresponds to 3 s at the notebook's
    4 kHz sampling rate.

    Args:
        signal: 1-D sequence of samples.
        target_length: Number of samples the result must have.

    Returns:
        The leading ``target_length`` samples when the signal is too long,
        the signal followed by trailing zeros when it is too short, or the
        input object itself when the length already matches.
    """
    shortfall = target_length - len(signal)

    if shortfall < 0:
        # Too long: keep only the leading part
        return signal[:target_length]

    if shortfall > 0:
        # Too short: append zeros at the end
        return np.pad(signal, (0, shortfall), mode='constant')

    return signal



# normalized DWT signals having shape => (6003, 6) {len = 6003, channels = 6}
def process_pcg_signal(signal, wavelet=wavelet, level=level, target_length=6003):
    """Turn a 1-D signal into a multi-channel array of normalised DWT bands.

    The signal is decomposed with ``pywt.wavedec``; each returned coefficient
    array is min-max scaled on its own and brought to ``target_length``
    samples, and the bands are stacked as channels along the last axis.

    Args:
        signal: 1-D array of samples.
        wavelet: Wavelet name passed to ``pywt.wavedec``.
        level: Decomposition level passed to ``pywt.wavedec``.
        target_length: Length every band is resized to. The default of 6003
            reproduces the (6003, 6) shape this notebook feeds to the CNN.

    Returns:
        Array of shape ``(target_length, n_bands)``, where ``n_bands`` is the
        number of coefficient arrays returned by ``pywt.wavedec``.

    Note:
        ``np.resize`` does not zero-pad: a band shorter than ``target_length``
        is filled by repeating its values cyclically, and a longer band is
        cut off.
    """
    coeffs = pywt.wavedec(signal, wavelet, level=level)  # get DWT

    scaler = MinMaxScaler()
    channels = []
    for band in coeffs:
        # fit_transform refits the scaler on each call, so every band is
        # normalised independently of the others.
        scaled = scaler.fit_transform(band.reshape(-1, 1)).flatten()
        channels.append(np.resize(scaled, target_length))

    return np.stack(channels, axis=-1)



def generate_train_test(test_size=0.2, batch_size=32, random_state=42):
    """Load the dataset from ``data_dir`` and build batched train/test datasets.

    Every sub-directory of ``data_dir`` is treated as one class; its index in
    the alphabetically sorted list of sub-directories is the integer label.
    Each ``.wav`` file is loaded at ``sampling_rate``, brought to a fixed
    length with ``pad_or_truncate`` and converted with ``process_pcg_signal``.

    Side effect: the module-level lists ``X`` and ``y`` are reset and then
    filled with the features and labels of the recordings that were loaded.

    Args:
        test_size: Fraction of the samples held out for the test split.
        batch_size: Batch size of both returned datasets.
        random_state: Seed for the stratified split.

    Returns:
        ``(train_data, test_data)`` as batched, prefetched ``tf.data.Dataset``
        objects. Only the training dataset is shuffled.

    Raises:
        ValueError: If no ``.wav`` file is found under ``data_dir``.
    """
    # X and y live at module level; clear them so that calling this function
    # again (e.g. re-running the cell) does not append every recording twice.
    X.clear()
    y.clear()

    # os.listdir returns entries in arbitrary order. Sorting keeps the
    # class -> index mapping (and the seeded split below) reproducible, and
    # the isdir filter skips stray files that would break the inner listdir.
    class_labels = sorted(
        entry for entry in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, entry))
    )

    for class_idx, class_label in enumerate(class_labels):
        class_path = os.path.join(data_dir, class_label)

        for file in sorted(os.listdir(class_path)):
            if not file.endswith('.wav'):
                continue

            file_path = os.path.join(class_path, file)
            signal, _ = librosa.load(file_path, sr=sampling_rate)

            signal = pad_or_truncate(signal)
            dwt = process_pcg_signal(signal)

            X.append(dwt)
            y.append(class_idx)

    if not X:
        raise ValueError(f"No .wav files found under {data_dir!r}")

    # One contiguous array per split is cheaper to hand to TensorFlow than a
    # Python list of per-sample arrays.
    features = np.asarray(X, dtype=np.float32)
    labels = np.asarray(y, dtype=np.int32)

    X_train, X_test, y_train, y_test = train_test_split(
        features,
        labels,
        test_size=test_size,
        random_state=random_state,
        stratify=labels,
    )

    # create tf dataset objects
    train_data = tf.data.Dataset.from_tensor_slices((X_train, y_train))
    train_data = (
        train_data
        .shuffle(buffer_size=len(X_train))
        .batch(batch_size)
        .prefetch(tf.data.experimental.AUTOTUNE)
    )

    # Evaluation metrics do not depend on sample order, so the test set is
    # left unshuffled.
    test_data = tf.data.Dataset.from_tensor_slices((X_test, y_test))
    test_data = test_data.batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)

    return (train_data, test_data)



# 1D CNN

In [6]:
def build_cnn(input_shape=(6003,6), num_classes = 5):
    """Assemble and compile the 1-D CNN classifier.

    Architecture: three Conv1D -> BatchNormalization -> MaxPooling1D stages,
    a Flatten layer, two Dense + Dropout layers and a softmax output.

    Args:
        input_shape: Shape of one sample, (length, channels).
        num_classes: Number of units in the softmax output layer.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    # (filters, kernel_size) of each convolutional stage, in order
    conv_stages = [(32, 7), (24, 7), (128, 5)]
    # Units of the hidden dense layers; each is followed by Dropout(0.3)
    dense_units = (64, 32)

    stack = [Input(shape=input_shape)]

    for n_filters, kernel_size in conv_stages:
        stack.append(Conv1D(n_filters, kernel_size, activation="relu", padding="same"))
        stack.append(BatchNormalization())
        stack.append(MaxPooling1D(pool_size=2, strides=2))

    stack.append(Flatten())

    for units in dense_units:
        stack.append(Dense(units, activation="relu"))
        stack.append(Dropout(0.3))

    stack.append(Dense(num_classes, activation="softmax"))

    model = Sequential(stack)
    model.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

# Run 

In [7]:
# Batching happens inside generate_train_test(), which yields batches of 32;
# this value is kept for reference and is not passed to fit().
batch_size = 32
epochs = 25

if __name__ == "__main__":

    # generate dataset
    train_data, test_data = generate_train_test()

    # build cnn
    cnn = build_cnn()

    # train cnn
    # batch_size is deliberately not passed: the inputs are already-batched
    # tf.data.Dataset objects, and Keras documents that batch_size must not
    # be specified for that input type.
    # NOTE(review): the test split is also used as validation data, so the
    # evaluate() call below is not an untouched hold-out estimate.
    history = cnn.fit(
        train_data,
        epochs = epochs,
        validation_data = test_data,
    )

    # evaluate model
    cnn.evaluate(test_data)

Epoch 1/25
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 86ms/step - accuracy: 0.3116 - loss: 9.6378 - val_accuracy: 0.2000 - val_loss: 7.7826
Epoch 2/25
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - accuracy: 0.4548 - loss: 3.7390 - val_accuracy: 0.2050 - val_loss: 15.2830
Epoch 3/25
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step - accuracy: 0.4298 - loss: 1.7924 - val_accuracy: 0.2850 - val_loss: 16.3084
Epoch 4/25
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step - accuracy: 0.5527 - loss: 1.1916 - val_accuracy: 0.2100 - val_loss: 18.9384
Epoch 5/25
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step - accuracy: 0.5460 - loss: 1.3897 - val_accuracy: 0.2000 - val_loss: 28.0971
Epoch 6/25
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 34ms/step - accuracy: 0.5917 - loss: 1.0414 - val_accuracy: 0.2000 - val_loss: 34.6429
Epoch 7/25
[1m25/25[0m [32