<h1>Heartbeat Analysis With Machine Learning</h1>
Author: Syed Mahdi

In [None]:
import os
import wave

import pandas as pd
import tensorflow as tf
import keras
import matplotlib.pyplot as plt
import numpy as np

<h2>Exploring Our Dataset</h2>

In [None]:
# Root folder of the heartbeat dataset, given relative to the notebook's working
# directory; the CSV and wav paths used in this notebook are built by appending to it.
path = "./heartbeat_dataset"  # Path to our dataset

In [None]:
# Load the set A metadata CSV and list the distinct values found in its 'label' column
set_a = pd.read_csv(path + "/set_a.csv")
print(f"Unique labels: {set_a['label'].unique()}")
set_a

In [None]:
# Load set_a_timing.csv and display it.
# NOTE(review): presumably timing annotations for the set A recordings — confirm against the dataset docs.
set_a_timing = pd.read_csv(path + "/set_a_timing.csv")
set_a_timing

In [None]:
# Load the set B metadata CSV and list the distinct values found in its 'label' column
set_b = pd.read_csv(path + "/set_b.csv")
print(f"Unique labels: {set_b['label'].unique()}")
set_b

In [None]:
# Stack set A and set B row-wise into a single table and list its distinct labels.
# Note: pd.concat is called without ignore_index, so each frame keeps its own row
# index and index values can repeat in the combined frame.
combined_dataset = pd.concat([set_a, set_b])
print(f"Unique labels: {combined_dataset['label'].unique()}")
combined_dataset

In [None]:
# Determine what the shortest, longest, and average durations of our wav files are
def _wav_duration_seconds(file_path):
    """Return the length of one wav file in seconds (frame count / frame rate)."""
    with wave.open(file_path, 'rb') as wav_file:
        return wav_file.getnframes() / float(wav_file.getframerate())


def analyze_wav_durations(directory):
    """Summarise the durations of the wav files directly inside `directory`.

    Only entries whose name ends with '.wav' are considered; sub-directories are
    not searched.

    Args:
        directory: Path of the folder to scan.

    Returns:
        A tuple ``(shortest, longest, average)`` of durations in seconds.
        When the folder holds no wav files the result is ``(inf, 0, 0)``.

    Raises:
        OSError: If `directory` cannot be listed or a file cannot be opened.
        wave.Error: If a '.wav' file is not something the `wave` module can read.
    """
    durations = [
        _wav_duration_seconds(os.path.join(directory, filename))
        for filename in os.listdir(directory)
        if filename.endswith('.wav')
    ]

    # Keep the sentinel values for "nothing found" so callers that take the
    # min/max across several folders are not skewed by an empty one.
    if not durations:
        return float('inf'), 0, 0

    return min(durations), max(durations), sum(durations) / len(durations)

# Per-set duration statistics (seconds)
set_a_shortest, set_a_longest, set_a_average = analyze_wav_durations(path + '/set_a')
set_b_shortest, set_b_longest, set_b_average = analyze_wav_durations(path + '/set_b')

# Count the wav files in each set so the overall mean can be weighted by set size.
# Simply averaging the two per-set means would over-weight whichever set is smaller.
set_a_count = sum(name.endswith('.wav') for name in os.listdir(path + '/set_a'))
set_b_count = sum(name.endswith('.wav') for name in os.listdir(path + '/set_b'))
total_count = set_a_count + set_b_count
overall_average = (
    (set_a_average * set_a_count + set_b_average * set_b_count) / total_count
    if total_count > 0 else 0
)

print(f"Shortest duration: {min(set_a_shortest, set_b_shortest)}")
print(f"Longest duration: {max(set_a_longest, set_b_longest)}")
print(f"Average duration: {overall_average}")

<h2>Waveforms of Different Heartbeat Categories</h2>

In [None]:
# Function to plot the waveform of a given file
from scipy.io.wavfile import read

def plot_waveform_for_file(file_path):
    """Plot the amplitude of a wav file against time in seconds.

    Args:
        file_path: Path of the wav file to read and plot.

    The figure is shown immediately with ``plt.show()``; nothing is returned.
    """
    # Source: https://www.tutorialspoint.com/how-to-plot-a-wav-file-using-matplotlib
    sample_rate, audio = read(file_path)
    # Convert sample indices to seconds so the x-axis really is time,
    # and recordings with different sample rates are comparable.
    seconds = np.arange(len(audio)) / sample_rate
    plt.plot(seconds, audio)
    plt.ylabel("Amplitude")
    plt.xlabel("Time (s)")
    plt.show()

In [None]:
# The waveform of a normal heartbeat shows a distinct, repeating pattern
# for the "lub-dub" sound the heart makes
normal_heartbeat_path = path + "/set_a/normal__201101070538.wav"
plot_waveform_for_file(normal_heartbeat_path)

In [None]:
# In a heartbeat murmur, the distinct "lub-dub" pattern begins to fade
murmur_heartbeat_path = path + "/set_a/murmur__201106141148.wav"
plot_waveform_for_file(murmur_heartbeat_path)

In [None]:
# Extrasystole heartbeats are out of rhythm, and can result in "lub-dub-dub" or "lub-lub-dub" heartbeats
# NOTE(review): the file name below spells it "extrastole"; presumably that matches the
# dataset's own naming — confirm before "correcting" it.
extrasystole_heartbeat_path = path + "/set_b/extrastole__144_1306522408528_B.wav"
plot_waveform_for_file(extrasystole_heartbeat_path)

In [None]:
# Artifact recordings contain no heartbeat sounds; they need to be identified
# so that we know when to attempt classification again
artifact_heartbeat_path = path + "/set_a/artifact__201106101314.wav"
plot_waveform_for_file(artifact_heartbeat_path)

In [None]:
# The extrahls label indicates that there are additional sounds alongside the heartbeat
extra_heart_sound_path = path + "/set_a/extrahls__201103150114.wav"
plot_waveform_for_file(extra_heart_sound_path)

<h2>Audio Data Preprocessing</h2>