In [None]:
from numpy.random import seed
seed(9)
# import tensorflow
# tensorflow.random.set_seed(9)
import pandas as pd       
import numpy as np
import matplotlib.pyplot as plt    
import seaborn as sns

In [None]:
import os
# Walk the Kaggle input directory tree and print the full path of every file found
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

Let us import additional packages and all the audio files from set_b folder. The files form a collection of heartbeat sounds. Hearts normally have a predictable sound pattern as they beat, but some disorders can cause the heart to beat abnormally. 

In [None]:
import librosa as lr
from glob import glob

# List all the wav files in the folder.
# glob returns its matches in arbitrary (filesystem-dependent) order, so sort them:
# otherwise "the first audio file" used below can differ from one run to the next.
audio_files = sorted(glob('/kaggle/input/heartbeat-sounds/set_b/' + '/*.wav'))
len(audio_files)

Let us inspect the first audio file.

In [None]:
# First audio file: show its path, then what librosa returns when loading it
first_path = audio_files[0]
print(first_path)
lr.load(first_path)

Let us load the file, create the time points and visualize the signal.

In [None]:
# Read in the first audio file: the samples and the sample rate
audio, sfreq = lr.load(audio_files[0])
for value in (audio, len(audio), sfreq):
    print(value, '\n')

# Create the time array: the position of each sample divided by the sample rate
indexes = np.arange(audio.shape[-1])
time_points = indexes / sfreq
print(time_points, '\n')
print(len(time_points))

In [None]:
# Plot audio over time on a single wide axis
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(time_points, audio)
ax.set_xlabel('Time (s)')
ax.set_ylabel('Sound Amplitude')
plt.show()

Some audios are normal heartbeat activity, while others are abnormal activity. Let's see if we can spot the difference.

In [None]:
# Collect the normal recordings. glob order is arbitrary, so sort the paths to make
# "the first three files" the same on every run.
normal_files = sorted(glob('/kaggle/input/heartbeat-sounds/set_b/' + '/normal*.wav'))
audio0_n, sfreq0_n = lr.load(normal_files[0])
audio1_n, sfreq1_n = lr.load(normal_files[1])
audio2_n, sfreq2_n = lr.load(normal_files[2])

# The three clips may differ in length. Build the time axis from the longest one so
# every sample of every clip gets a timestamp; sizing it from a single clip leaves
# NaN index entries once the clips are combined into one DataFrame and that clip is
# not the longest.
n_samples_n = max(len(audio0_n), len(audio1_n), len(audio2_n))
time_n = np.arange(0, n_samples_n) / sfreq1_n
print(len(audio0_n),sfreq0_n,'\n', len(audio1_n),sfreq1_n,'\n',len(audio2_n),sfreq2_n)
print(time_n.shape)
print(time_n)

In [None]:
# One row per array (three clips plus the time axis), labelled up front, then
# transposed so that each array becomes a named column
normal = pd.DataFrame(
    data=[audio0_n, audio1_n, audio2_n, time_n],
    index=['0', '1', '2', 'time'],
).T
# Index by time and keep the first 100000 time points only
normal = normal.set_index('time').iloc[:100000]
display(normal.shape)
normal.head(3)

In [None]:
# Collect the abnormal (murmur) recordings. glob order is arbitrary, so sort the paths
# to make "the first three files" the same on every run.
abnormal_files = sorted(glob('/kaggle/input/heartbeat-sounds/set_b/' + '/murmur*.wav'))
audio0_an, sfreq0_an = lr.load(abnormal_files[0])
audio1_an, sfreq1_an = lr.load(abnormal_files[1])
audio2_an, sfreq2_an = lr.load(abnormal_files[2])

# The three clips may differ in length. Build the time axis from the longest one so
# every sample of every clip gets a timestamp; sizing it from a single clip leaves
# NaN index entries once the clips are combined into one DataFrame and that clip is
# not the longest.
n_samples_an = max(len(audio0_an), len(audio1_an), len(audio2_an))
time_an = np.arange(0, n_samples_an) / sfreq2_an
print(len(audio0_an),sfreq0_an,'\n', len(audio1_an),sfreq1_an,'\n',len(audio2_an),sfreq2_an)
print(time_an.shape)
print(time_an)

In [None]:
# One row per array (three clips plus the time axis), labelled up front, then
# transposed so that each array becomes a named column
abnormal = pd.DataFrame(
    data=[audio0_an, audio1_an, audio2_an, time_an],
    index=['0', '1', '2', 'time'],
).T
# Index by time and keep the first 100000 time points only
abnormal = abnormal.set_index('time').iloc[:100000]
display(abnormal.shape)
abnormal.head(3)

In [None]:
def show_plot_and_make_titles():
    """Title the top row of the subplot grid and label one axis.

    Takes no arguments: it reads the notebook-level names ``axs`` (indexed here as a
    2-D grid of axes) and ``ax`` (a single axes object), so both must already exist
    when it is called. Despite its name it does not call ``plt.show()``; it only sets
    titles and labels and tightens the figure layout.
    """
    top_left, top_right = axs[0, 0], axs[0, 1]
    top_left.set_title("Normal Heartbeats")
    top_right.set_title("Abnormal Heartbeats")
    ax.set_xlabel('Time (s)')
    ax.set_ylabel('Sound Amplitude')
    plt.tight_layout()

In [None]:
fig, axs = plt.subplots(3, 2, figsize=(15, 7), sharex=True, sharey=True)

# Stack the normal/abnormal audio so you can loop and plot:
# after the transpose each row is one clip (normal clips first, then abnormal)
stacked_audio = np.hstack([normal, abnormal]).T
display(stacked_audio)
display(stacked_audio.shape )

# Calculate the time array from the number of samples actually present instead of
# assuming 100000: the frames hold fewer rows when all clips are shorter than that,
# and a hard-coded length would then make ax.plot fail on mismatched x/y sizes
time = np.arange(0, stacked_audio.shape[1]) / sfreq2_an
display(time.shape)

# Loop through each audio file / ax object and plot
# .T.ravel() transposes the array, then unravels it into a 1-D vector for looping,
# so the left column of axes receives the first three clips and the right column the rest
for iaudio, ax in zip(stacked_audio, axs.T.ravel()):
    ax.plot(time, iaudio)
    # The helper labels whichever axis is currently bound to the global name `ax`,
    # so it has to be called inside the loop
    show_plot_and_make_titles()

Visualizing raw data is often uninformative when it comes to discriminating between two classes of data points. The data is usually noisy or exhibits complex patterns that aren't discoverable by the naked eye.

A common technique to find simple differences between two sets of data is to average across multiple instances of the same class. This may remove noise and reveal underlying patterns.

In [None]:
# Average across the audio files of each DataFrame (one mean value per time point)
mean_normal = normal.mean(axis=1)
mean_abnormal = abnormal.mean(axis=1)

# Plot each average over time, side by side on a shared amplitude scale
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 3), sharey=True)
panels = (
    (ax1, mean_normal, "Normal Data"),
    (ax2, mean_abnormal, "Abnormal Data"),
)
for panel, averaged, heading in panels:
    panel.plot(time, averaged)
    panel.set(title=heading, xlabel='Time (s)')
ax1.set(ylabel='Sound Amplitude')
plt.show()

We will use each repetition as a datapoint, and each moment in time as a feature to fit a classifier that attempts to predict abnormal vs. normal heartbeats using only the raw data. First we will start with normal files to create our required dataset.

In [None]:
# Load every normal recording, keeping the samples and the sample rate of each
audio_n = []
sfreq_n = []
for path in normal_files:
    aud, sfr = lr.load(path)
    audio_n.append(aud)
    sfreq_n.append(sfr)

# The recordings differ in length. Size the time axis from the longest one so that
# every sample of every recording gets a timestamp; sizing it from the first file
# alone leaves NaN index entries for any recording longer than that file.
time_n = np.arange(0, max(len(a) for a in audio_n)) / sfreq_n[0]
print(time_n.shape)
print(len(audio_n))
print(len(sfreq_n))

In [None]:
# Build the frame from a new list rather than appending time_n to audio_n in place:
# mutating audio_n would add a second time row, and break the column assignment
# below, whenever this cell is run again.
cols = list(range(len(normal_files))) + ['time']
normal = pd.DataFrame(data=audio_n + [time_n]).T
normal.columns = cols
normal.head(3)

In [None]:
# Frame dimensions, followed by the length of the longest array held in audio_n
print(normal.shape)
max(map(len, audio_n))

In [None]:
# Index by time and keep the first 50000 time points only
normal = normal.set_index('time').iloc[:50000]
display(normal.shape)
normal.head(3)

In [None]:
# One row per recording (time points become the columns), tagged with its class label
nml = normal.T.assign(type='Normal')
display(nml.shape)
display(nml.head())

Now, we will repeat the above steps for abnormal files too.

In [None]:
# Load every abnormal recording, keeping the samples and the sample rate of each
audio_an = []
sfreq_an = []
for path in abnormal_files:
    aud, sfr = lr.load(path)
    audio_an.append(aud)
    sfreq_an.append(sfr)

# The recordings differ in length. Size the time axis from the longest one so that
# every sample of every recording gets a timestamp; sizing it from the first file
# alone leaves NaN index entries for any recording longer than that file.
time_an = np.arange(0, max(len(a) for a in audio_an)) / sfreq_an[0]
print(time_an.shape)
print(len(audio_an))
print(len(sfreq_an))

In [None]:
# Build the frame from a new list rather than appending time_an to audio_an in place:
# mutating audio_an would add a second time row, and break the column assignment
# below, whenever this cell is run again.
cols = list(range(len(abnormal_files))) + ['time']
abnormal = pd.DataFrame(data=audio_an + [time_an]).T
abnormal.columns = cols
abnormal.head(3)

In [None]:
# Index by time and keep the first 50000 time points only
abnormal = abnormal.set_index('time').iloc[:50000]
display(abnormal.shape)
abnormal.head(3)

In [None]:
# One row per recording (time points become the columns), tagged with its class label
ab_nml = abnormal.T.assign(type='Abnormal')
display(ab_nml.shape)
display(ab_nml.head())

Let us append both datasets to form our required dataset.

In [None]:
# Stack the normal rows on top of the abnormal rows.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat is the
# supported equivalent and aligns the two frames on their column labels the same way.
appended_audio = pd.concat([nml, ab_nml])
display(appended_audio.shape)
display(appended_audio)

In [None]:
# Number of missing values in each column, before imputation
appended_audio.isna().sum(axis=0)

In [None]:
# Fill each gap with the median of its column.
# The frame also holds the string-valued 'type' column, so restrict the median to the
# numeric columns: pandas 2.x no longer drops non-numeric columns silently and raises
# a TypeError otherwise. 'type' has no missing values to fill, so leaving it out of
# the fill values changes nothing.
column_medians = appended_audio.median(axis=0, numeric_only=True)
appended_audio = appended_audio.fillna(column_medians)

In [None]:
# Number of missing values in each column, after imputation
appended_audio.isna().sum(axis=0)

In [None]:
# Separate the label column from the features (every remaining column)
y = appended_audio['type']
X = appended_audio.drop(columns='type')
display(X)
display(X.shape)
display(y.shape)
display(y)

We will try two simple classifiers: first a linear SVC and then a neural network.

In [None]:
from sklearn.model_selection import train_test_split as tts
# Hold out 20% of the recordings, stratified on the class label, with a fixed seed
X_train, X_test, y_train, y_test = tts(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=9,
)
y_test

In [None]:
from sklearn.svm import LinearSVC

# Initialize the model and fit it on the training split (fit returns the estimator)
model = LinearSVC().fit(X_train, y_train)

# Generate predictions and score them manually: fraction of test labels matched
predictions = model.predict(X_test)
n_correct = sum(predictions == y_test)
print(n_correct / len(y_test))

In [None]:
from tensorflow.keras.utils import to_categorical
# Turn the string labels into integer codes, then one-hot encode those codes
label_codes = pd.factorize(y)[0]
y_encoded = to_categorical(label_codes)
y_encoded[:3]

In [None]:
# Re-split with the one-hot targets; stratification still uses the original labels
# and the same seed as before
X_train, X_test, y_train, y_test = tts(
    X,
    y_encoded,
    test_size=0.2,
    stratify=y,
    random_state=9,
)

In [None]:
from keras import models
from keras import layers

# Read the input width from the data instead of hard-coding 50000, so the network
# still builds when the feature matrix has a different number of columns.
n_features = X_train.shape[1]

# Small fully-connected network on the raw samples
model = models.Sequential()
model.add(layers.Dense(32, activation='relu', input_shape=(n_features,)))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(16, activation='relu'))
# The targets are one-hot vectors over two mutually exclusive classes, so use a
# softmax output with categorical cross-entropy. Two independent sigmoid units with
# binary cross-entropy treat the problem as multi-label, and Keras then reports
# element-wise binary accuracy rather than per-sample class accuracy.
model.add(layers.Dense(2, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train silently, holding out 20% of the training split for validation
history = model.fit(X_train, y_train, epochs=20, validation_split=.2, batch_size=128, verbose=0)

history_dict = history.history
print(history_dict.keys())

In [None]:
# Validation loss and accuracy, each averaged over all training epochs,
# followed by the evaluation on the held-out test split
for metric_name in ('val_loss', 'val_accuracy'):
    print(np.mean(history_dict[metric_name]))
model.evaluate(X_test, y_test)

One of the ways we can improve the features available to our model is to remove some of the noise present in the data. In audio data, a common way to do this is to rectify it and then smooth the data to produce the envelope of the data so that the total amount of sound energy over time is more distinguishable. Let us see how we can do this just to the first audio file in our normal dataset.

In [None]:
# First column of the normal frame, limited to the first 22050 rows that remain
# after dropping every row containing a NaN
complete_rows = normal.dropna()
audio0 = complete_rows.iloc[:22050, 0]
display(audio0.shape)
audio0.head()

In [None]:
# Plot the raw data first (line plot against the time index)
audio0.plot.line(figsize=(10, 5))
plt.show()

In [None]:
# Rectify the audio signal: replace every sample with its absolute value
audio_rectified = audio0.abs()

# Plot the result
audio_rectified.plot.line(figsize=(10, 5))
plt.show()

In [None]:
# Smooth by applying a rolling mean over a window of 200 samples
smoothing_window = audio_rectified.rolling(window=200)
audio_rectified_smooth = smoothing_window.mean()

# Plot the result
audio_rectified_smooth.plot.line(figsize=(10, 5))
plt.show()

Let us smooth out all our audio files in X.

In [None]:
# Envelope of every recording: transpose so each recording is a column, rectify,
# then smooth each column with a 200-sample rolling mean
X_env = X.T.apply(np.abs).rolling(200).mean()
display(X_env.tail())
X_env.shape

Sanity-check the first audio file by visualizing it again.

In [None]:
# Envelope of the first recording: first 17000 rows left after dropping NaN rows
first_envelope = X_env.dropna().iloc[:17000, 0]
first_envelope.plot(figsize=(10, 5))
plt.show()

In [None]:
# Calculate stats: one mean, one standard deviation and one maximum per recording
# (each reduction runs down the rows of X_env, i.e. along axis 0)
means, stds, maxs = (reduce_fn(X_env, axis=0) for reduce_fn in (np.mean, np.std, np.max))

# Create the feature matrix: one row per recording, one column per statistic
X1 = np.column_stack([means, stds, maxs])
print(X1.shape)

In [None]:
from sklearn.preprocessing import normalize
from sklearn.model_selection import cross_val_score

# Normalize the data
X1 = normalize(X1)

# Cross-validate a fresh linear SVC on the summary features (5 folds)
# and report the mean of the fold scores
model = LinearSVC()
score = cross_val_score(model, X1, y, cv=5)
print(score.mean())

Now, we compute tempo and rhythm features for heartbeat data, and fit a model once more.

In [None]:
# Calculate the tempo of the sounds.
# librosa >= 0.10 accepts the signal only as the keyword argument `y`, and moved the
# estimator to librosa.feature.tempo (librosa.beat.tempo is deprecated there and
# gone in 1.0). Prefer the new location and fall back to the old one on older
# releases; the keyword form works with both.
tempo_estimator = getattr(lr.feature, 'tempo', None) or lr.beat.tempo
tempos = [
    tempo_estimator(y=i_audio.values, sr=sfreq, hop_length=2**6, aggregate=None)
    for _col, i_audio in X_env.dropna().items()
]

# Convert the list to an array: one row of tempo estimates per recording
tempos = np.array(tempos)
print(tempos)

# Calculate statistics of each tempo (reduce along the last axis, one value per recording)
tempos_mean = tempos.mean(axis=-1)
tempos_std = tempos.std(axis=-1)
tempos_max = tempos.max(axis=-1)

In [None]:
from sklearn.preprocessing import normalize

# Create the feature matrix: the three envelope statistics followed by the three
# tempo statistics, one row per recording
feature_columns = [means, stds, maxs, tempos_mean, tempos_std, tempos_max]
X2 = np.column_stack(feature_columns)

# Normalize the data
X2 = normalize(X2)

# Cross-validate a fresh linear SVC (5 folds) and report the mean of the fold scores
model = LinearSVC()
score = cross_val_score(model, X2, y, cv=5)
print(score.mean())

We will calculate a spectrogram of the sound for a sample audio file. This describes what spectral content (e.g., low and high pitches) is present in the sound over time.

In [None]:
# Import the stft function
from librosa.core import stft

# First column of the normal frame, restricted to rows that contain no NaN
audio_0 = normal.dropna().iloc[:, 0]

# Prepare the STFT: hop of 2**4 samples between frames, FFT window of 2**7 samples
HOP_LENGTH = 2**4
samples = audio_0.to_numpy()
spec = stft(samples, hop_length=HOP_LENGTH, n_fft=2**7)
display(spec.shape)
spec[:2]

In [None]:
from librosa.core import amplitude_to_db
from librosa.display import specshow

# Time stamp of every sample of the clip being analysed
time = np.arange(0, len(audio_0)) / sfreq

# Convert into decibels.
# The STFT is complex-valued; pass its magnitude explicitly, because amplitude_to_db
# discards the phase anyway and emits a warning when handed complex input.
spec_db = amplitude_to_db(np.abs(spec))
display(spec_db.shape)
display(spec_db)

# Compare the raw audio to the spectrogram of the audio
fig, axs = plt.subplots(2, 1, figsize=(10, 10), sharex=True)
axs[0].plot(time, audio_0)
axs[0].set(title='Raw audio', ylabel='Sound Amplitude')
# Draw on the second panel explicitly rather than relying on matplotlib's
# "current axes" happening to be the last subplot created
specshow(spec_db, sr=sfreq, x_axis='time', y_axis='hz', hop_length=HOP_LENGTH, ax=axs[1])
axs[1].set(title='Spectrogram (dB)')
plt.show()