In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import tensorflow as tf
from tensorflow import keras
import librosa
import math
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Conv2D, MaxPooling2D, MaxPooling1D, Dense, Flatten, Dropout, SeparableConv1D
from tensorflow.keras.metrics import Precision, Recall, AUC
import matplotlib.pyplot as plt

from scipy.signal import savgol_filter
from scipy.ndimage import gaussian_filter1d
from scipy.signal import find_peaks
from scipy.signal import hilbert, chirp
from scipy.signal import butter, lfilter

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score, f1_score
from sklearn.metrics import plot_confusion_matrix

import librosa.display as lrd

In [None]:
# --- Feature-extraction settings ---
# Default value of the `width` argument of audio_to_melspec.
mel_width = 128
# Number of mel bands. NOTE(review): not referenced by any of the visible cells.
n_mels = 128
# Nominal sample rate; slice_resp_cycle pads every clip to 4 * def_sr samples.
def_sr = 44100

# --- Training hyper-parameters ---
# Base learning rate; the mSpec model's Adam optimizer is built with learning_rate / 5.
learning_rate = 1e-4
# NOTE(review): the fit() calls in this notebook hard-code epochs=100 and never read this value.
epochs = 50

In [None]:
def butter_bandpass(lowcut, highcut, fs, order=5):
    """Design a Butterworth band-pass filter that keeps [lowcut, highcut].

    Args:
        lowcut: lower band edge, in the same unit as ``fs``.
        highcut: upper band edge, in the same unit as ``fs``.
        fs: sampling frequency of the signal the filter will be applied to.
        order: filter order handed to ``scipy.signal.butter``.

    Returns:
        The ``(b, a)`` coefficient pair produced by ``scipy.signal.butter``.
    """
    nyquist = fs * 0.5
    # butter() is given the band edges as fractions of the Nyquist frequency.
    band_edges = [lowcut / nyquist, highcut / nyquist]
    return butter(order, band_edges, btype='band')


def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    """Band-pass ``data`` with the Butterworth filter from ``butter_bandpass``.

    Args:
        data: sequence of samples to filter.
        lowcut: lower band edge.
        highcut: upper band edge.
        fs: sampling frequency of ``data``.
        order: filter order forwarded to ``butter_bandpass``.

    Returns:
        The output of ``scipy.signal.lfilter`` (a single forward pass).
    """
    numerator, denominator = butter_bandpass(lowcut, highcut, fs, order=order)
    return lfilter(numerator, denominator, data)

def rm_lowPSD_noise(signal_data, sr = 4000, PSD_cut = 0.1): # remove noise with energy density < 10%
    """Zero out low-power frequency bins of a signal and transform it back.

    The power spectral density ``|FFT|^2 / n`` is computed per bin; every bin
    whose power is not strictly greater than
    ``min(0.1 * |99th percentile of the PSD|, PSD_cut)`` is set to zero before
    the inverse FFT.

    Args:
        signal_data: 1-D sequence of samples.
        sr: sampling rate. Kept for interface compatibility; the threshold is
            computed on bin powers only, so the value does not affect the result.
        PSD_cut: absolute upper bound on the power threshold.

    Returns:
        1-D float array with the same length as ``signal_data`` holding the
        magnitude of the inverse FFT. An empty input yields an empty array.
    """
    samples = np.asarray(signal_data)
    n = len(samples)
    if n == 0:
        # np.fft.fft rejects zero-length transforms; nothing to denoise anyway.
        return np.zeros(0, dtype=float)

    spectrum = np.fft.fft(samples, n)
    # X * conj(X) has an exactly-zero imaginary part; keep only the real part so
    # that np.percentile and the ">" comparison operate on real numbers
    # (np.percentile rejects complex input in recent NumPy releases).
    psd = (spectrum * np.conj(spectrum)).real / n

    threshold = min(np.abs(np.percentile(psd, 99)) * 0.1, PSD_cut)
    keep = psd > threshold
    denoised = np.fft.ifft(spectrum * keep)

    # NOTE(review): np.abs() rectifies the waveform (all samples become >= 0).
    # np.real() would preserve the sign; kept as-is so downstream features do
    # not change silently - confirm which one is intended.
    return np.abs(denoised)

def preprocessing_signal(raw_audio_wave, sr, order = 5, lowcut = 500, highcut = 1500, sigma = 3):
    '''
    Clean one audio clip in four steps:

    - band-pass filter to [lowcut, highcut] (Butterworth, given order)
    - zero every sample whose magnitude exceeds sigma * std of the filtered signal
    - peak-normalize so the largest magnitude becomes 1
    - drop low-power frequency bins via rm_lowPSD_noise (PSD_cut = 0.05)

    **raw audio --> band-passed --> outliers zeroed --> normalized --> low-PSD bins removed**

    Args:
        raw_audio_wave: 1-D sequence of samples.
        sr: sampling rate of raw_audio_wave.
        order: Butterworth filter order.
        lowcut, highcut: band edges of the band-pass filter.
        sigma: outlier threshold in multiples of the standard deviation.

    Returns:
        1-D array of the same length as the input. An empty input yields an
        empty array; a clip that is all zeros after outlier removal is passed
        on un-normalized instead of being divided by zero.
    '''
    if len(raw_audio_wave) == 0:
        # min()/max() below have no identity on empty arrays.
        return np.zeros(0, dtype=float)

    # filter out low- and high-frequency noises
    band_passed = butter_bandpass_filter(raw_audio_wave, lowcut, highcut, sr, order=order)

    # remove outliers (n-sigma rule): offending samples are set to zero
    outliers_zeroed = band_passed.copy()
    outliers_zeroed[np.abs(band_passed) > sigma * np.std(band_passed)] = 0

    # normalization: skip the division for an all-zero clip to avoid 0/0 -> NaN
    peak = np.max(np.abs(outliers_zeroed))
    normalized = outliers_zeroed / peak if peak > 0 else outliers_zeroed

    # filter out low spectral density noise
    return rm_lowPSD_noise(normalized, sr = sr, PSD_cut = 0.05)

In [None]:
def audio_to_melspec(audio, sr, width=mel_width):
    """Compute a mel spectrogram of ``audio`` with librosa's default settings.

    Args:
        audio: 1-D array of samples.
        sr: sampling rate of ``audio``.
        width: currently unused; kept so existing callers keep working.

    Returns:
        The array returned by ``librosa.feature.melspectrogram``.
    """
    # The signal must be passed as `y=`: it is keyword-only in librosa >= 0.10,
    # and the keyword form is also accepted by older releases.
    return librosa.feature.melspectrogram(y=audio, sr=sr)

In [None]:
def slice_resp_cycle(audio_file_path, annotation_df: pd.DataFrame):
    """Cut a recording into annotated cycles and turn each into mel images.

    For every row of ``annotation_df`` the samples between ``begin`` and
    ``end`` (seconds) are preprocessed with ``preprocessing_signal``. Cycles
    of at most ``4 * def_sr`` samples are kept and converted twice: once at
    their native length and once centre-padded to exactly ``4 * def_sr``
    samples. Longer cycles and empty slices are skipped.

    Args:
        audio_file_path: path of the audio file to load.
        annotation_df: frame with ``begin``, ``end`` and ``is_crackle`` columns.

    Returns:
        ``(resp_original_mel, resp_fix_len_mel, crackle_label)`` - three
        parallel lists with one entry per kept cycle.
    """
    # librosa.load is called without `sr`, so librosa's default resampling
    # rate applies. NOTE(review): the length limit below uses def_sr, not the
    # loaded `sr`, so the limit in seconds is 4 * def_sr / sr - confirm intent.
    y, sr = librosa.load(audio_file_path)
    max_len = int(4*def_sr)

    resp_original_mel = []
    resp_fix_len_mel = []
    crackle_label = []
    for _, row in annotation_df.iterrows():
        start = math.floor(row['begin'] * sr)
        end = min(math.ceil(row['end']*sr) + 1, len(y))
        cycle_audio = y[start:end]
        if len(cycle_audio) == 0:
            # Annotation lies outside the recording; nothing to extract.
            continue

        preprocessed_audio = preprocessing_signal(cycle_audio, sr=sr)
        if len(preprocessed_audio) > max_len:
            # Cycles longer than the fixed clip length are dropped.
            continue

        # `size` is keyword-only in librosa >= 0.10; the keyword form also
        # works with older releases.
        clip = librosa.util.pad_center(preprocessed_audio, size=max_len)
        fix_len_mel = audio_to_melspec(clip, sr)
        orig_mel = audio_to_melspec(preprocessed_audio, sr)

        # NOTE(review): melspectrogram returns a power spectrogram by default,
        # for which librosa.power_to_db is the matching conversion. Left as
        # amplitude_to_db so the features stay unchanged - confirm.
        fix_len_img = librosa.amplitude_to_db(fix_len_mel, ref=np.max)
        orig_img = librosa.amplitude_to_db(orig_mel, ref=np.max)

        resp_fix_len_mel.append(fix_len_img)
        resp_original_mel.append(orig_img)
        crackle_label.append(row['is_crackle'])
    return resp_original_mel, resp_fix_len_mel, crackle_label

In [None]:
def read_annotation_df(fname):
    """Parse a whitespace-separated annotation file into a DataFrame.

    Each non-blank line must hold four fields: ``begin``, ``end``,
    ``is_crackle`` and ``is_wheeze``. A field containing a ``.`` is parsed as
    ``float``, any other field as ``int``. Blank lines (e.g. a trailing empty
    line) are skipped so they cannot produce a ragged / NaN row.

    Args:
        fname: path of the annotation text file.

    Returns:
        ``pandas.DataFrame`` with columns
        ``['begin', 'end', 'is_crackle', 'is_wheeze']``, one row per
        non-blank line.
    """
    rows = []
    with open(fname) as f_annot:
        for line in f_annot:
            fields = line.split()
            if not fields:
                continue
            rows.append([float(tok) if '.' in tok else int(tok) for tok in fields])
    return pd.DataFrame(rows, columns=['begin', 'end', 'is_crackle', 'is_wheeze'])

In [None]:
audio_path = '../input/respiratory-sound-database/Respiratory_Sound_Database/Respiratory_Sound_Database/audio_and_txt_files/'
# os.listdir returns entries in arbitrary order; sort so that positional
# indexing into audio_ids and the seeded train/test splits are reproducible.
files = sorted(f for f in os.listdir(audio_path) if os.path.isfile(os.path.join(audio_path, f)))
# splitext strips only the final extension, so an id containing dots survives intact.
audio_ids = [os.path.splitext(f)[0] for f in files if f.endswith('.wav')]

In [None]:
# Sanity check on one recording: show the fixed-length and the native-length
# mel image of its first kept respiratory cycle.
audio_id = audio_ids[210]

wav_file = os.path.join(audio_path, f'{audio_id}.wav')
anno_file = os.path.join(audio_path, f'{audio_id}.txt')
anno_df = read_annotation_df(anno_file)
original_mels, mels, labels = slice_resp_cycle(wav_file, anno_df)

plt.figure(figsize = (10,10))
# Top panel: padded clip; bottom panel: original-length clip.
for panel, spec in enumerate((mels[0], original_mels[0]), start=1):
    plt.subplot(2, 1, panel)
    lrd.specshow(spec)
plt.show()

In [None]:
# Accumulate mel images and crackle labels over every recording.
orig_mel_list, mel_list, label_list = [], [], []

for audio_id in audio_ids:
    wav_file = os.path.join(audio_path, f'{audio_id}.wav')
    anno_file = os.path.join(audio_path, f'{audio_id}.txt')
    anno_df = read_annotation_df(anno_file)
    orig_mels, mels, labels = slice_resp_cycle(wav_file, anno_df)
    # The three lists stay index-aligned: one entry per kept cycle.
    mel_list.extend(mels)
    orig_mel_list.extend(orig_mels)
    label_list.extend(labels)

In [None]:
from sklearn.model_selection import train_test_split


# First hold out 20% of the fixed-length mel images as the test set ...
X_train, X_test, y_train, y_test = train_test_split(
    np.array(mel_list),
    np.array(label_list),
    test_size=0.2,
    random_state=10,
)
# ... then take 25% of the remainder for validation (60/20/20 overall).
X_train, X_validation, y_train, y_validation = train_test_split(
    X_train,
    y_train,
    test_size=0.25,
    random_state=10,
)

In [None]:
from keras import backend as K

def f1_loss(y_true, y_pred):
    """Soft F1 loss: ``1 - mean(F1)`` computed from predicted probabilities.

    True/false positives and false negatives are accumulated as sums of
    products over axis 0 (the batch axis), so the result is differentiable.

    Args:
        y_true: ground-truth labels (any numeric dtype; cast to ``y_pred``'s dtype).
        y_pred: predicted scores, same shape as ``y_true``.

    Returns:
        Scalar tensor ``1 - mean F1``.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    # Cast BEFORE multiplying: integer labels times float predictions would
    # otherwise raise a dtype mismatch before any cast could take effect.
    y_true = tf.cast(y_true, y_pred.dtype)
    eps = tf.keras.backend.epsilon()

    tp = tf.reduce_sum(y_true * y_pred, axis=0)
    fp = tf.reduce_sum((1 - y_true) * y_pred, axis=0)
    fn = tf.reduce_sum(y_true * (1 - y_pred), axis=0)

    precision = tp / (tp + fp + eps)
    recall = tp / (tp + fn + eps)
    f1 = 2 * precision * recall / (precision + recall + eps)
    # Defensive: replace any NaN entry (e.g. from NaN predictions) with 0.
    f1 = tf.where(tf.math.is_nan(f1), tf.zeros_like(f1), f1)
    return 1 - tf.reduce_mean(f1)

In [None]:
# 2-D CNN over single-channel inputs of shape (128, 345, 1).
# NOTE(review): presumably 128 mel bands x 345 time frames of the padded clips - confirm.
mSpec_input=keras.layers.Input(shape=(128,345,1),name="mSpecInput")
# Block 1: 32 filters, 5x5 kernel, stride (2,3) -> batch norm -> ReLU -> 2x2 max pool.
x=keras.layers.Conv2D(32,5,strides=(2,3),padding='same')(mSpec_input)
x=keras.layers.BatchNormalization()(x)
x=keras.layers.Activation(keras.activations.relu)(x)
x=keras.layers.MaxPooling2D(pool_size=2,padding='valid')(x)

# Block 2: 64 filters, 3x3 kernel, stride (2,2) -> batch norm -> ReLU -> 2x2 max pool.
x=keras.layers.Conv2D(64,3,strides=(2,2),padding='same')(x)
x=keras.layers.BatchNormalization()(x)
x=keras.layers.Activation(keras.activations.relu)(x)
x=keras.layers.MaxPooling2D(pool_size=2,padding='valid')(x)

# Block 3: 96 filters, 2x2 kernel, default stride; adds dropout (0.4) before pooling.
x=keras.layers.Conv2D(96,2,padding='same')(x)
x=keras.layers.BatchNormalization()(x)
x=keras.layers.Activation(keras.activations.relu)(x)
x=keras.layers.Dropout(0.4)(x)
x=keras.layers.MaxPooling2D(pool_size=2,padding='valid')(x)

# Block 4: 128 filters, 2x2 kernel; global max pooling collapses the spatial axes.
x=keras.layers.Conv2D(128,2,padding='same')(x)
x=keras.layers.BatchNormalization()(x)
x=keras.layers.Activation(keras.activations.relu)(x)
x=keras.layers.Dropout(0.4)(x)
x=keras.layers.GlobalMaxPooling2D()(x)
# Single sigmoid unit: binary output.
mSpec_output=keras.layers.Dense(1, activation='sigmoid')(x)

mSpec_model=keras.Model(mSpec_input, mSpec_output, name="mSpecModel")

mSpec_model.summary()

# Adam with one fifth of the base learning rate from the config cell.
opt = keras.optimizers.Adam(learning_rate=learning_rate/5)

mSpec_model.compile(optimizer=opt, 
                    # Alternative objective, currently disabled: loss=f1_loss
                    loss='binary_crossentropy',
                    metrics=[Recall(), Precision(), 'accuracy'])

In [None]:
# Stop once the validation loss has not improved for 10 consecutive epochs.
early_stopping = keras.callbacks.EarlyStopping(patience=10, monitor='val_loss')
history = mSpec_model.fit(
    X_train,
    y_train,
    epochs=100,
    validation_data=(X_validation, y_validation),
    callbacks=[early_stopping],
)

In [None]:
def _plot_history_metric(history, metric, ylabel, lw):
    """Plot the training and validation curves of one metric on a new figure."""
    plt.figure(figsize=(10,6))
    plt.plot(history.history[metric], label = 'training', marker = '*', linewidth = lw)
    plt.plot(history.history['val_' + metric], label = 'validation', marker = 'o', linewidth = lw)
    plt.title(f'Training {ylabel} vs Validation {ylabel}')
    plt.xlabel('Epochs')
    plt.ylabel(ylabel)
    plt.legend(fontsize = 'x-large')
    plt.show()


def visualize_training(history, lw = 3):
    """Show accuracy and loss curves (training vs validation) of a Keras run.

    Args:
        history: object whose ``history`` dict holds the keys ``accuracy``,
            ``val_accuracy``, ``loss`` and ``val_loss`` (as returned by
            ``Model.fit`` when 'accuracy' is a compiled metric and validation
            data is supplied).
        lw: line width used for every curve.
    """
    _plot_history_metric(history, 'accuracy', 'Accuracy', lw)
    _plot_history_metric(history, 'loss', 'Loss', lw)


visualize_training(history)

In [None]:
# 1-D features: average every native-length mel image over axis 1, which
# leaves one value per row of the image.
mean_mel = [np.mean(mel, axis=1) for mel in orig_mel_list]
# Labels come from label_list, which is index-aligned with orig_mel_list
# (the previously referenced name `crackle_lables` is never defined).
# NOTE: this reassigns X_train / X_validation / X_test and the y_* arrays,
# replacing the 2-D mel split used for mSpec_model.
X_train, X_test, y_train, y_test = train_test_split(np.array(mean_mel), np.array(label_list), test_size=0.2, random_state=10)
X_train, X_validation, y_train, y_validation = train_test_split(X_train, y_train, test_size=0.25, random_state=10)

In [None]:
# Give the labels an explicit column axis and the features a trailing channel
# axis, as expected by the Conv1D model (input_shape=(128, 1)).
# Each array is reshaped from its OWN dimensions, so re-running the cell is a no-op.
y_train = np.reshape(y_train, (y_train.shape[0], 1))
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
y_validation = np.reshape(y_validation, (y_validation.shape[0], 1))
X_validation = np.reshape(X_validation, (X_validation.shape[0], X_validation.shape[1], 1))
y_test = np.reshape(y_test, (y_test.shape[0], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

In [None]:
# 1-D CNN over the time-averaged mel vectors (128 values, 1 channel).
model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(128, 1)),

    Conv1D(128, kernel_size=5, activation='relu'),
    MaxPooling1D(3),

    SeparableConv1D(256, kernel_size=5, activation='relu'),
    MaxPooling1D(5),

    Dropout(0.5),
    Flatten(),

    Dense(128, activation='relu'),
    Dropout(0.),  # rate 0: currently a no-op placeholder
    Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=[Recall(), Precision(), 'accuracy'],
)
model.summary()

In [None]:
# Stop once validation accuracy has not improved for 10 consecutive epochs.
early_stopping = keras.callbacks.EarlyStopping(patience=10, monitor='val_accuracy')
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    verbose=1,
    validation_data=(X_validation, y_validation),
    callbacks=[early_stopping],
)

In [None]:
# Plot accuracy and loss curves; `history` here is the value returned by the
# most recent fit() call, i.e. the 1-D `model` run when cells execute in order.
visualize_training(history)

In [None]:
# At this point X_test / y_test hold the reshaped 1-D mean-mel features
# (N, 128, 1), which match `model`; mSpec_model expects (128, 345, 1) inputs
# and cannot consume them.
y_prediction = model.predict(X_test)

# Dedicated names so the metric objects do not overwrite unrelated `x` / `y`.
recall_metric = Recall()
precision_metric = Precision()
recall_metric.update_state(y_test, y_prediction)
precision_metric.update_state(y_test, y_prediction)
print(recall_metric.result().numpy())
print(precision_metric.result().numpy())

In [None]:
import IPython.display as ipd
# Listen to one raw recording; reuse audio_path instead of repeating the dataset directory.
ipd.Audio(os.path.join(audio_path, '101_1b1_Al_sc_Meditron.wav'))