In [27]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
from IPython.display import Audio
from scipy.signal import find_peaks
from scipy.signal import butter, sosfilt, sosfreqz
import math
import os

In [30]:
# Sampling rate in samples per second. detect_coughs() reads this notebook-level
# global when converting peak sample indices to timestamps, so this cell must be
# executed before any cell that calls detect_coughs().
sample_rate = 48000

In [3]:
# Extracts timestamps from a .txt file
# Each line in the file contains a start and finish time separated by a tab.

def get_real_timestamps(audio_timestamp_file):
    """
    Reads (start, end) timestamp pairs from a tab-separated text file.

    Every non-blank line must contain a start time and an end time separated
    by a tab. Any additional tab-separated fields on the line (for example a
    text label) are ignored.

    Parameters:
        audio_timestamp_file (str): Path to the timestamp text file.

    Returns:
        list: (start, end) tuples of floats, in the order they appear in the file.

    Raises:
        ValueError: If a non-blank line has fewer than two fields, or a field
            cannot be converted to float.
    """

    timestamps = []

    with open(audio_timestamp_file, "r") as f:
        for line_number, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                continue

            fields = line.split("\t")
            if len(fields) < 2:
                raise ValueError(
                    f"{audio_timestamp_file}:{line_number}: expected "
                    f"'start<TAB>end', got {line!r}"
                )

            # Only the first two columns are timestamps; extra columns are ignored.
            timestamps.append((float(fields[0]), float(fields[1])))

    return timestamps

In [4]:
import os
import librosa

# Function to load audio data along with corresponding timestamps and labels.
def load_data(audio_dir, timestamp_dir, label, sr=48000):

    """
    Loads audio data from WAV files along with corresponding timestamps and labels.

    For every '<name>.wav' in audio_dir the matching timestamp file is expected
    to be '<name>-label.txt' in timestamp_dir. Files that have no matching
    timestamp file, or that fail to load, are reported with a printed message
    and skipped, so the three returned lists always stay aligned.

    Parameters:
        audio_dir (str): Directory containing audio WAV files.
        timestamp_dir (str): Directory containing timestamp text files.
        label (str): Label to assign to the loaded data.
        sr (int): Sample rate the audio is loaded at (default is 48000).

    Returns:
        tuple: A tuple containing lists of loaded audio data, timestamps, and labels.
    """

    # os.listdir() returns entries in arbitrary order; sorting makes the order of
    # the returned lists (and any numbering derived from it) reproducible.
    audio_files = sorted(file for file in os.listdir(audio_dir) if file.endswith('.wav'))
    # A set gives constant-time membership checks in the loop below.
    timestamp_files = {file for file in os.listdir(timestamp_dir) if file.endswith('.txt')}

    audios = []
    timestamps = []
    labels = []

    for audio_file in audio_files:

        try:

            base_name = os.path.splitext(audio_file)[0]
            timestamp_file = base_name + '-label.txt'

            if timestamp_file not in timestamp_files:
                raise ValueError(f"No corresponding timestamp file found for {audio_file}")

            audio_path = os.path.join(audio_dir, audio_file)
            timestamp_path = os.path.join(timestamp_dir, timestamp_file)

            # Read the timestamps first so a bad label file skips the audio decode
            real_timestamps = get_real_timestamps(timestamp_path)

            # librosa.load() already resamples to `sr`, so no separate
            # librosa.resample() call is needed afterwards.
            data, _ = librosa.load(audio_path, sr=sr, mono=True)

            audios.append(data)
            timestamps.append(real_timestamps)
            labels.append(label)

        except Exception as e:
            # Best effort: report the problem and keep going with the other files.
            print(f"Error processing {audio_file}: {e}")
            continue

    return audios, timestamps, labels

In [16]:
# This function designs a Butterworth bandpass filter and applies it to the input data.

def butter_bandpass(lowcut, highcut, fs, order=8):
    """
    Designs a digital Butterworth bandpass filter.

    Parameters:
        lowcut (float): Lower cutoff frequency, in the same unit as fs.
        highcut (float): Upper cutoff frequency, in the same unit as fs.
        fs (float): Sampling frequency of the signal to be filtered.
        order (int): Order passed to scipy.signal.butter (default is 8).

    Returns:
        ndarray: Filter coefficients as second-order sections.
    """
    # Cutoffs are expressed as a fraction of the Nyquist frequency (fs / 2).
    nyquist = fs / 2.0
    normalized_band = [lowcut / nyquist, highcut / nyquist]

    return butter(order, normalized_band, btype='band', analog=False, output='sos')

def butter_bandpass_filter(data, lowcut, highcut, fs, order=8):
    """
    Applies a Butterworth bandpass filter to the input data.

    Parameters:
        data (array-like): Signal to filter.
        lowcut (float): Lower cutoff frequency, in the same unit as fs.
        highcut (float): Upper cutoff frequency, in the same unit as fs.
        fs (float): Sampling frequency of data.
        order (int): Filter order forwarded to butter_bandpass (default is 8).

    Returns:
        ndarray: The filtered signal.
    """
    # Design the filter, then run the data through the second-order sections.
    return sosfilt(butter_bandpass(lowcut, highcut, fs, order=order), data)

In [17]:
# Function to normalize the given data

def normalize_data(data):
    """
    Rescales the data to the range [0, 1] using min-max scaling.

    Parameters:
        data (array-like): Values to normalize.

    Returns:
        ndarray: (data - min) / (max - min). If every value is identical the
        range is zero, so an all-zero array of the same shape is returned
        instead of dividing by zero.

    Raises:
        ValueError: If data is empty.
    """

    data = np.asarray(data)

    if data.size == 0:
        raise ValueError("normalize_data() requires at least one sample")

    # Find the minimum and maximum values in the data
    data_min = np.min(data)
    data_max = np.max(data)
    value_range = data_max - data_min

    # Constant input: min-max scaling is undefined (0 / 0), so return zeros
    # rather than an array of NaNs.
    if value_range == 0:
        return np.zeros(data.shape, dtype=float)

    # Normalize the data using min-max scaling
    return (data - data_min) / value_range

In [18]:
def compute_moving_average(data, window_size=15):
    """
    Computes the moving average of the data, rounded to two decimals.

    Parameters:
        data (array-like): One-dimensional input values.
        window_size (int): Number of samples averaged per output value (default is 15).

    Returns:
        ndarray: Moving averages. Only positions where the window fully
        overlaps the data are returned ('valid' convolution).
    """
    # Equal weights that sum to one turn the convolution into a plain mean.
    weights = np.full(window_size, 1.0 / window_size)
    smoothed = np.convolve(data, weights, mode='valid')

    return smoothed.round(2)

In [25]:
# Preprocesses the input data by applying a bandpass filter, computing moving average, and normalizing the data.
def preprocess_data(data, lowcut=1000, highcut=4000, fs=48000, order=8, window_size=15):

    """
    Parameters:
    - data (ndarray): Input data to be preprocessed.
    - lowcut (float): Lower cutoff of the bandpass filter (default is 1000).
    - highcut (float): Upper cutoff of the bandpass filter (default is 4000).
    - fs (float): Sampling frequency of data (default is 48000).
    - order (int): Order of the Butterworth bandpass filter (default is 8).
    - window_size (int): Window length of the moving average (default is 15).

    Returns:
    - ndarray: Preprocessed, flattened data.

    Note:
    - This function applies a bandpass filter, computes the moving average of the
      rectified (absolute-value) signal, and min-max normalizes the result.
    - The defaults reproduce the values that were previously hard-coded, so
      preprocess_data(data) behaves as before.
    - The moving average keeps only fully overlapping windows, so the output is
      window_size - 1 samples shorter than the input.
    """

    # Apply bandpass filter to remove unwanted frequencies
    filtered_data = butter_bandpass_filter(
        data, lowcut=lowcut, highcut=highcut, fs=fs, order=order
    )

    # Compute moving average of the absolute values of the filtered data
    averaged_data = compute_moving_average(np.abs(filtered_data), window_size=window_size)

    # Normalize the data
    normalized_data = normalize_data(averaged_data)

    # Flatten the data
    return normalized_data.flatten()

In [20]:
# Cough Detection Algorithm

def detect_coughs(preprocessed_data, sr=None):
    """
    Detects coughs as prominent peaks in the preprocessed signal.

    Steps:
      1. Print the maximum, mean and standard deviation of the signal.
      2. Find peaks with prominence >= 0.5 whose height is at least the 99.8th
         percentile of the signal.
      3. Among peaks closer than 0.1 s to each other, keep the highest one.
      4. Drop any peak that follows the previously kept peak by less than 0.2 s.

    Parameters:
        preprocessed_data (array-like): One-dimensional preprocessed signal.
        sr (int, optional): Sampling rate in samples per second, used to convert
            sample indices to seconds. When omitted, the notebook-level
            `sample_rate` global is used, as before.

    Returns:
        tuple: (cough_indices, predicted_timestamps) - two aligned lists holding
        the sample index of each detected cough and its time in seconds
        (rounded to 6 decimals). Both are empty for an empty input.
    """

    if sr is None:
        # Backward compatible: fall back to the notebook-level constant
        sr = sample_rate

    preprocessed_data = np.asarray(preprocessed_data)
    if preprocessed_data.size == 0:
        # Nothing to analyse; np.max / np.percentile would raise on empty input
        return [], []

    # Calculate maximum magnitude, average magnitude, and standard deviation of the preprocessed data
    max_value = np.max(preprocessed_data)
    mean_value = np.mean(preprocessed_data)
    std = np.std(preprocessed_data)

    print("Maximum Magnitude: ", max_value)
    print("Average Magnitude: ", mean_value)
    print("Standard Deviation: ", std)

    # Define the percentile threshold for peak detection
    percentile_threshold = 99.8
    threshold = np.percentile(preprocessed_data, percentile_threshold)

    # Find peaks in the preprocessed data using specified prominence and height threshold
    cough_indices, _ = find_peaks(preprocessed_data, prominence=0.5, height=threshold)
    cough_indices = list(cough_indices)

    # Half-width of the overlap window: 0.1 s expressed in samples
    # (4800 samples at 48 kHz). round() guards against float error in 0.1 * sr.
    overlap_window = int(round(0.1 * sr))

    # Deleting overlaps in the peaks - Avoiding counting the same cough more than once
    i = 0
    while i < len(cough_indices):
        peak = cough_indices[i]

        # Peaks (including this one) strictly inside the window around the peak
        overlapping = [
            index for index in cough_indices
            if peak - overlap_window < index < peak + overlap_window
        ]

        if len(overlapping) > 1:
            # Keep only the highest peak of the group (first one wins on ties)
            strongest = max(overlapping, key=lambda index: preprocessed_data[index])
            for index in overlapping:
                if index != strongest:
                    cough_indices.remove(index)

        i += 1

    # Convert peak indices to timestamps and, in the same sweep, remove closely
    # occurring coughs to avoid double counting. find_peaks returns indices in
    # ascending order, so comparing against the last kept timestamp is enough;
    # this avoids removing items from the lists while iterating over them.
    min_gap = 0.2
    kept_indices = []
    predicted_timestamps = []

    for index in cough_indices:
        ts = round(index / sr, 6)

        if predicted_timestamps and predicted_timestamps[-1] < ts < predicted_timestamps[-1] + min_gap:
            continue

        kept_indices.append(index)
        predicted_timestamps.append(ts)

    return kept_indices, predicted_timestamps

In [41]:
audio_dir = '../recordings-and-timestamps/audio-recordings/cough-recordings/'
timestamp_dir = '../recordings-and-timestamps/audio-timestamps/cough-timestamps/'

# NOTE: despite their names, `audio_files` holds the decoded audio arrays and
# `timestamp_files` holds, per recording, the list of real (start, end) tuples
# returned by load_data(). The labels are not needed here.
audio_files, timestamp_files, _ = load_data(audio_dir, timestamp_dir, 'cough')

# Maps the 1-based recording number to (predicted_timestamps, real_timestamps);
# read by the performance-evaluation cell.
all_timestamp_data = dict()

# Getting cough detection results
for i, (data, real_timestamps) in enumerate(zip(audio_files, timestamp_files), start=1):

    preprocessed_data = preprocess_data(data)

    # Cough detection
    cough_indices, predicted_timestamps = detect_coughs(preprocessed_data)

    # Adding predicted and real timestamp tuples to the dictionary
    all_timestamp_data[i] = (predicted_timestamps, real_timestamps)

    cough_count = len(cough_indices)

    # The results
    print(f"\nAudio {i}")
    print("---------------------")
    print(f"Cough Count: {cough_count}")
    print("Cough Predicted Timestamps: {}".format([round(timestamp, 6) for timestamp in predicted_timestamps]))
    print("Cough Real Timestamps: {}".format(list(real_timestamps)))
    print(f"Cough Indices: {cough_indices}\n")

Maximum Magnitude:  1.0
Average Magnitude:  0.025262227481100154
Standard Deviation:  0.06479428897084559

Audio 1
---------------------
Cough Count: 20
Cough Predicted Timestamps: [3.390854, 3.601, 9.750396, 13.068417, 17.478854, 17.782896, 18.054917, 20.795021, 25.175333, 25.440062, 29.835417, 30.20175, 30.992542, 31.752625, 33.947646, 36.170438, 36.977354, 38.005375, 40.573458, 49.782875]
Cough Real Timestamps: [(3.33773, 3.927718), (9.447733, 9.969193), (12.818076, 13.29121), (17.372347, 17.900496), (25.149178, 25.678278), (29.956115, 30.395156), (40.311107, 40.808312), (49.672679, 50.157047)]
Cough Indices: [162761, 172848, 468019, 627284, 838985, 853579, 866636, 998161, 1208416, 1221123, 1432100, 1449684, 1487642, 1524126, 1629487, 1736181, 1774913, 1824258, 1947526, 2389578]

Maximum Magnitude:  1.0
Average Magnitude:  0.002347847952386695
Standard Deviation:  0.019411401719267224

Audio 2
---------------------
Cough Count: 11
Cough Predicted Timestamps: [3.768604, 9.500271, 14.


Audio 14
---------------------
Cough Count: 4
Cough Predicted Timestamps: [2.00275, 4.70025, 14.213083, 27.805604]
Cough Real Timestamps: [(1.858516, 2.548154), (4.628758, 5.271641), (14.135495, 14.641906), (27.724396, 28.237217)]
Cough Indices: [96132, 225612, 682228, 1334669]

Maximum Magnitude:  1.0
Average Magnitude:  0.004096882891866702
Standard Deviation:  0.03439859406192197

Audio 15
---------------------
Cough Count: 4
Cough Predicted Timestamps: [2.753896, 5.824917, 21.499854, 44.115375]
Cough Real Timestamps: [(2.569184, 3.425579), (5.679249, 6.400424), (21.395504, 22.047765), (44.027608, 44.712482)]
Cough Indices: [132187, 279596, 1031993, 2117538]

Maximum Magnitude:  1.0
Average Magnitude:  0.002251900903683631
Standard Deviation:  0.0234624839214114

Audio 16
---------------------
Cough Count: 3
Cough Predicted Timestamps: [1.731208, 11.735583, 22.14175]
Cough Real Timestamps: [(1.568937, 2.330992), (11.554742, 12.259742), (21.944466, 22.476593)]
Cough Indices: [83098,

Maximum Magnitude:  1.0
Average Magnitude:  0.007023905853223804
Standard Deviation:  0.03174946358793863

Audio 30
---------------------
Cough Count: 8
Cough Predicted Timestamps: [2.824271, 3.067958, 8.956021, 9.218438, 20.491062, 25.117729, 45.980771, 55.034854]
Cough Real Timestamps: [(2.793078, 3.27779), (8.936066, 9.33017), (20.474675, 20.974301), (25.098071, 25.687304), (45.96317, 46.50068), (55.001481, 55.60855)]
Cough Indices: [135565, 147262, 429889, 442485, 983571, 1205651, 2207077, 2641673]

Maximum Magnitude:  1.0
Average Magnitude:  0.007530570731160467
Standard Deviation:  0.027722629042375917

Audio 31
---------------------
Cough Count: 4
Cough Predicted Timestamps: [12.716875, 23.200188, 43.338, 56.364333]
Cough Real Timestamps: [(12.639468, 13.347774), (23.083501, 23.664675), (43.297456, 44.114731), (56.373868, 56.827911)]
Cough Indices: [610410, 1113609, 2080224, 2705488]

Maximum Magnitude:  1.0
Average Magnitude:  0.0046332295933509265
Standard Deviation:  0.026134

Maximum Magnitude:  1.0
Average Magnitude:  0.020624098362716604
Standard Deviation:  0.0478122776399031

Audio 46
---------------------
Cough Count: 14
Cough Predicted Timestamps: [2.201354, 2.466542, 13.230396, 13.521062, 13.867792, 19.778458, 22.574917, 29.293979, 34.907917, 40.419917, 43.70525, 43.9785, 44.216542, 53.910667]
Cough Real Timestamps: [(1.83731, 2.251086), (2.333841, 2.851061), (13.178441, 13.558689), (13.629174, 14.053939), (22.519576, 23.023093), (29.249218, 29.656253), (34.848207, 35.363785), (35.680464, 35.961492), (43.657866, 44.058413), (44.075458, 44.472595), (53.867542, 54.291951)]
Cough Indices: [105665, 118394, 635059, 649011, 665654, 949366, 1083596, 1406111, 1675580, 1940156, 2097852, 2110968, 2122394, 2587712]

Maximum Magnitude:  1.0
Average Magnitude:  0.004537250110882498
Standard Deviation:  0.03188843483936228

Audio 47
---------------------
Cough Count: 7
Cough Predicted Timestamps: [1.453229, 6.552354, 7.104229, 12.21275, 29.927438, 40.053521, 50.80

In [53]:
# Comparing the real timestamps and predicted timestamps.

def check_performance(predicted_timestamps, real_timestamps, time_margin=0.40):
    """
    Compares predicted cough timestamps with the real (start, end) intervals.

    A prediction and a real cough are considered a match when the prediction
    lies within `time_margin` of the cough's start or of its end.

    The function iterates over whichever list is longer:
      - More predictions than real coughs: every prediction is counted, as a
        true positive if it matches any real cough, otherwise as a false
        positive. Several predictions matching the same cough each count.
      - Otherwise: every real cough is counted, as a true positive if any
        prediction matches it. NOTE: in this case the second returned value
        counts real coughs with no matching prediction (i.e. missed coughs),
        not spurious predictions.

    Parameters:
        predicted_timestamps (list): Predicted cough times in seconds.
        real_timestamps (list): Real coughs as (start, end) tuples in seconds.
        time_margin (float): Margin of error allowed in matching timestamps,
            in seconds (default is 0.40).

    Returns:
        tuple: (true_positive, false_positive) counts as described above.
        Their sum always equals the length of the longer input list.
    """

    def within_margin(value, center):
        # Strict comparison on both sides, exactly as in the original checks
        return center - time_margin < value < center + time_margin

    if len(predicted_timestamps) > len(real_timestamps):
        # One verdict per prediction: does any real cough edge fall near it?
        matched = [
            any(within_margin(rt[0], pred) or within_margin(rt[1], pred)
                for rt in real_timestamps)
            for pred in predicted_timestamps
        ]
    else:
        # One verdict per real cough: does any prediction fall near its edges?
        matched = [
            any(within_margin(pred, rt[0]) or within_margin(pred, rt[1])
                for pred in predicted_timestamps)
            for rt in real_timestamps
        ]

    true_positive = sum(matched)
    false_positive = len(matched) - true_positive

    return true_positive, false_positive

In [55]:
# General Performance of the Model
performances = []      # per-recording F1 scores, in recording order
true_positives = []
false_positives = []

for key in sorted(all_timestamp_data.keys()):

    predicted_timestamps, real_timestamps = all_timestamp_data[key]

    true_positive, false_positive = check_performance(predicted_timestamps, real_timestamps)

    true_positives.append(true_positive)
    false_positives.append(false_positive)

    # A recording with neither predictions nor real coughs has nothing to score;
    # report NaN instead of raising ZeroDivisionError.
    evaluated = true_positive + false_positive
    precision = true_positive / evaluated if evaluated else float("nan")
    # Recall is assumed as 1 since the model does not predict the absence of coughs
    recall = 1

    f1_score = round(2 * (precision * recall) / (precision + recall), 3)
    performances.append(f1_score)
    print("Audio {}: {}\n".format(key,f1_score))


# Pooled over all recordings: total matches / total items evaluated.
# The two totals are summed separately; `true_positives + false_positives`
# on Python lists would concatenate them rather than add element-wise.
total_true_positives = np.sum(true_positives)
total_evaluated = total_true_positives + np.sum(false_positives)
model_performance = total_true_positives / total_evaluated if total_evaluated else float("nan")
print("\nModel Performance: ", model_performance)

Audio 1: 0.788

Audio 2: 0.909

Audio 3: 1.0

Audio 4: 1.0

Audio 5: 1.0

Audio 6: 0.444

Audio 7: 0.824

Audio 8: 0.7

Audio 9: 0.441

Audio 10: 0.444

Audio 11: 0.714

Audio 12: 0.667

Audio 13: 0.667

Audio 14: 1.0

Audio 15: 1.0

Audio 16: 1.0

Audio 17: 0.667

Audio 18: 1.0

Audio 19: 0.596

Audio 20: 1.0

Audio 21: 0.824

Audio 22: 1.0

Audio 23: 0.4

Audio 24: 0.857

Audio 25: 1.0

Audio 26: 1.0

Audio 27: 0.727

Audio 28: 0.667

Audio 29: 0.432

Audio 30: 1.0

Audio 31: 1.0

Audio 32: 1.0

Audio 33: 0.667

Audio 34: 0.588

Audio 35: 0.727

Audio 36: 0.889

Audio 37: 0.741

Audio 38: 0.667

Audio 39: 0.612

Audio 40: 0.667

Audio 41: 1.0

Audio 42: 0.0

Audio 43: 0.5

Audio 44: 0.857

Audio 45: 1.0

Audio 46: 0.923

Audio 47: 0.933

Audio 48: 1.0

Audio 49: 0.857

Audio 50: 0.5

Audio 51: 0.526


Model Performance:  0.5178571428571429
