In [None]:
!pip install librosa
!pip install tensorflow

In [None]:
import os
import math
import json
import librosa
import zipfile
import numpy as np
import tensorflow.keras as keras

Download the dataset .zip from Kaggle.com, place it next to this notebook, then unzip it.

In [None]:
# Archive downloaded from Kaggle; the relative path means it is looked up in
# the current working directory.
path = 'coronahack-respiratory-sound-dataset.zip'

# The context manager guarantees the archive handle is closed even when
# extraction fails part-way (e.g. disk full, corrupt member).
with zipfile.ZipFile(path, 'r') as zipFile:
    # An empty target path extracts relative to the current working directory.
    zipFile.extractall('')

Pre-processing of the CoronaHack Respiratory Sound Dataset: walk each sub-folder, read its label from `metadata.json`, and extract MFCCs from its audio files.

In [None]:
# --- Dataset locations (relative to the current working directory) ---------
test_dataset_path = 'CoronaHack-Respiratory-Sound-Dataset/data/test'
training_dataset_path = 'CoronaHack-Respiratory-Sound-Dataset/data/train'

# --- Audio loading ----------------------------------------------------------
# Sampling rate (Hz) handed to librosa.load for every recording.
SAMPLE_RATE = 22050

# --- MFCC extraction (arguments of librosa.feature.mfcc) --------------------
n_MFCC = 13        # n_mfcc: coefficients kept per frame
n_FFT = 2048       # n_fft: FFT window size in samples
hopLength = 512    # hop_length: samples between consecutive frames

# Not referenced by the pre-processing code visible in this notebook.
numberOfSegments = 10


def getLabel(metadata):
    """Map one participant's metadata to an integer class label.

    The ids follow the ``"mapping"`` list built in ``preProcess``:

        0 - healthy, no asthma
        1 - asthma, covid_status is 'healthy'
        2 - covid_status is anything other than 'healthy', no asthma
        3 - asthma and covid_status is anything other than 'healthy'
        4 - inconclusive (no usable ``covid_status``)

    Args:
        metadata: dict parsed from ``metadata.json``. Reads the optional
            ``'asthma'`` key and the ``'covid_status'`` key.

    Returns:
        int label in the range 0..4.

    Side effects:
        Prints 'asthma' / 'asthma + covid' when an asthma record is labelled.
    """
    # Anything that is not a dict (None, list, ...) cannot be labelled.
    if not isinstance(metadata, dict) or 'covid_status' not in metadata:
        return 4

    is_healthy = metadata['covid_status'] == 'healthy'

    # json.load turns a JSON `true` into the bool True, while the flag may
    # also be stored as the string 'True'; accept either spelling.
    has_asthma = str(metadata.get('asthma')).strip().lower() == 'true'

    if has_asthma:
        if is_healthy:
            print('asthma')
            return 1
        print('asthma + covid')
        return 3

    # Non-healthy without asthma is class 2 ('Covid'); returning 1 here would
    # silently merge these records into the asthma class.
    return 0 if is_healthy else 2


def preProcess(dataset_path):
    """Walk ``dataset_path`` and extract one MFCC matrix per audio file.

    Every sub-directory that contains files is handled as one unit: its
    ``metadata.json`` (when present) is converted to a class label with
    ``getLabel`` and that label is stored once for *each* audio file of the
    directory, so ``data["mfcc"][k]`` and ``data["labels"][k]`` always refer to
    the same recording. A directory without ``metadata.json`` is labelled from
    an empty dict.

    Args:
        dataset_path: Root directory of one dataset split. Files located
            directly in the root are ignored; only sub-directories are read.

    Returns:
        dict with the keys
        ``"mapping"`` - class names indexed by label id,
        ``"mfcc"``    - one nested list per audio file (frames x coefficients),
        ``"labels"``  - int labels, parallel to ``"mfcc"``.

    Raises:
        OSError / ValueError: if a ``metadata.json`` cannot be read or parsed.
    """
    data = {
        # NOTE(review): 'Astma' looks like a misspelling of 'Asthma'; kept
        # unchanged in case other code matches on these exact strings.
        "mapping": [
            'Healthy',
            'Astma',
            'Covid',
            'Astma-Covid',
            'inconclusive'
        ],
        "mfcc": [],
        "labels": [],
    }

    for dirpath, _, filenames in os.walk(dataset_path):
        # Skip the root itself and directories that hold no files.
        if dirpath == dataset_path or len(filenames) == 0:
            continue

        # Resolve the label before touching any audio: os.walk gives no
        # guarantee that metadata.json is listed ahead of the recordings.
        metadata = {}
        if 'metadata.json' in filenames:
            with open(os.path.join(dirpath, 'metadata.json')) as f:
                metadata = json.load(f)
        label = getLabel(metadata)

        for file in filenames:
            if str(file) == 'metadata.json':
                continue

            path = os.path.join(dirpath, file)

            # load audio; unreadable / non-audio files are reported and skipped
            try:
                signal, sampleRate = librosa.load(path, sr=SAMPLE_RATE)
            except Exception as error:
                print("Error processing {}: {}".format(path, error))
                continue

            # get MFCC, transposed so that rows are frames
            MFCC = librosa.feature.mfcc(
                y=signal,
                sr=sampleRate,
                n_fft=n_FFT,
                n_mfcc=n_MFCC,
                hop_length=hopLength).T

            # Store features and label together so both lists stay aligned.
            data["mfcc"].append(MFCC.tolist())
            data["labels"].append(label)

    return data

# Extract MFCCs and labels for each split; the training split is processed first.
train_data = preProcess(dataset_path=training_dataset_path)
test_data = preProcess(dataset_path=test_dataset_path)

Print the total number of MFCC coefficient values produced during pre-processing.

In [None]:
def displayMFCCs(data):
    """Print how many individual coefficient values ``data["mfcc"]`` contains.

    ``data["mfcc"]`` is traversed three levels deep (entry -> frame -> value)
    and every innermost element is counted once.
    """
    total = sum(
        1
        for entry in data["mfcc"]
        for frame in entry
        for _ in frame
    )

    print("number of MFCCs: {}".format(total))

# Report the count for the training split, then for the test split.
displayMFCCs(data=train_data)
displayMFCCs(data=test_data)

In [None]:
"""
Neural Network Implementation
"""

def _to_fixed_length(mfccs, n_frames):
    """Stack MFCC matrices into one (samples, n_frames, n_MFCC) float array.

    Recordings differ in duration, so their MFCC matrices differ in the number
    of frames. Shorter ones are zero-padded at the end and longer ones are
    truncated, because the dense network below needs a fixed input size.
    """
    batch = np.zeros((len(mfccs), n_frames, n_MFCC), dtype=np.float32)
    for row, mfcc in enumerate(mfccs):
        frames = np.asarray(mfcc, dtype=np.float32)[:n_frames]
        if len(frames):
            batch[row, :len(frames)] = frames
    return batch


def _split_to_arrays(split, n_frames, name):
    """Return (inputs, targets) arrays for one split, checking they line up."""
    if len(split["mfcc"]) == 0:
        raise ValueError("{} split contains no MFCCs".format(name))
    if len(split["mfcc"]) != len(split["labels"]):
        raise ValueError(
            "{} split has {} MFCC entries but {} labels".format(
                name, len(split["mfcc"]), len(split["labels"])))
    return _to_fixed_length(split["mfcc"], n_frames), np.array(split["labels"])


# Common frame count: the longest training recording. Test recordings are
# padded / truncated to the same length so both sets share one input shape.
n_frames = max((len(mfcc) for mfcc in train_data["mfcc"]), default=0)

# train set
train_inputs, train_targets = _split_to_arrays(train_data, n_frames, "train")

# test set (built from test_data, so validation uses held-out recordings)
test_inputs, test_targets = _split_to_arrays(test_data, n_frames, "test")

# build the network architecture
model = keras.Sequential([
    # input layer: flattens each (frames, coefficients) matrix to a vector
    keras.layers.Flatten(
        input_shape=(train_inputs.shape[1], train_inputs.shape[2])),

    # 1st hidden layer
    # relu activation -> Rectified Linear Unit (ReLU)
    keras.layers.Dense(512, activation="relu"),

    # 2nd hidden layer
    keras.layers.Dense(256, activation="relu"),

    # 3rd hidden layer
    keras.layers.Dense(64, activation="relu"),

    # output layer: one unit per class in the label mapping
    # softmax -> normalises output
    keras.layers.Dense(len(train_data["mapping"]), activation="softmax")
])

# compile the network
optimizer = keras.optimizers.Adam(learning_rate=0.0001)
model.compile(
    optimizer=optimizer,
    # integer class ids (not one-hot vectors) are used as targets
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)
model.summary()

# train the network (no `epochs` given, so Keras runs its default of 1 epoch)
model.fit(
    train_inputs,
    train_targets,
    validation_data=(
        test_inputs,
        test_targets
    ),
    batch_size=32
)