In [1]:
#imports

import pandas as pd
import librosa 
import IPython.display as ipd
import matplotlib.pyplot as plt
import librosa.display
from pathlib import Path
from segmentation import segment_cough

import plaidml.keras
plaidml.keras.install_backend()
import keras
import keras.backend as L
from keras.applications.resnet50 import ResNet50
from keras.layers import Flatten, Dense, Conv2D, MaxPooling2D, Input, Dropout, Activation
from keras.models import Model, Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from keras.layers.normalization import BatchNormalization

%matplotlib inline

In [None]:
# Read the compiled COUGHVID metadata table into `data`.
data = pd.read_csv(filepath_or_buffer=r'D:\Projects\coughvid\public_dataset\metadata_compiled.csv')

In [None]:
# Keep only the rows whose cough_detected value is strictly greater than 0.29.
data = data.loc[data["cough_detected"].gt(0.29)]

In [None]:
# Sampling rate in Hz, as stated in the COUGHVID crowdsourcing dataset paper.
SAMPLE_RATE = 48_000

In [None]:
# Shape of the model input: height, width and number of channels.
IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS = 512, 512, 1

In [None]:
# Build the audio path prefix (directory + uuid, no file extension) for every row.
# The directory is a raw string so its backslashes are not parsed as escape
# sequences ("\P", "\c" and "\A" are invalid escapes and trigger warnings on
# current Python versions). The trailing separator is concatenated separately
# because a raw string literal cannot end in a single backslash.
data["Filepath"] = r"D:\Projects\coughvid\Audio" + "\\" + data["uuid"]

In [None]:
# Order the rows from the highest cough_detected value to the lowest.
data = data.sort_values("cough_detected", ascending=False)

In [None]:
# Narrow the frame to the four columns used from here on.
data = data[
    [
        "uuid",
        "cough_detected",
        "Filepath",
        "status",
    ]
]

In [None]:
# Drop every row that contains a missing value in any column.
# Reassign rather than mutate with inplace=True: the result is the same set of
# rows, the cell reads as "new frame from old frame", and no other reference to
# the previous frame is silently modified.
data = data.dropna(axis=0)

In [None]:
# Number of rows per status value.
data.loc[:, "status"].value_counts()

In [None]:
# Balance the classes: draw 835 random rows from every `status` group.
# - The groups are sampled one by one and concatenated, so the original row index
#   is kept and `status` stays an ordinary column. With groupby(...).apply(...)
#   `status` became both an index level and a column, which makes re-running this
#   cell (or any later groupby/sort on "status") fail as ambiguous.
# - random_state fixes the draw so a kernel restart reproduces the same rows.
# NOTE: sample() raises ValueError if a status group has fewer than 835 rows.
data = pd.concat(
    [
        status_rows.sample(n=835, random_state=42)
        for _, status_rows in data.groupby("status")
    ]
)

In [None]:
# Number of rows per status value, most frequent first.
data["status"].value_counts(sort=True, ascending=False)

In [None]:
# Display the current contents of `data` as the cell output.
data

In [None]:
def _load_cough_audio(filename, sr):
    """Load ``filename`` + ".webm", falling back to ".ogg".

    Returns the ``(samples, sample_rate)`` pair from ``librosa.load`` for the
    first extension that loads, or ``None`` when neither does.
    """
    for extension in (".webm", ".ogg"):
        try:
            return librosa.load(filename + extension, sr=sr)
        except Exception:
            # Missing or undecodable file: try the next extension.
            # `Exception` (not a bare except) lets KeyboardInterrupt through.
            continue
    return None


def _save_specshow_png(features, dest, sr, **specshow_kwargs):
    """Draw ``features`` with ``librosa.display.specshow`` and save the figure to ``dest``."""
    # Create the target folder on demand so savefig does not fail on a fresh run.
    Path(dest).parent.mkdir(parents=True, exist_ok=True)
    fig = plt.figure(figsize=(25, 10))
    try:
        librosa.display.specshow(features, sr=sr, **specshow_kwargs)
        fig.savefig(dest)
    finally:
        # Always release the figure; this runs once per image inside a long loop.
        plt.close(fig)


def prepare_dataset(destination, sr=SAMPLE_RATE):
    """Render Mel-spectrogram and MFCC images for the COVID-19 rows of ``data``.

    Iterates over the module-level ``data`` frame (columns ``Filepath``,
    ``status`` and ``uuid`` are read). Rows whose ``status`` is not
    ``"COVID-19"`` are ignored. For every remaining row the audio is loaded,
    passed to ``segment_cough`` and the first returned segment is rendered to

    * ``destination + "Mel_Spectrogram\\<status>\\<uuid>.png"`` (log-Mel spectrogram)
    * ``destination + "MFCC1\\<status>\\<uuid>.png"`` (13 MFCCs)

    An image that already exists is left untouched; the two images are checked
    independently, and the audio is only loaded when at least one is missing.
    Rows whose audio cannot be loaded, or for which ``segment_cough`` returns
    no segments, are skipped.

    Parameters
    ----------
    destination : str
        Output root. Sub-folder names are concatenated directly onto it, so it
        must end with a path separator.
    sr : int, optional
        Sampling rate passed to ``librosa.load``; defaults to ``SAMPLE_RATE``.

    Returns
    -------
    None
    """
    for _, row in data.iterrows():
        label = row["status"]
        if label != "COVID-19":
            continue
        filename = row["Filepath"]
        uuid = row["uuid"]

        mel_dest = destination + "Mel_Spectrogram\\" + label + "\\" + uuid + ".png"
        mfcc_dest = destination + "MFCC1\\" + label + "\\" + uuid + ".png"

        # Decide per image; an existing Mel image must not prevent a missing
        # MFCC image from being generated.
        need_mel = not Path(mel_dest).is_file()
        need_mfcc = not Path(mfcc_dest).is_file()
        if not (need_mel or need_mfcc):
            continue  # nothing to do, so skip the expensive load + segmentation

        loaded = _load_cough_audio(filename, sr)
        if loaded is None:
            # Skip the row instead of carrying on with stale or non-audio data.
            print("Could not load " + filename + ".webm or " + filename + ".ogg; skipping")
            continue
        audio, audio_sr = loaded

        # segment_cough comes from the project-local `segmentation` module; only
        # the first returned segment is used, the mask is not needed here.
        cough_segments, _ = segment_cough(audio, audio_sr, cough_padding=0)
        if len(cough_segments) == 0:
            continue
        first_cough = cough_segments[0]

        if need_mel:
            # The signal is passed as `y=`: it is keyword-only in recent librosa.
            mel_spec = librosa.feature.melspectrogram(
                y=first_cough, sr=audio_sr, n_fft=2048, hop_length=512, n_mels=90
            )
            log_mel_spec = librosa.power_to_db(mel_spec)
            # Use the rate the audio was actually loaded at for the axes.
            _save_specshow_png(
                log_mel_spec, mel_dest, audio_sr, x_axis="time", y_axis="mel"
            )

        if need_mfcc:
            mfcc = librosa.feature.mfcc(y=first_cough, sr=audio_sr, n_mfcc=13)
            _save_specshow_png(mfcc, mfcc_dest, audio_sr, x_axis="time")

In [None]:
# Generate the images under the project root. The root is a raw string so its
# backslashes are not parsed as (invalid) escape sequences; the trailing
# separator is appended separately because a raw string cannot end in a single
# backslash, and prepare_dataset concatenates sub-folder names onto this value.
prepare_dataset(r"D:\Projects\coughvid" + "\\")