In [None]:
%cd /content/
!gdown https://drive.google.com/uc?id=1Oq9UgA9cEGMNRGvF7oNKkFOg6udsDprl

In [None]:
!mkdir -p /content/final_public_train
%cd '/content/aicv115m_final_public_train/public_train_audio_files/'
!for i in *.wav; do name=`echo "${i%.*}"` ; ffmpeg -y -i "${name}.wav" -ar 16000 -ac 1 "/content/final_public_train/${name}.wav"; done

In [None]:
import librosa
import librosa.display
from tqdm import tqdm

import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from tensorflow import keras
import tensorflow as tf

from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import BatchNormalization, Dropout, Conv2D, Dense, GlobalAveragePooling2D, concatenate
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import optimizers
from tensorflow.keras.utils import plot_model, Sequence

from datetime import datetime
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

import matplotlib
import matplotlib.pyplot as plt
import itertools
import pylab
from shutil import copyfile

In [None]:
train_meta_df = pd.read_csv("/content/aicv115m_final_public_train/public_train_metadata.csv")

In [None]:
train_audio_dir = "/content/final_public_train/"
train_meta_df["audio_path"] = train_meta_df["uuid"].apply(lambda x: train_audio_dir + x + ".wav")

In [None]:
# Remove one recording, identified by its uuid, from the metadata (in place).
train_meta_df.drop(train_meta_df[train_meta_df['uuid'] == "23ccaa28-8cb8-43e4-9e59-112fa4dc6559"].index, inplace = True)
# Hard-coded row positions. They are consumed by .iloc below, so they are
# positional offsets into train_meta_df *after* the drop above, not index labels.
# NOTE(review): presumably hand-picked recordings that should be kept even if
# they carry a noise note — confirm against the metadata.
idx_lst = [169, 1063, 2064, 2297, 2512, 2723, 2832, 3143, 3600, 3774, 3820, 4191, 4378]
# Keep only the three columns used downstream and renumber the rows from 0.
df_covid = train_meta_df.iloc[idx_lst][["uuid","assessment_result", "audio_path"]].reset_index(drop=True)

In [None]:
train_nonote_df = train_meta_df[train_meta_df.audio_noise_note.isnull()].reset_index(drop=True)
train_nonote_df = train_nonote_df[["uuid","assessment_result", "audio_path"]]

In [None]:
train_data = pd.concat([train_nonote_df, df_covid]).reset_index(drop=True)
train_data = train_data[["uuid","assessment_result", "audio_path"]]

### Get audio <= 6.13 seconds

In [None]:
# Get duration and sample rate
import scipy
from scipy.io import wavfile

def getRateAndDuration(_df):
    """Attach the sample rate and duration of every referenced WAV file.

    Each path in ``_df["audio_path"]`` is read with ``scipy.io.wavfile.read``.
    Two columns are written onto ``_df`` itself:

    * ``sample_rate`` - the rate reported by the WAV header
    * ``seconds``     - number of samples divided by the sample rate (float)

    Returns the same DataFrame object that was passed in (mutated in place).
    """
    rates = []
    durations = []
    for wav_path in _df["audio_path"]:
        rate, samples = wavfile.read(wav_path)
        rates.append(rate)
        # len() counts frames along the first axis, so this is the duration in seconds.
        durations.append(len(samples) / rate)
    _df["sample_rate"] = rates
    _df["seconds"] = durations
    return _df

In [None]:
train_data = getRateAndDuration(train_data)

In [None]:
!rm -rf /content/cough_data
!mkdir -p /content/cough_data

In [None]:
# Clips of at most 6.13 seconds are copied unchanged into /content/cough_data/.
tmp_df = train_data[train_data["seconds"] <= 6.13].reset_index(drop=True)

for clip_uuid, clip_path in zip(tmp_df["uuid"], tmp_df["audio_path"]):
    copyfile(clip_path, "/content/cough_data/" + clip_uuid + ".wav")

In [None]:
!pip install pydub

In [None]:
from pydub import AudioSegment
from pydub.silence import split_on_silence, detect_nonsilent
import os

def splitAudio(new_covid_data, file_path, assessment_result, dst_pth, min_silence_len, keep_silence):
    """Split one WAV file on silence and record every resulting chunk.

    Args:
        new_covid_data: list that receives one
            ``[uuid, assessment_result, path, sample_rate, seconds]`` entry per chunk
            (appended in place).
        file_path: path of the WAV file to split.
        assessment_result: label copied onto every chunk.
        dst_pth: output directory prefix; it is concatenated directly with the
            chunk name, so it must end with a path separator.
        min_silence_len: forwarded to ``split_on_silence``.
        keep_silence: forwarded to ``split_on_silence``.

    Returns:
        None. Chunks are written to disk and described in ``new_covid_data``.
    """
    # Everything before the first "." of the file name is used as the base id.
    stem = os.path.basename(file_path).split(".")[0]
    recording = AudioSegment.from_wav(file_path)
    segments = split_on_silence(
        recording,
        min_silence_len=min_silence_len,
        # Threshold is set 16 below the recording's own dBFS value.
        silence_thresh=recording.dBFS - 16,
        keep_silence=keep_silence,
    )
    for position, segment in enumerate(segments):
        uuid = "chunk" + str(position) + stem
        out_file = dst_pth + uuid + ".wav"
        segment.export(out_file, format="wav")
        # Re-read the exported file to measure its sample rate and duration.
        rate, samples = wavfile.read(out_file)
        new_covid_data.append([uuid, assessment_result, out_file, rate, len(samples) / rate])

In [None]:
!rm -rf /content/covid_cut_audio
!mkdir /content/covid_cut_audio

In [None]:
def createNewDataFrame(df, min_silence_len, keep_silence):
    """Split every clip listed in ``df`` on silence and tabulate the chunks.

    Each row's ``audio_path`` is passed to ``splitAudio`` together with its
    ``assessment_result``; chunks are written to ``/content/covid_cut_audio/``.

    Returns:
        A new DataFrame with one row per chunk and the columns
        ``uuid``, ``assessment_result``, ``audio_path``, ``sample_rate``, ``seconds``.
    """
    chunk_records = []
    for _, record in df.iterrows():
        splitAudio(
            chunk_records,
            record["audio_path"],
            record["assessment_result"],
            "/content/covid_cut_audio/",
            min_silence_len,
            keep_silence,
        )
    column_names = ['uuid', 'assessment_result', 'audio_path', 'sample_rate', 'seconds']
    return pd.DataFrame(chunk_records, columns=column_names)

In [None]:
long_covid_audio_df = train_data[(train_data["seconds"] > 6.13) & (train_data["assessment_result"] == 1)].reset_index(drop=True)
new_covid_df = createNewDataFrame(long_covid_audio_df, 1000, 1000)
print(len(new_covid_df[(new_covid_df["seconds"] <= 6.13)]))
print(len(new_covid_df[(new_covid_df["seconds"] > 6.13)]))

In [None]:
long_covid_audio_df2 = new_covid_df[(new_covid_df["seconds"] > 6.13)].reset_index(drop=True)
new_covid_df2 = createNewDataFrame(long_covid_audio_df2, 1000, 1000)
print(len(new_covid_df2[(new_covid_df2["seconds"] <= 6.13)]))
print(len(new_covid_df2[(new_covid_df2["seconds"] > 6.13)]))

In [None]:
# Remove chunks that are not cough sounds (rows 0 and 1 of new_covid_df2).
new_covid_df2 = new_covid_df2.drop([0,1]).reset_index(drop=True)

In [None]:
long_covid_audio_df3 = new_covid_df2[(new_covid_df2["seconds"] > 6.13)].reset_index(drop=True)
new_covid_df3 = createNewDataFrame(long_covid_audio_df3, 900, 900)
print(len(new_covid_df3[(new_covid_df3["seconds"] <= 6.13)]))
print(len(new_covid_df3[(new_covid_df3["seconds"] > 6.13)]))

In [None]:
long_covid_audio_df4 = new_covid_df3[(new_covid_df3["seconds"] > 6.13)].reset_index(drop=True)
new_covid_df4 = createNewDataFrame(long_covid_audio_df4, 800, 800)
print(len(new_covid_df4[(new_covid_df4["seconds"] <= 6.13)]))
print(len(new_covid_df4[(new_covid_df4["seconds"] > 6.13)]))

In [None]:
# Delete a cough clip that is too short (1.061; presumably seconds — TODO confirm).
new_covid_df4 = new_covid_df4.drop([45]).reset_index(drop=True)

In [None]:
long_covid_audio_df5 = new_covid_df4[(new_covid_df4["seconds"] > 6.13)].reset_index(drop=True)
new_covid_df5 = createNewDataFrame(long_covid_audio_df5, 700, 700)
print(len(new_covid_df5[(new_covid_df5["seconds"] <= 6.13)]))
print(len(new_covid_df5[(new_covid_df5["seconds"] > 6.13)]))

In [None]:
# Delete a cough clip that is too short (0.931; presumably seconds — TODO confirm).
new_covid_df5 = new_covid_df5.drop([7]).reset_index(drop=True)

In [None]:
long_covid_audio_df6 = new_covid_df5[(new_covid_df5["seconds"] > 6.13)].reset_index(drop=True)
new_covid_df6 = createNewDataFrame(long_covid_audio_df6, 500, 500)
print(len(new_covid_df6[(new_covid_df6["seconds"] <= 6.13)]))
print(len(new_covid_df6[(new_covid_df6["seconds"] > 6.13)]))

In [None]:
new_covid_df6 = new_covid_df6.drop([6, 7, 8, 9, 10, 11, 12, 13]).reset_index(drop=True)

In [None]:
not_covid_df1 = train_data[(train_data["seconds"] <= 6.13) & (train_data["assessment_result"] == 0)]
covid_df0 = train_data[(train_data["seconds"] <= 6.13) & (train_data["assessment_result"] == 1)]

covid_df1 = new_covid_df[(new_covid_df["seconds"] <= 6.13)]
covid_df2 = new_covid_df2[(new_covid_df2["seconds"] <= 6.13)]
covid_df3 = new_covid_df3[(new_covid_df3["seconds"] <= 6.13)]
covid_df4 = new_covid_df4[(new_covid_df4["seconds"] <= 6.13)]
covid_df5 = new_covid_df5[(new_covid_df5["seconds"] <= 6.13)]
covid_df6 = new_covid_df6[(new_covid_df6["seconds"] <= 6.13)]

final_train_data = pd.concat([not_covid_df1, covid_df0, covid_df1, covid_df2, covid_df3, covid_df4, covid_df5, covid_df6]).reset_index(drop=True)
final_train_data

In [None]:
## Not a cough, or the cough is too quiet
## 126, 2229, 2291, 2426, 2483, 1974, 2054
final_train_data = final_train_data.drop([126, 2229, 2291, 2426, 2483, 1974, 2054]).reset_index(drop=True)
# Keep only clips longer than 1.4 seconds.
final_train_data = final_train_data[final_train_data["seconds"] > 1.4].reset_index(drop=True)

In [None]:
final_train_data.assessment_result.value_counts()

### Remove Silent Audio

In [None]:
!pip install aubio

In [None]:
import sys
import aubio
import numpy as np
import os

def avg_fq(audio_path):
    """Return the mean YIN pitch estimate (MIDI units) over a whole audio file.

    The file is read hop by hop with ``aubio.source`` and every hop is passed
    to an ``aubio.pitch("yin", ...)`` detector; the per-hop estimates are
    averaged with ``numpy``.

    Args:
        audio_path: path of the audio file to analyse.

    Returns:
        The arithmetic mean of the per-hop pitch values.
    """
    win_s = 2048
    hop_s = win_s // 4

    # aubio.source's positional order is (path, samplerate, hop_size). The
    # previous call passed hop_s as the second argument, i.e. as the sample
    # rate. 0 keeps the file's native rate and hop_s is now really the hop size.
    s = aubio.source(audio_path, 0, hop_s)
    tolerance = 0.8
    pitch_o = aubio.pitch("yin", win_s, hop_s, s.samplerate)
    pitch_o.set_unit("midi")
    pitch_o.set_tolerance(tolerance)

    pitches = []
    while True:
        samples, read = s()
        pitches.append(pitch_o(samples)[0])
        # A short read means the end of the file was reached.
        if read < hop_s:
            break

    return np.array(pitches).mean()


def hasNoSound(audio_path):
    """Report whether a clip is treated as silent.

    A clip counts as silent when its mean pitch, as computed by ``avg_fq``,
    is below 1.

    Returns:
        ``True`` for a silent clip, ``False`` otherwise.
    """
    return bool(avg_fq(audio_path) < 1)

In [None]:
# Paths of every training clip that hasNoSound() flags as silent.
noSoundLST = [
    clip_path
    for clip_path in final_train_data["audio_path"]
    if hasNoSound(clip_path)
]

In [None]:
final_train_data = final_train_data[~final_train_data["audio_path"].isin(noSoundLST)].reset_index(drop=True)

In [None]:
final_train_data.assessment_result.value_counts()

### Data augmentation

In [None]:
import json
import math
# import noisereduce as nr

import random

def shiftAudio(data, sampling_rate, shift_max):
    """Shift a clip in time by a random number of samples, zero-filling the gap.

    Args:
        data: 1-D array of audio samples. It is not modified; ``np.roll``
            returns a new array.
        sampling_rate: samples per second of ``data``.
        shift_max: upper bound of the shift in seconds. The shift is drawn
            from ``[0, sampling_rate * shift_max)`` samples.

    Returns:
        A new array of the same length as ``data``. The samples that wrapped
        around during the roll are replaced with zeros. A shift of zero
        returns an unchanged copy.
    """
    shift = np.random.randint(int(sampling_rate * shift_max))
    shift_direction = random.choice(['right', 'both'])
    if shift_direction == 'right':
        shift = -shift
    elif shift_direction == 'both':
        direction = np.random.randint(0, 2)
        if direction == 1:
            shift = -shift
    augmented_data = np.roll(data, shift)
    # Silence the samples that wrapped around. A zero shift must be left alone:
    # ``augmented_data[0:] = 0`` would otherwise blank the entire clip.
    if shift > 0:
        augmented_data[:shift] = 0
    elif shift < 0:
        augmented_data[shift:] = 0
    return augmented_data

# https://www.kaggle.com/huseinzol05/sound-augmentation-librosa
# https://medium.com/@makcedward/data-augmentation-for-audio-76912b01fdf6
def audio_augmentation(samples, sr):
    """Create six augmented variants of one audio clip.

    Args:
        samples: 1-D array of audio samples (left unmodified).
        sr: sample rate of ``samples``; only used for the ``shiftAudio`` variant.

    Returns:
        A list ``[y_aug, y_noise1, y_shift, y_percussive, y_shift2, augmented_data]``:

        * ``y_aug``         - samples multiplied by a random gain in [1.5, 3)
        * ``y_noise1``      - Gaussian noise scaled by 0.1%-0.5% of the largest sample value
        * ``y_shift``       - zero-padded time shift of up to +/-10% of the clip length
        * ``y_percussive``  - second component returned by ``librosa.effects.hpss``
        * ``y_shift2``      - random shift of up to 0.1 s via ``shiftAudio``
        * ``augmented_data``- Gaussian noise with a std of 0.001-0.005, cast back to
          the sample dtype

    The order of the random draws is kept stable so seeded runs stay comparable.
    """
    # Variant 1: louder copy.
    y_aug = samples.copy()
    dyn_change = np.random.uniform(low=1.5,high=3)
    y_aug = y_aug * dyn_change

    # Variant 2: noise whose amplitude follows the clip's largest sample value.
    y_noise1 = samples.copy()
    noise_amp = np.random.uniform(0.001, 0.005) * np.amax(y_noise1)
    y_noise1 = y_noise1.astype('float64') + noise_amp * np.random.normal(size=y_noise1.shape[0])

    # Variant 3: noise with an absolute amplitude, kept in the input's dtype.
    y_noise2 = samples.copy()
    noise = np.random.randn(len(y_noise2))
    augmented_data = y_noise2 + np.random.uniform(0.001, 0.005) * noise
    augmented_data = augmented_data.astype(type(y_noise2[0]))

    # Variant 4: time shift by up to 10% of the clip length in either direction.
    y_shift = samples.copy()
    n_samples = y_shift.shape[0]
    timeshift_fac = 0.1 *2*(np.random.uniform()-0.5)
    start = int(n_samples * timeshift_fac)
    if (start > 0):
        # Delay: pad the front, keep the first n samples.
        y_shift = np.pad(y_shift,(start,0),mode='constant')[:n_samples]
    else:
        # Advance: pad the end and drop the first -start samples. Keeping the
        # first n samples here (as before) returned the clip unshifted.
        y_shift = np.pad(y_shift,(0,-start),mode='constant')[-start:]

    # Variant 5: harmonic/percussive separation; only element [1] is returned.
    y_hpss = librosa.effects.hpss(samples.astype('float64'))

    # Variant 6: roll-based shift of up to 0.1 seconds.
    y_shift2 = samples.copy()
    y_shift2 = shiftAudio(y_shift2, sr, 0.1)

    return [y_aug, y_noise1, y_shift, y_hpss[1], y_shift2, augmented_data]
  

In [None]:
!rm -rf /content/audio_augmentation
!mkdir -p /content/audio_augmentation

In [None]:
from pydub import AudioSegment
import random

def changeVolume(audio_pth, dest_pth, isIncrease):
    """Write a louder or quieter copy of a WAV file.

    Args:
        audio_pth: source WAV path.
        dest_pth: destination WAV path.
        isIncrease: when equal to ``True`` a random integer in [5, 10] is added
            to the segment; otherwise a random integer in [-7, -2] is added.
            pydub applies ``segment + n`` as a gain change.
    """
    if isIncrease == True:
        low, high = 5, 10
    else:
        low, high = -7, -2
    gain = random.randint(low, high)

    louder_or_quieter = AudioSegment.from_wav(audio_pth) + gain
    louder_or_quieter.export(dest_pth, "wav")

In [None]:
import soundfile as sf

def dataAugmentation(df, is_test):
    """Build the table of original and augmented clips and write the augmented audio.

    For every row of ``df`` the original clip is listed with
    ``type_data == "original"``. When ``is_test`` is ``False``, augmented WAV
    files are also written to ``/content/audio_augmentation/``:

    * the variants returned by ``audio_augmentation`` (all of them for label 1,
      a random sample of 4 for label 0), named ``"aug<idx>" + uuid``;
    * one louder and one quieter copy from ``changeVolume``, named
      ``"increaseTrue" + uuid`` / ``"increaseFalse" + uuid``.

    Args:
        df: DataFrame with columns ``uuid``, ``assessment_result``,
            ``audio_path``, ``sample_rate`` and ``seconds``.
        is_test: set to ``True`` to list the originals only.

    Returns:
        A new DataFrame with the columns of ``df`` plus ``type_data``
        (``"original"`` or ``"augmentation"``). Augmented rows copy the
        ``sample_rate`` and ``seconds`` values of their source row.
    """
    new_data = []
    for index, row in df.iterrows():
        # ``sr`` must be passed by keyword: it is keyword-only in librosa >= 0.10.
        audio, sample_rate = librosa.load(row["audio_path"], sr=row["sample_rate"])
        new_data.append([row["uuid"], row["assessment_result"], row["audio_path"], row["sample_rate"], row["seconds"], "original"])
        if is_test == False:
            audio_aug = audio_augmentation(audio, sample_rate)
            if row["assessment_result"] == 0:
                # The negative class gets fewer variants than the positive class.
                num_aug = 4
                audio_aug = random.sample(audio_aug, num_aug)
            for idx, y_aug in enumerate(audio_aug):
                aug_name = "aug" + str(idx)
                aud_uid = aug_name + row["uuid"]
                file_pth = "/content/audio_augmentation/" + aud_uid + ".wav"
                sf.write(file_pth, y_aug, sample_rate)
                new_data.append([aud_uid, row["assessment_result"], file_pth, row["sample_rate"], row["seconds"], "augmentation"])

            # Volume variants are produced from the file on disk, not from ``audio``.
            for isIncrease in [True, False]:
                aud_uid = "increase" + str(isIncrease) + row["uuid"]
                file_pth = "/content/audio_augmentation/" + aud_uid + ".wav"
                changeVolume(row["audio_path"], file_pth, isIncrease)
                new_data.append([aud_uid, row["assessment_result"], file_pth, row["sample_rate"], row["seconds"], "augmentation"])

    new_df = pd.DataFrame(new_data, columns = ['uuid', 'assessment_result', 'audio_path', 'sample_rate', 'seconds', 'type_data'])
    return new_df

In [None]:
final_aug_data = dataAugmentation(final_train_data, False)
final_aug_data

In [None]:
final_aug_data["assessment_result"].value_counts()

In [None]:
!rm -rf /content/final_audio_data
!mkdir -p /content/final_audio_data
for index, row in final_aug_data.iterrows():
    src = row["audio_path"]
    dst = "/content/final_audio_data/"+row["uuid"]+".wav"
    copyfile(src, dst)

final_aug_data['audio_path'] = final_aug_data['audio_path'].apply(lambda x: "/content/final_audio_data/"+os.path.basename(x))
final_aug_data

### Extract MFCC Features with Jlibrosa

In [None]:
%cd /content
!git clone https://github.com/train255/kambria-challenge

In [None]:
!rm -rf /content/mfcc_features
!mkdir -p /content/mfcc_features
%cd /content/kambria-challenge/generateMFCC/
!javac -source 1.7 -target 1.7 -d bin -cp lib/jlibrosa-1.1.8-SNAPSHOT-jar-with-dependencies.jar src/com/example/Main.java
!java -cp lib/jlibrosa-1.1.8-SNAPSHOT-jar-with-dependencies.jar:bin com.example.Main /content/final_audio_data /content/mfcc_features 16000 120

In [None]:
# Class names. NOTE(review): presumably index-aligned with assessment_result
# (0 -> not_covid, 1 -> covid) — confirm.
target_names = ['not_covid',  'covid']
# Input shape of the CNN: (num_rows, num_columns, num_channels).
# NOTE(review): 120 matches the last argument passed to the Java MFCC
# extractor; presumably the number of coefficients per frame — confirm.
num_rows = 120
# Width that add_pad_len() zero-pads every feature matrix to.
num_columns = 192
num_channels = 1

# NOTE(review): the three values below are not referenced anywhere in the
# visible notebook.
n_fft = 4096
hop_length = 512
n_mels = 512

In [None]:
def add_pad_len(x):
    """Zero-pad a 2-D feature matrix on the right up to ``num_columns`` columns.

    A matrix that is already wider than ``num_columns`` is returned unchanged
    (it is not truncated).
    """
    missing = num_columns - x.shape[1]
    if missing < 0:
        return x
    return np.pad(x, pad_width=((0, 0), (0, missing)), mode='constant')

In [None]:
from pathlib import Path
import re

def readFeatures(uuid, features_dir):
    """Load one clip's feature matrix from ``<features_dir><uuid>.txt``.

    The file is parsed as text with one matrix row per line and values
    separated by commas; empty lines and empty fields (for example after a
    trailing comma) are ignored. The matrix is then zero-padded on the right
    by ``add_pad_len``.

    Args:
        uuid: clip identifier; also the file's base name.
        features_dir: directory prefix, concatenated directly with ``uuid``
            (so it must end with a path separator).

    Returns:
        A 2-D ``float`` NumPy array.

    Raises:
        ValueError: if the file does not contain a rectangular, non-empty matrix.
    """
    txt_pth = features_dir + uuid + ".txt"
    mfcc_txt = Path(txt_pth).read_text()
    cols = []
    # splitlines() splits on real line breaks; the previous split("n") split
    # on the letter "n" and therefore never separated the rows.
    for cols_txt in mfcc_txt.splitlines():
        if cols_txt.strip() == "":
            continue
        cols.append([float(value) for value in cols_txt.split(",") if value.strip() != ""])
    # The builtin float replaces the np.float alias, which NumPy 1.24 removed.
    features = np.array(cols, dtype=float)
    if features.ndim != 2:
        raise ValueError("feature file " + txt_pth + " does not contain a 2-D matrix")
    # Diagnostic only: report matrices that exceed the expected size.
    if (features.shape[0] > 192 or features.shape[1] > 192):
        print(features.shape)
        print(uuid)
    features = add_pad_len(features)
    return features

In [None]:
final_aug_data["feature"] = final_aug_data["uuid"].apply(readFeatures, features_dir="/content/mfcc_features/")
final_aug_data.to_csv("/content/drive/MyDrive/ML/covid_cough_detection/final_aug_data.csv", index=False)
final_aug_data

### Training

In [None]:
from sklearn.model_selection import StratifiedKFold
# Five stratified folds with a fixed seed so the split is reproducible.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Folds are assigned over the original (non-augmented) clips only.
tmp_df = final_aug_data[final_aug_data["type_data"] == "original"].reset_index(drop=True)
tmp_df = tmp_df[["audio_path", "assessment_result"]]
fold = 0
for train_ix, test_ix in kfold.split(tmp_df, tmp_df["assessment_result"]):
    # Column "fold<k>" holds k on the rows held out in fold k and stays NaN elsewhere.
    tmp_df.loc[test_ix, ["fold"+str(fold)]] = int(fold)
    fold += 1

# The label was only needed for stratification; tmp_df keeps audio_path and the fold columns.
tmp_df.drop(columns=["assessment_result"], inplace=True)
tmp_df.fold0.value_counts()

In [None]:
# Attach the fold columns to the clips they were computed for by joining on
# audio_path. A column-wise concat aligns on the row index instead: tmp_df is
# renumbered from 0, so its rows would be paired with unrelated rows of
# final_aug_data and a second "audio_path" column would be created.
# Rows without a fold assignment (the augmented clips) get NaN in every fold column.
final_df = final_aug_data.merge(
    tmp_df.drop_duplicates(subset="audio_path"),  # guard against row fan-out
    on="audio_path",
    how="left",
)
final_df

In [None]:
def create_cnn():
    """Build the (uncompiled) binary-classification CNN.

    Architecture: six ``Conv2D`` + ``BatchNormalization`` stages (16 filters
    with a 7x7 kernel, then 32/64/128/256 with 3x3 kernels and 512 with a 1x1
    kernel; all ReLU, ``padding="same"``), followed by global average pooling
    and a single sigmoid unit. The input shape is
    ``(num_rows, num_columns, num_channels)``.

    Returns:
        A ``Sequential`` model.
    """
    # (filters, kernel_size) of every convolution after the input stage.
    later_stages = [
        (32, (3,3)),
        (64, (3,3)),
        (128, (3,3)),
        (256, (3,3)),
        (512, (1,1)),
    ]

    model = Sequential()
    model.add(Conv2D(16, (7,7), input_shape=(num_rows, num_columns, num_channels), activation='relu', padding="same"))
    model.add(BatchNormalization())
    for n_filters, kernel in later_stages:
        model.add(Conv2D(n_filters, kernel, activation='relu', padding="same"))
        model.add(BatchNormalization())
    model.add(GlobalAveragePooling2D())
    model.add(Dense(1, activation='sigmoid'))
    return model

In [None]:
class DataGenerator(Sequence):
    """Keras ``Sequence`` serving fixed-size batches of precomputed features.

    Batches are built from the rows of the DataFrame ``_X``: column
    ``feature`` supplies the per-clip matrix and ``assessment_result`` the
    integer label.
    """

    def __init__(self, _X, batch_size=32, n_channels=1, n_columns=470, n_rows=120, shuffle=True):
        """Store the configuration and prepare the first epoch's ordering.

        Args:
            _X: DataFrame with ``feature`` and ``assessment_result`` columns.
            batch_size: number of samples per batch.
            n_channels: size of the trailing channel axis added to each batch.
            n_columns: number of columns of each feature matrix.
            n_rows: number of rows of each feature matrix.
            shuffle: reshuffle the sample order at the start of every epoch.
        """
        self.X = _X
        self.batch_size = batch_size
        self.n_rows = n_rows
        self.n_columns = n_columns
        self.n_channels = n_channels
        self.shuffle = shuffle
        self.img_indexes = np.arange(len(self.X))
        self.on_epoch_end()

    def __len__(self):
        'Number of full batches per epoch; a trailing partial batch is dropped'
        return int(len(self.img_indexes) // self.batch_size)

    def __getitem__(self, index):
        'Build batch number ``index`` and return it as ``(X, y)``'
        first = index * self.batch_size
        last = first + self.batch_size
        # Translate positions in the (possibly shuffled) order into row ids.
        row_ids = [self.img_indexes[position] for position in self.indexes[first:last]]
        return self.__data_generation(row_ids)

    def on_epoch_end(self):
        'Reset the sample order and reshuffle it when shuffling is enabled'
        self.indexes = np.arange(len(self.X))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temps):
        'Stack the features and labels of the given row ids into batch arrays'
        batch_x = np.empty((self.batch_size, self.n_rows, self.n_columns))
        batch_y = np.empty((self.batch_size), dtype=int)
        for slot, row_id in enumerate(list_IDs_temps):
            sample = self.X.iloc[row_id]
            batch_x[slot] = sample["feature"]
            batch_y[slot] = sample["assessment_result"]
        # Append the channel axis expected by Conv2D.
        batch_x = batch_x.reshape(batch_x.shape[0], self.n_rows, self.n_columns, self.n_channels)
        return batch_x, batch_y

In [None]:
params = dict(
    batch_size=16,
    n_rows=num_rows,
    n_columns=num_columns,
    n_channels=num_channels,
)
params_train = dict(
    shuffle=True,
    **params
)
params_valid = dict(
    shuffle=False,
    **params
)

In [None]:
import matplotlib.pyplot as plt

def plot_his(history):
    """Plot training and validation accuracy and loss side by side.

    Args:
        history: object whose ``history`` dict holds the per-epoch lists
            ``accuracy``, ``val_accuracy``, ``loss`` and ``val_loss``.
    """
    # (subplot position, metric key) for the two panels, left to right.
    panels = ((221, 'accuracy'), (222, 'loss'))

    plt.figure(1, figsize = (15,8))
    for position, metric_name in panels:
        plt.subplot(position)
        plt.plot(history.history[metric_name])
        plt.plot(history.history['val_' + metric_name])
        plt.title('model ' + metric_name)
        plt.ylabel(metric_name)
        plt.xlabel('epoch')
        plt.legend(['train', 'valid'])
    plt.show()

In [None]:
def train_model(model, train_gen, val_gen, fold):
    """Compile and train ``model`` for one cross-validation fold.

    The model is compiled with Adam (learning rate 0.001) and binary
    cross-entropy, then fitted for up to 100 epochs. All callbacks monitor
    ``val_loss``:

    * ``ModelCheckpoint`` keeps the best model in ``/content/cnn_<fold>.hdf5``;
    * ``EarlyStopping`` stops after 10 epochs without improvement;
    * ``ReduceLROnPlateau`` multiplies the learning rate by 0.3 after 1 stalled
      epoch, down to a minimum of 0.00001.

    The learning curves are plotted with ``plot_his`` once training ends.

    Args:
        model: uncompiled Keras model.
        train_gen: training data passed to ``model.fit``.
        val_gen: validation data passed to ``model.fit``.
        fold: fold number; only used in the checkpoint file name.

    Returns:
        The ``History`` object returned by ``model.fit``, so callers can
        inspect the per-epoch metrics (previously it was discarded).
    """
    checkpoint_model_path = "/content/cnn_"+str(fold)+".hdf5"
    metric = "val_loss"
    print("METRIC:", metric)

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=["accuracy"])

    num_epochs = 100

    checkpointer = ModelCheckpoint(
        filepath=checkpoint_model_path,
        monitor=metric, verbose=1, save_best_only=True)
    es_callback = EarlyStopping(monitor=metric, patience=10, verbose=1)
    reduce_lr = ReduceLROnPlateau(monitor=metric, factor=0.3, patience=1, verbose=1, min_delta=0.0001, cooldown=1, min_lr=0.00001)

    history = model.fit(
        train_gen,
        epochs=num_epochs,
        validation_data=val_gen,
        callbacks=[checkpointer,es_callback,reduce_lr],
        verbose=1
    )

    plot_his(history)
    return history

In [None]:
# Map every clip to the uuid of the recording it was derived from.
# dataAugmentation() names augmented clips "aug<digit><uuid>" or
# "increase<True|False><uuid>". Exactly one digit is stripped, because the
# uuid itself may start with a digit.
source_uuid = final_df["uuid"].where(
    final_df["type_data"] != "augmentation",
    final_df["uuid"].str.replace(r"^(?:aug[0-9]|increase(?:True|False))", "", regex=True),
)

for fold in range(5):
    fold_col = "fold" + str(fold)
    in_valid = final_df[fold_col] == fold
    # Keep every clip derived from a held-out recording out of the training
    # split. Otherwise augmented copies of validation clips leak into training
    # and the validation loss is over-optimistic.
    held_out_sources = set(source_uuid[in_valid])
    in_train = ~in_valid & ~source_uuid.isin(held_out_sources)

    X_valid = final_df[in_valid].reset_index(drop=True)
    X_train = final_df[in_train].reset_index(drop=True)
    train_generator = DataGenerator(X_train, **params_train)
    valid_generator = DataGenerator(X_valid, **params_valid)
    cnn_model = create_cnn()
    train_model(cnn_model, train_generator, valid_generator, fold)

### Verify train test data

In [None]:
infer_df = final_df[final_df["type_data"] == "original"].reset_index(drop=True)

In [None]:
# Inference uses batches of one clip so no sample is dropped by the
# generator's floor-based batch count.
params = dict(
    batch_size=1,
    n_rows=num_rows,
    n_columns=num_columns,
    n_channels=num_channels,
)
params_valid = dict(
    shuffle=False,
    **params
)
full_gen = DataGenerator(infer_df, **params_valid)

# One list of per-clip predictions for each fold's best checkpoint.
# NOTE(review): every clip was in the training split of four of the five
# models, so these scores are not a held-out estimate.
pred_models = []
for fold in range(5):
    checkpoint_model_path = "/content/cnn_"+str(fold)+".hdf5"
    # create_cnn() takes no arguments; passing "mobilenet" raised a TypeError.
    cnn_model = create_cnn()
    cnn_model.load_weights(checkpoint_model_path)
    y_preds = cnn_model.predict(full_gen)
    predictions = [p[0] for p in y_preds]
    pred_models.append(predictions)

In [None]:
# Ensemble: average the per-clip predictions of the fold models.
infer_df["pred"] = np.average(pred_models, axis=0)

from sklearn.metrics import classification_report

# np.round turns the averaged sigmoid output into a 0/1 label before scoring.
print(classification_report(infer_df["assessment_result"], np.round(infer_df["pred"]), target_names=["not_covid", "covid"]))