## <span style='color:red'> GitHub Repository for Doppler VGE Detection Inference </span>

## <span style='color:green'> Reading audio file(s) </span>

- ### Input can be a single audio file or a batch of files in a folder (WAV, FLAC, or MP3)
- ### Quality check 
- ### Sample rate check (resampling to 8 kHz if needed)
- ### Length check 

In [13]:
## 1) Folder selection for the data to process
import numpy as np
from tkinter import *
import soundfile as sf
import keras, glob, os
import tensorflow as tf
from tkinter import ttk
import scipy.signal as sps
from tensorflow import keras
from tkinter import filedialog
import tensorflow_addons as tfa

# Start from an empty selection so that re-running this cell can never
# silently reuse a folder chosen in an earlier run of the kernel.
path = ""

win = Tk()
win.geometry("250x100")
win.title("Folder Selection Window")


def select_file():
    """Ask the user for a folder and store it in the global ``path``.

    The global is only updated when a folder was actually chosen, so
    cancelling the dialog keeps any selection made earlier in this window.
    """
    global path
    chosen = filedialog.askdirectory(title="Select a Folder")
    if chosen:  # a cancelled dialog yields an empty value
        path = chosen


Label(win, text="Click Select a Folder", font=("Arial", 10, "bold")).pack(pady=20)

button = ttk.Button(win, text="Select a Folder", command=select_file)
close = ttk.Button(win, text="Close", command=win.destroy)

button.pack(ipadx=20, pady=5, side='left')
close.pack(ipadx=20, pady=5, side='right')

# Blocks until the window is closed (via "Close" or the window manager).
win.mainloop()

# Fail here with a clear message instead of an obscure error in a later cell.
if not path:
    raise RuntimeError("No folder was selected. Re-run this cell and choose the folder that holds the audio files.")

In [14]:
## 2) Grabbing the audio files within the folder

# The inference cells write their CSV results using relative file names, so
# the working directory is switched to the selected folder.
os.chdir(path)

# Audio formats to collect; add more glob patterns here to support other formats.
extensions = ("*.wav", "*.flac", "*.mp3")

# glob.escape() keeps folder names containing glob metacharacters ("[", "]",
# "*", "?") from being interpreted as patterns. Sorting makes the processing
# order reproducible instead of depending on the file system.
filelist = []
for extension in extensions:
    pattern = os.path.join(glob.escape(path), extension)
    filelist.extend(sorted(glob.glob(pattern)))

if not filelist:
    print('No audio files matching {} were found in {}'.format(extensions, path))

In [15]:
## 3) Sample rate check
new_rate = 8000  # target sampling rate in Hz
arrayData = {}   # file name (without directory) -> audio samples at new_rate
for file in filelist:
    fileName = os.path.basename(file)
    data, sampling_rate = sf.read(file)

    # soundfile returns a 1-D array for mono files and a (frames, channels)
    # array otherwise. For two-channel recordings keep the second channel;
    # the ndim guard makes this safe to leave enabled for mono files.
    if data.ndim == 2 and data.shape[1] == 2:
        data = data[:, 1]

    if sampling_rate != new_rate:
        # Number of samples the recording spans at the target rate.
        number_of_samples = round(len(data) * float(new_rate) / sampling_rate)
        data = sps.resample(data, number_of_samples)
        message = '******** The audio file {} was resampled! **********'
    else:
        message = '******** The audio file {} was not resampled! **********'

    arrayData[fileName] = data
    print(message.format(fileName))

******** The audio file syntheticDopplerAudioCombined_1_10.wav was not resampled! **********
******** The audio file syntheticDopplerAudioCombined_1_16.wav was not resampled! **********
******** The audio file syntheticDopplerAudioCombined_1_18.wav was not resampled! **********
******** The audio file syntheticDopplerAudioCombined_1_20.wav was not resampled! **********
******** The audio file syntheticDopplerAudioCombined_1_27.wav was not resampled! **********
******** The audio file syntheticDopplerAudioCombined_1_8.wav was not resampled! **********
******** The audio file syntheticDopplerAudioCombined_1_9.wav was not resampled! **********
******** The audio file syntheticDopplerAudioCombined_3_11.wav was not resampled! **********
******** The audio file syntheticDopplerAudioCombined_3_30.wav was not resampled! **********
******** The audio file syntheticDopplerAudioCombined_3_32.wav was not resampled! **********
******** The audio file syntheticDopplerAudioCombined_3_33.wav was not r

## <span style='color:green'> Check if subclavian or precordial is selected </span>
- ### Selection of proper model to test 

In [17]:
## To use the pre-trained models, the following steps should be performed:
###### 1) Create a folder named "Models" next to the selected folder above, i.e., in its parent directory (Make sure "M" is UPPERCASE)
###### 2) Download the models from GitHub, i.e., "RAW_SUBCLAVIAN.h5" and "RAW_PRECORDIAL.h5", and place them within the "Models" folder
###### 3) Run the script below.

# normpath drops a trailing separator, which would otherwise make the folder
# name come out empty.
selectedFolder = os.path.normpath(path)
modelType = os.path.basename(selectedFolder)   # name of the selected folder; also prefixes the result CSVs
mainFolder = os.path.dirname(selectedFolder)   # parent directory that holds "Models"

# A folder called "Subclavian" (any letter case) selects the subclavian model;
# every other folder name falls back to the precordial model.
if modelType.lower() == 'subclavian':
    modelFile = 'RAW_SUBCLAVIAN.h5'
else:
    modelFile = 'RAW_PRECORDIAL.h5'

modelPath = os.path.join(mainFolder, 'Models', modelFile)
if not os.path.isfile(modelPath):
    raise FileNotFoundError('Model file not found: {}'.format(modelPath))

# Make the (possibly fallback) choice visible to the user.
print('Folder "{}" selected -> loading model {}'.format(modelType, modelPath))
model_to_apply = keras.models.load_model(modelPath)

## <span style='color:green'> Different algorithms to check on the 10-second long segments </span>
- ### Majority voting for non-overlapping segments 
- ### Majority voting with a rolling average (50% overlap)

In [18]:
## Majority voting for non-overlap segments
# Each recording is cut into consecutive, non-overlapping segments of
# `timeSlice` seconds; every segment is classified on its own and one CSV row
# (file name, segment index, predicted grade) is written per segment.

timeSlice = 10 # seconds
segmentLength = timeSlice * new_rate  # samples per segment

# The context manager closes the CSV even if a prediction raises.
with open(modelType + "_NonOverlap_FinalScores.csv", 'w') as FinScores:
    FinScores.write('File ID,Segment,Predicted Grade\n')

    for k, v in arrayData.items():
        v = v.astype(np.float32)
        nClips = len(v) // segmentLength  # trailing samples that do not fill a segment are dropped
        if nClips == 0:
            print('Skipping {}: shorter than {} seconds'.format(k, timeSlice))
            continue
        for m in range(nClips):
            segment = v[m * segmentLength:(m + 1) * segmentLength]
            # Add a leading batch axis of size 1 for the model.
            rawDoppler10s = segment[np.newaxis, ...]
            y_pred = model_to_apply.predict(rawDoppler10s)
            PredictedGrade = str(np.argmax(y_pred))
            print('File ID: {}, Segment: {}, Predicted Grade: {}'.format(k, m, PredictedGrade))
            # Write the plain file name (not the repr of a list) as File ID.
            line = ",".join([k, str(m), PredictedGrade])
            FinScores.write(line + "\n")

        print("***********End of audio file {}***********".format(k))

File ID: ['syntheticDopplerAudioCombined_1_10.wav'], Segment: 0, Predicted Grade: 0
***********End of audio file syntheticDopplerAudioCombined_1_10.wav***********
File ID: ['syntheticDopplerAudioCombined_1_16.wav'], Segment: 0, Predicted Grade: 0
***********End of audio file syntheticDopplerAudioCombined_1_16.wav***********
File ID: ['syntheticDopplerAudioCombined_1_18.wav'], Segment: 0, Predicted Grade: 1
***********End of audio file syntheticDopplerAudioCombined_1_18.wav***********
File ID: ['syntheticDopplerAudioCombined_1_20.wav'], Segment: 0, Predicted Grade: 1
***********End of audio file syntheticDopplerAudioCombined_1_20.wav***********
File ID: ['syntheticDopplerAudioCombined_1_27.wav'], Segment: 0, Predicted Grade: 1
***********End of audio file syntheticDopplerAudioCombined_1_27.wav***********
File ID: ['syntheticDopplerAudioCombined_1_8.wav'], Segment: 0, Predicted Grade: 0
***********End of audio file syntheticDopplerAudioCombined_1_8.wav***********
File ID: ['syntheticDopp

In [19]:
## Majority voting with a rolling average (50% overlap)
# Each recording is cut into segments of `timeSlice` seconds whose start
# points are (timeSlice - overlapSize) seconds apart; every segment is
# classified on its own and one CSV row is written per segment.

timeSlice = 10 # seconds
overlapSize = 5 # seconds
segmentLength = timeSlice * new_rate              # samples per segment
hopLength = (timeSlice - overlapSize) * new_rate  # samples between segment starts

# The context manager closes the CSV even if a prediction raises.
with open(modelType + "_Overlapping_FinalScores.csv", 'w') as FinScores1:
    FinScores1.write('File ID,Segment,Predicted Grade\n')

    for k, v in arrayData.items():
        v = v.astype(np.float32)
        # Number of full-length windows that fit into the recording. For the
        # 10 s / 5 s setting this equals int(len(v) / (overlapSize * new_rate)) - 1,
        # and it stays correct when the overlap is not half the segment length.
        if len(v) < segmentLength:
            nClips = 0
        else:
            nClips = (len(v) - segmentLength) // hopLength + 1
        if nClips <= 0:
            print('Skipping {}: shorter than {} seconds'.format(k, timeSlice))
            continue
        for m in range(nClips):
            start = m * hopLength
            segment = v[start:start + segmentLength]
            # Add a leading batch axis of size 1 for the model.
            rawDoppler10s = segment[np.newaxis, ...]
            y_pred = model_to_apply.predict(rawDoppler10s)
            PredictedGrade = str(np.argmax(y_pred))
            print('File ID: {}, Segment: {}, Predicted Grade: {}'.format(k, m, PredictedGrade))
            # Write the plain file name (not the repr of a list) as File ID.
            line = ",".join([k, str(m), PredictedGrade])
            FinScores1.write(line + "\n")

        print("***********End of audio file {}***********".format(k))

File ID: ['syntheticDopplerAudioCombined_1_10.wav'], Segment: 0, Predicted Grade: 0
***********End of audio file syntheticDopplerAudioCombined_1_10.wav***********
File ID: ['syntheticDopplerAudioCombined_1_16.wav'], Segment: 0, Predicted Grade: 0
***********End of audio file syntheticDopplerAudioCombined_1_16.wav***********
File ID: ['syntheticDopplerAudioCombined_1_18.wav'], Segment: 0, Predicted Grade: 1
***********End of audio file syntheticDopplerAudioCombined_1_18.wav***********
File ID: ['syntheticDopplerAudioCombined_1_20.wav'], Segment: 0, Predicted Grade: 1
***********End of audio file syntheticDopplerAudioCombined_1_20.wav***********
File ID: ['syntheticDopplerAudioCombined_1_27.wav'], Segment: 0, Predicted Grade: 1
***********End of audio file syntheticDopplerAudioCombined_1_27.wav***********
File ID: ['syntheticDopplerAudioCombined_1_8.wav'], Segment: 0, Predicted Grade: 0
***********End of audio file syntheticDopplerAudioCombined_1_8.wav***********
File ID: ['syntheticDopp