In [11]:
import tensorflow.keras as keras
import numpy as np
import librosa
import math

In [33]:
# MODEL_PATH = "models/heart_sound_model1_v3_0002_D5s.h5"
MODEL_PATH = "models/heart_sound_model1_v3_0004_D5s.h5"
NUM_SAMPLES_TO_CONSIDER = 22050 # 1 sec

In [35]:
class _heart_sound_spotting_service:
    model = None
    _mappings = [
        "abnormal",
        "artifact",
        "normal"
    ]
    _instance = None

    def predict(self, file_path):
        # extract MFCCs
        MFCCs = self.preprocess(file_path) # (# segments, # coefficients) 

        # convert 2d MFCCs array into 4d array -> (# samples, # segments, # coefficients, # # channels=1)
        MFCCs = MFCCs[np.newaxis, ..., np.newaxis]
        
        print(MFCCs.shape)
        # make prediction
        predictions = self.model.predict(MFCCs) # [[0.1,0.6,0.1]]
        predict_index = np.argmax(predictions)
        predicted_heart_sound = self._mappings[predict_index]

        return predicted_heart_sound

    # pre process method
    def preprocess(self, file_path, n_mfcc=13, n_fft=2048, hop_length=512):

        # load audio file
        signal, sr = librosa.load(file_path, duration=5)
        
        # ensure consistency on the audio file length
        if len(signal) > NUM_SAMPLES_TO_CONSIDER * 5:
            signal = signal[:NUM_SAMPLES_TO_CONSIDER]
        
        # extract MFCCs
        MFCCs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length)
        print("original: ",MFCCs.shape)
        
        return MFCCs.T

In [37]:
SAMPLES_PER_TRACK=NUM_SAMPLES_TO_CONSIDER*5

In [39]:
def Heart_sound_spotting_service():
    #ensure that we only have 1 instance of HSSS
    if _heart_sound_spotting_service._instance is None:
        _heart_sound_spotting_service._instance = _heart_sound_spotting_service()
        _heart_sound_spotting_service.model = keras.models.load_model(MODEL_PATH)
        print("Model expects input of shape:", _heart_sound_spotting_service.model.input_shape)
    return _heart_sound_spotting_service._instance

In [41]:
HSSS = Heart_sound_spotting_service()
pred1 = HSSS.predict("test_data/112.wav")
pred2 = HSSS.predict("test_data/normal__140_1306519735121_B.wav")

pred3 = HSSS.predict("test_data/artifact__201105040918.wav")
pred4 = HSSS.predict("data/normal/normal__260_1309353164458_C.wav")


print(f"pred keywords {pred1}, {pred2}, {pred3}, {pred4}")



Model expects input of shape: (None, 216, 13, 1)
original:  (13, 215)
(1, 215, 13, 1)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 293ms/step
original:  (13, 216)
(1, 216, 13, 1)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 327ms/step
original:  (13, 216)
(1, 216, 13, 1)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
original:  (13, 216)
(1, 216, 13, 1)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
pred keywords artifact, abnormal, artifact, normal
