# 1. Import Necessary Libraries

In [1]:
import librosa
from keras.models import load_model
import numpy as np
import cv2
import IPython.display as ipd

Using TensorFlow backend.


# 2. Load Model

In [2]:
# Load the Keras model used by print_prediction below.
# NOTE(review): the path looks like a placeholder - point it at the .hdf5 file
# produced by your own training run.
model = load_model("MODEL/model you train.hdf5")

# 3. Preprocessing Function

In [3]:
def by_value(item):
    '''
    Use: Sort key that orders (key, value) pairs by their value

    In: Indexable item, e.g. a (key, value) tuple from dict.items()
    Out: The element stored at position 1 of the item
    '''
    value = item[1]
    return value

In [4]:
def Convert_Mfcc2Img(mfcc):
    '''
    Use: Convert mfcc feature to image, in order to save this image

    The values are rescaled linearly so that the smallest one maps to 0 and
    the largest one maps to 255. When every value is identical there is no
    range to stretch, so an all-zero image is returned instead of dividing
    by zero (which would yield an image full of NaN).

    In: Mfcc feature (non-empty array-like of numbers)
    Out: Image corresponding to mfcc feature (float array, same shape as input)
    Raises: ValueError (from numpy) when the input is empty
    '''
    mfcc = np.asarray(mfcc)

    MIN = np.min(mfcc)
    MAX = np.max(mfcc)

    NEW_MAX = 255
    NEW_MIN = 0

    value_range = MAX - MIN
    if value_range == 0:
        # Constant input: keep a floating dtype so the result has the same
        # kind of dtype as the regular (division-based) path.
        if np.issubdtype(mfcc.dtype, np.floating):
            out_dtype = mfcc.dtype
        else:
            out_dtype = np.float64
        return np.full(mfcc.shape, NEW_MIN, dtype=out_dtype)

    img_mfcc = (mfcc - MIN) / value_range * (NEW_MAX - NEW_MIN) + NEW_MIN

    return img_mfcc

In [5]:
def extract_features(file_name, max_pad_len=180):
    '''
    Use: Load an audio file and compute a fixed-width MFCC matrix

    The MFCC matrix is zero-padded on the right when the clip has fewer than
    max_pad_len frames and truncated when it has more, so the output always
    has exactly max_pad_len columns.

    In: file_name   - path of the audio file to load
        max_pad_len - number of time frames (columns) in the output
    Out: MFCC array with 40 rows and max_pad_len columns,
         or None when the file could not be loaded / processed
    '''
    try:
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)

        pad_width = max_pad_len - mfccs.shape[1]
        if pad_width < 0:
            # Longer clips used to make np.pad raise on a negative width,
            # which was then misreported as a parsing error; keep the first
            # max_pad_len frames instead.
            mfccs = mfccs[:, :max_pad_len]
        else:
            mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')

    except Exception as e:
        # Best-effort by design: report the failure (including its cause)
        # and let the caller decide what to do with a missing result.
        print("Error encountered while parsing file: ", file_name, "-", e)
        return None

    return mfccs

In [6]:
def print_prediction(file_name, top_k=5):
    '''
    Use: Print the most likely speakers for an audio file, best first

    The file is turned into an MFCC matrix, rescaled to an image, resized to
    64x64 and passed to the global `model`. Each printed line has the form
    "<rank>. <name> - <probability>%".

    In: file_name - path of the audio file to classify
        top_k     - how many classes to print (capped at the number of
                    classes the model outputs)
    Out: None (results are printed)
    '''
    dictionary={0:'Da', 1:'Giang', 2:'Jack', 3:'Misthy', 4:'Ngoc', 5:'Thanh', 6:'Tung'}

    prediction_feature = extract_features(file_name)
    if prediction_feature is None:
        # extract_features signals failure with None; stop here instead of
        # failing later with an unrelated-looking error.
        print("Could not extract features from: ", file_name)
        return

    prediction_feature = Convert_Mfcc2Img(prediction_feature)
    prediction_feature = cv2.resize(prediction_feature, (64,64))

    # Single sample, single channel: (batch, height, width, channels).
    prediction_feature = prediction_feature.reshape(1, 64, 64, 1)

    probabilities = np.asarray(model.predict(prediction_feature))[0]

    # Stable ascending sort, then reversed: highest probability first, and on
    # ties the higher class index first (same order as sorting the items of
    # an index->probability dict by value and reading it from the end).
    ranked = np.argsort(probabilities, kind='stable')[::-1]
    ranked = ranked[:max(int(top_k), 0)]

    for rank, class_id in enumerate(ranked, start=1):
        label = dictionary.get(int(class_id), 'class {}'.format(class_id))
        print('{}. {} - {}%'.format(rank, label, round(probabilities[class_id]*100,2)))

# 4. Test ResNet50

In [None]:
# Use a forward slash so the path resolves on Windows, Linux and macOS alike
# (a backslash separator only works on Windows); this also matches the style
# of the model path used earlier in the notebook.
filename = 'TEST/Tung.wav'

# Last expression of the cell, so the notebook renders an audio player.
ipd.Audio(filename)

In [None]:
# Print the ranked class predictions for the test clip selected above.
print_prediction(filename)