In [1]:
import numpy as np
import librosa
import os
import tensorflow as tf
from tensorflow.keras import layers, models
from mfcc import generate_features
import edgeimpulse as ei
from tensorflow.keras.optimizers.legacy import Adam


# Dataset location and class folders. Each entry of `folders` is a sub-folder
# of `root` holding the .wav clips of one class; a folder's position in the
# list is the integer label assigned in prepare_data (via folders.index).
root = 'data_resampled/'
folders = ['MinhQuang', 'Unknown', 'Noise']




In [3]:
# Load the dataset: list the .wav files found in each class folder.
# The folder name is kept alongside each file name and serves as its label.

def load_data():
    """Collect every ``.wav`` file found in the class folders listed in ``folders``.

    Returns:
        list[tuple[str, str]]: ``(folder, file_name)`` pairs, where the folder
        name doubles as the class label. Files are sorted by name within each
        folder: ``os.listdir`` returns entries in arbitrary order, and a
        stable order is required for a reproducible train/test split.
    """
    data = []
    for folder in folders:
        # os.path.join works whether or not `root` ends with a path separator.
        for file in sorted(os.listdir(os.path.join(root, folder))):
            if file.endswith('.wav'):
                data.append((folder, file))
    return data

def extract_mfcc(audio_path):
    """Compute a fixed-length MFCC feature array for one audio clip.

    The clip is loaded at 16 kHz, limited to its first second, and zero-padded
    to exactly one second when it is shorter. Without the padding, short clips
    produce fewer frames and the per-clip feature arrays can no longer be
    stacked into a single rectangular array.

    Args:
        audio_path: Path of the audio file to load.

    Returns:
        np.ndarray: the ``'features'`` entry returned by ``generate_features``.
    """
    target_sr = 16000
    clip_seconds = 1
    audio, sr = librosa.load(audio_path, sr=target_sr, duration=clip_seconds)

    # Force every clip to exactly `clip_seconds` of audio so that all clips
    # yield the same number of MFCC frames.
    expected_samples = int(sr * clip_seconds)
    if len(audio) < expected_samples:
        audio = np.pad(audio, (0, expected_samples - len(audio)))
    else:
        audio = audio[:expected_samples]

    # Number of MFCC coefficients
    n_mfcc = 13

    # Frame length and frame stride, in seconds
    frame_length = 0.02  # 20 ms
    frame_stride = 0.02  # 20 ms

    # Number of Mel filters (passed as num_filters)
    filter_number = 32

    # FFT window length (passed as fft_length)
    n_fft = 256

    # NOTE(review): librosa returns floating-point samples; confirm that the
    # local `mfcc.generate_features` expects this amplitude scale rather than
    # int16-range values.
    # Extract the MFCC features
    mfccs = generate_features(implementation_version=4,
                                win_size=101,
                                pre_shift=1,
                                draw_graphs=False,
                                raw_data=audio,
                                axes=['accY'],
                                sampling_freq=sr,
                                frame_length=frame_length,
                                frame_stride=frame_stride,
                                num_filters=filter_number,
                                fft_length=n_fft,
                                low_frequency=0,
                                high_frequency=0,
                                pre_cof=0.98,
                                num_cepstral=n_mfcc
                                )

    # Normalize MFCCs (optional)
    # mfccs = librosa.util.normalize(mfccs, axis=1, norm=1)
    mfccs = np.array(mfccs['features'])
    return mfccs

# Turn (label, file) pairs into feature and label arrays for training.
def prepare_data(data):
    """Build the feature and label arrays for a list of ``(label, file)`` pairs.

    Args:
        data: Iterable of ``(label, file)`` tuples, where ``label`` is one of
            the names in ``folders`` and ``file`` is a file name inside
            ``root/label``.

    Returns:
        tuple: ``(X, y)`` where ``X`` stacks the MFCC features of every file
        and ``y`` holds each file's class index (its position in ``folders``).
    """
    # Process one clip at a time: extract its features, then resolve its label.
    samples = [
        (extract_mfcc(os.path.join(root, label, file)), folders.index(label))
        for label, file in data
    ]

    X = np.array([features for features, _ in samples])
    y = np.array([class_index for _, class_index in samples])
    return X, y

In [4]:

# Sort before shuffling: directory listing order is not guaranteed, so a
# seeded shuffle is only reproducible when it starts from a fixed order.
data = sorted(load_data())
print(len(data))

# Shuffle with a fixed seed so every run produces the same train/test split.
SPLIT_SEED = 42
np.random.default_rng(SPLIT_SEED).shuffle(data)

# 0.8 train, 0.2 test
split = int(0.8 * len(data))
train_data = data[:split]
test_data = data[split:]

# The cached features are only valid as a complete set produced by one split.
# If any file is missing, rebuild all four; mixing a cached train set with a
# freshly split test set could put the same clips in both.
# Delete the .npy files to force regeneration after the dataset changes.
cache_files = ['X_train.npy', 'y_train.npy', 'X_test.npy', 'y_test.npy']
if all(os.path.exists(path) for path in cache_files):
    X_train = np.load('X_train.npy')
    y_train = np.load('y_train.npy')
    X_test = np.load('X_test.npy')
    y_test = np.load('y_test.npy')
else:
    X_train, y_train = prepare_data(train_data)
    X_test, y_test = prepare_data(test_data)
    # Save the MFCC features and labels for later runs
    np.save('X_train.npy', X_train)
    np.save('y_train.npy', y_train)
    np.save('X_test.npy', X_test)
    np.save('y_test.npy', y_test)


4608


In [14]:
from model.CNN import CNN
from model.DS_CNN import DS_CNN
from model.TC_Resnet import TC_Resnet
# from model.DCNN import DCNN
# from model.RNN import RNN


# Size of one sample along axis 1 of X_train; passed to every model builder
# as its input length.
input_length = X_train.shape[1]
# One output class per entry in `folders`.
num_classes = len(folders)


# Number of training epochs used by every model.fit call below.
EPOCHES = 100


In [15]:
# Build one untrained instance of each architecture under comparison.
model_cnn = CNN(input_length, num_classes).build_model()
# The RNN model is disabled: its import is commented out, so building it here
# would raise NameError on a fresh kernel, and no later cell trains or saves it.
# model_rnn = RNN(input_length, num_classes).build_model()
model_ds_cnn = DS_CNN(input_length, num_classes).build_model()
model_tc_resnet = TC_Resnet().build_smaller_tc_resnet(input_length, num_classes, 0.5)

In [16]:

model_cnn.summary()

# Hold out the last 20% of the training set for validation (the tail of the
# data, as `validation_split=0.2` would use). Slicing it explicitly lets the
# same samples drive model selection below, so the test set is never used to
# choose a model.
val_start = int(len(X_train) * 0.8)
X_fit, y_fit = X_train[:val_start], y_train[:val_start]
X_val, y_val = X_train[val_start:], y_train[val_start:]

model_cnn.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model_cnn.fit(X_fit, y_fit, epochs=EPOCHES, validation_data=(X_val, y_val))
# Score of the current best model; cached so it is not re-evaluated each run.
best_val_acc = model_cnn.evaluate(X_val, y_val, verbose=0)[1]

# Train three more models from scratch and keep the one with the best
# validation accuracy.
for _ in range(3):
    try_model = CNN(input_length, num_classes).build_model()
    try_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    try_model.fit(X_fit, y_fit, epochs=EPOCHES, validation_data=(X_val, y_val))
    try_val_acc = try_model.evaluate(X_val, y_val, verbose=0)[1]
    if try_val_acc > best_val_acc:
        model_cnn, best_val_acc = try_model, try_val_acc

# Evaluate the selected model on the untouched test set once.
model_cnn.evaluate(X_test, y_test)


Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gaussian_noise_8 (Gaussian  (None, 650)               0         
 Noise)                                                          
                                                                 
 reshape_14 (Reshape)        (None, 50, 13, 1)         0         
                                                                 
 conv2d_49 (Conv2D)          (None, 50, 13, 8)         80        
                                                                 
 max_pooling2d_9 (MaxPoolin  (None, 25, 7, 8)          0         
 g2D)                                                            
                                                                 
 dropout_13 (Dropout)        (None, 25, 7, 8)          0         
                                                                 
 conv2d_50 (Conv2D)          (None, 25, 7, 16)        

In [17]:
# from tensorflow.keras.optimizers.legacy import Adam

# adam = Adam(learning_rate=0.01)

# model_rnn.summary()
# # model_rnn.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# model_rnn.compile(optimizer=adam, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# model_rnn.fit(X_train, y_train, epochs=EPOCHES, validation_split=0.2)

In [18]:
model_ds_cnn.summary()

# Hold out the last 20% of the training set for validation (the tail of the
# data, as `validation_split=0.2` would use). Slicing it explicitly lets the
# same samples drive model selection below, so the test set is never used to
# choose a model.
val_start = int(len(X_train) * 0.8)
X_fit, y_fit = X_train[:val_start], y_train[:val_start]
X_val, y_val = X_train[val_start:], y_train[val_start:]

model_ds_cnn.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model_ds_cnn.fit(X_fit, y_fit, epochs=EPOCHES, validation_data=(X_val, y_val))
# Score of the current best model; cached so it is not re-evaluated each run.
best_val_acc = model_ds_cnn.evaluate(X_val, y_val, verbose=0)[1]

# Train three more models from scratch and keep the one with the best
# validation accuracy.
for _ in range(3):
    try_model = DS_CNN(input_length, num_classes).build_model()
    try_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    try_model.fit(X_fit, y_fit, epochs=EPOCHES, validation_data=(X_val, y_val))
    try_val_acc = try_model.evaluate(X_val, y_val, verbose=0)[1]
    if try_val_acc > best_val_acc:
        model_ds_cnn, best_val_acc = try_model, try_val_acc

# Evaluate the selected model on the untouched test set once.
model_ds_cnn.evaluate(X_test, y_test)


Model: "model_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_11 (InputLayer)       [(None, 650)]             0         
                                                                 
 reshape_17 (Reshape)        (None, 50, 13, 1)         0         
                                                                 
 gaussian_noise_9 (Gaussian  (None, 50, 13, 1)         0         
 Noise)                                                          
                                                                 
 conv_1 (Conv2D)             (None, 25, 13, 16)        640       
                                                                 
 conv_1_bn (BatchNormalizat  (None, 25, 13, 16)        64        
 ion)                                                            
                                                                 
 conv_1_relu (ReLU)          (None, 25, 13, 16)        0  

In [19]:
model_tc_resnet.summary()

# Hold out the last 20% of the training set for validation (the tail of the
# data, as `validation_split=0.2` would use). Slicing it explicitly lets the
# same samples drive model selection below, so the test set is never used to
# choose a model.
val_start = int(len(X_train) * 0.8)
X_fit, y_fit = X_train[:val_start], y_train[:val_start]
X_val, y_val = X_train[val_start:], y_train[val_start:]

model_tc_resnet.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model_tc_resnet.fit(X_fit, y_fit, epochs=EPOCHES, validation_data=(X_val, y_val))
# Score of the current best model; cached so it is not re-evaluated each run.
best_val_acc = model_tc_resnet.evaluate(X_val, y_val, verbose=0)[1]

# Train three more models from scratch and keep the one with the best
# validation accuracy.
for _ in range(3):
    try_model = TC_Resnet().build_smaller_tc_resnet(input_length, num_classes, 0.5)
    try_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    try_model.fit(X_fit, y_fit, epochs=EPOCHES, validation_data=(X_val, y_val))
    try_val_acc = try_model.evaluate(X_val, y_val, verbose=0)[1]
    if try_val_acc > best_val_acc:
        model_tc_resnet, best_val_acc = try_model, try_val_acc

# Evaluate the selected model on the untouched test set once.
model_tc_resnet.evaluate(X_test, y_test)


Model: "model_11"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_12 (InputLayer)       [(None, 650)]                0         []                            
                                                                                                  
 reshape_18 (Reshape)        (None, 50, 13, 1)            0         ['input_12[0][0]']            
                                                                                                  
 conv2d_52 (Conv2D)          (None, 50, 13, 4)            12        ['reshape_18[0][0]']          
                                                                                                  
 conv2d_53 (Conv2D)          (None, 25, 7, 4)             144       ['conv2d_52[0][0]']           
                                                                                           

In [20]:
# Save the selected model of each architecture.

# Create the output directory if needed; exist_ok avoids the separate
# existence check and the race between checking and creating.
os.makedirs('train_model', exist_ok=True)
model_cnn.save('train_model/model_cnn.keras')
# model_rnn.save('train_model/model_rnn.keras')
model_ds_cnn.save('train_model/model_ds_cnn.keras')
model_tc_resnet.save('train_model/model_tc_resnet.keras')