In [44]:
from os import listdir
from os.path import isdir, join
import librosa
import random
import numpy as np
import matplotlib.pyplot as plt
import math 

In [45]:
dataset_path = 'datasets_main_norm'
for name in listdir(dataset_path):
    if isdir(join(dataset_path, name)):
        print(name)

sheila
mot
ba
tat
hai
bat
den
quat


In [46]:
all_targets = [name for name in listdir(dataset_path) if isdir(join(dataset_path, name))]
print(all_targets)

['sheila', 'mot', 'ba', 'tat', 'hai', 'bat', 'den', 'quat']


In [47]:
num_samples = 0
for target in all_targets:
    print(len(listdir(join(dataset_path, target))))
    num_samples += len(listdir(join(dataset_path, target)))
print('Total samples:', num_samples)

100
129
107
64
129
56
72
66
Total samples: 723


In [48]:
target_list = all_targets
feature_sets_file = 'mfcc_sets_1.npz'
perc_keep_samples = 1.0 # 1.0 is keep all samples
val_ratio = 0.1
test_ratio = 0.1
sample_rate = 16000
num_mfcc = 12
len_mfcc = 12


In [49]:
filenames = []
y = []
for index, target in enumerate(target_list):
    print(join(dataset_path, target))
    filenames.append(listdir(join(dataset_path, target)))
    y.append(np.ones(len(filenames[index])) * index)

datasets_main_norm/sheila
datasets_main_norm/mot
datasets_main_norm/ba
datasets_main_norm/tat
datasets_main_norm/hai
datasets_main_norm/bat
datasets_main_norm/den
datasets_main_norm/quat


In [50]:
filenames = [item for sublist in filenames for item in sublist]
y = [item for sublist in y for item in sublist]

In [51]:
filenames_y = list(zip(filenames, y))
random.shuffle(filenames_y)
filenames, y = zip(*filenames_y)

In [52]:
val_set_size = int(len(filenames) * val_ratio)
test_set_size = int(len(filenames) * test_ratio)
filenames_val = filenames[:val_set_size]
filenames_test = filenames[val_set_size:(val_set_size + test_set_size)]
filenames_train = filenames[(val_set_size + test_set_size):]

In [53]:
y_orig_val = y[:val_set_size]
y_orig_test = y[val_set_size:(val_set_size + test_set_size)]
y_orig_train = y[(val_set_size + test_set_size):]

In [54]:
def calc_mfcc(file_path):
    y, sr = librosa.load(file_path)  # read .wav file
       
    hop_length = math.floor(sr * 0.010)  # 10ms hop
    win_length = math.floor(sr * 0.025)  # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y=y, sr=sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # subtract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1, 1))
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0)  # O^r
    # return T x 36 (transpose of X)
    return X.T  # hmmlearn use T x N matrix

In [33]:
def extract_features(in_files, in_y):
    prob_cnt = 0
    out_x = []
    out_y = []
        
    for index, filename in enumerate(in_files):
    
        # Create path from given filename and target item
        path = join(dataset_path, target_list[int(in_y[index])], 
                    filename)
        
        # Check to make sure we're reading a .wav file
        if not path.endswith('.wav'):
            continue

        # Create MFCCs
        mfccs = calc_mfcc(path)

        # Only keep MFCCs with given length
#         if mfccs.shape[1] == len_mfcc:
        out_x.append(mfccs)
        out_y.append(in_y[index])
#         else:
#             print('Dropped:', index, mfccs.shape)
#             prob_cnt += 1
            
    return out_x, out_y, prob_cnt

In [34]:
x_train, y_train, prob = extract_features(filenames_train, 
                                          y_orig_train)
print('Removed percentage:', prob / len(y_orig_train))
x_val, y_val, prob = extract_features(filenames_val, y_orig_val)
print('Removed percentage:', prob / len(y_orig_val))
x_test, y_test, prob = extract_features(filenames_test, y_orig_test)
print('Removed percentage:', prob / len(y_orig_test))

Removed percentage: 0.0
Removed percentage: 0.0
Removed percentage: 0.0


In [35]:
np.savez(feature_sets_file, 
         x_train=x_train, 
         y_train=y_train, 
         x_val=x_val, 
         y_val=y_val, 
         x_test=x_test, 
         y_test=y_test)

  val = np.asanyarray(val)


In [36]:
feature_sets = np.load(feature_sets_file)
feature_sets.files

['x_train', 'y_train', 'x_val', 'y_val', 'x_test', 'y_test']

In [37]:
from os import listdir
from os.path import isdir, join
from tensorflow.keras import layers, models
import numpy as np

In [38]:
dataset_path = 'datasets_main_num_cut'
all_targets = all_targets = [name for name in listdir(dataset_path) if isdir(join(dataset_path, name))]
#all_targets.remove('_background_noise_')
print(all_targets)

['sheila', 'mot', 'ba', 'tat', 'hai', 'bat', 'den', 'quat']


In [39]:
feature_sets_path = ""
feature_sets_filename = 'mfcc_sets_1.npz'
model_filename = 'a1_model.h5'
wake_word = 'stop'

In [40]:

feature_sets = np.load(feature_sets_filename, allow_pickle=True)
print(feature_sets.files)

['x_train', 'y_train', 'x_val', 'y_val', 'x_test', 'y_test']


In [41]:
x_train = feature_sets['x_train']
y_train = feature_sets['y_train']
x_val = feature_sets['x_val']
y_val = feature_sets['y_val']
x_test = feature_sets['x_test']
y_test = feature_sets['y_test']

In [42]:
# Look at tensor dimensions
print(x_train.shape)
print(x_val.shape)
print(x_test.shape)
x_train

(579,)
(72,)
(72,)


array([array([[-1.5810083e+02, -5.9505795e+01,  1.8106384e+01, ...,
               -8.5531296e-03,  1.1047821e-02,  2.8444726e-02],
              [-1.4841779e+02, -6.9928955e+01,  2.1160091e+01, ...,
               -8.5531296e-03,  1.1047821e-02,  2.8444726e-02],
              [-1.4642346e+02, -7.1479774e+01,  2.0416374e+01, ...,
               -8.5531296e-03,  1.1047821e-02,  2.8444726e-02],
              ...,
              [ 1.5090942e+02,  6.5342148e+01, -2.7874496e+01, ...,
               -3.3454481e-01,  1.7277725e+00,  7.8830487e-01],
              [ 1.5267349e+02,  6.8441628e+01, -2.0180248e+01, ...,
               -3.3454481e-01,  1.7277725e+00,  7.8830487e-01],
              [ 1.3885178e+02,  6.4952469e+01, -2.1246819e+01, ...,
               -3.3454481e-01,  1.7277725e+00,  7.8830487e-01]], dtype=float32),
       array([[-1.45896057e+02, -7.86878357e+01,  4.98819351e+00, ...,
               -5.93978413e-16, -5.68351749e-16, -2.07979411e-17],
              [-1.45896057e+02, -7

In [43]:
print(x_train.shape)
print(x_val.shape)
print(x_test.shape)

(579,)
(72,)
(72,)


In [22]:
x_train = x_train.reshape(x_train.shape[0], 
                          x_train.shape[1], 
                          x_train.shape[2], 
                          1)
x_val = x_val.reshape(x_val.shape[0], 
                      x_val.shape[1], 
                      x_val.shape[2], 
                      1)
x_test = x_test.reshape(x_test.shape[0], 
                        x_test.shape[1], 
                        x_test.shape[2], 
                        1)
print(x_train.shape)
print(x_val.shape)
print(x_test.shape)

IndexError: tuple index out of range

In [None]:
sample_shape = x_test.shape[1:]
print(sample_shape)
model = models.Sequential()
model.add(layers.Conv2D(32, 
                        (2, 2), 
                        activation='relu',
                        input_shape=sample_shape))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

model.add(layers.Conv2D(32, (2, 2), activation='relu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

model.add(layers.Conv2D(64, (2, 2), activation='relu'))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

# Classifier
model.add(layers.Flatten())
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(8, activation='softmax'))
model.summary()


In [None]:
model.compile(loss='sparse_categorical_crossentropy',
              optimizer="adam",
              metrics=['acc'])


In [None]:
history = model.fit(x_train, 
                    y_train, 
                    epochs=30, 
                    batch_size=32, 
                    validation_data=(x_val, y_val))

In [None]:
import matplotlib.pyplot as plt

acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(1, len(acc) + 1)

plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()

plt.figure()

plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()



In [None]:
models.save_model(model, model_filename)


In [None]:
for idx, y in enumerate(y_test):
    if y == 1:
        print(idx)

In [None]:
model = models.load_model(model_filename)
for i in range(100, 110):
    print('Answer:', y_test[i], ' Prediction:', model.predict(np.expand_dims(x_test[i], 0)))

In [None]:
model.evaluate(x=x_test, y=y_test)