In [1]:
import numpy as np
import random
import time
import pyaudio
import struct
import sys
import io
import os
from os import path
import glob
import IPython
import matplotlib.pyplot as plt
import soundfile as sf
import wave

In [2]:
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
RECORD_SECONDS = 1.01

p = pyaudio.PyAudio()

inputs = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK)

In [3]:
IPython.display.Audio("./Act_train/activation/2.wav")

In [4]:
IPython.display.Audio("./Act_train/negative/50.wav")

In [3]:
act, neg = [], []
for i in range(15):
    act.append(np.load("./Act_train/activation/"+str(i+1)+".npy"))
for i in range(120):
    neg.append(np.load("./Act_train/negative/"+str(i+1)+".npy"))

In [4]:
act, neg = np.array(act), np.array(neg)
print(act.shape)
print(neg.shape)

(15, 44032)
(120, 44032)


In [17]:
Tx = 688 # The number of time steps input to the model from the spectrogram
n_freq = 64 # Number of frequencies input to the model at each time step of the spectrogram

In [5]:
def create_training_example(activates, negatives):
    """
    Creates a training example with a given background, activates, and negatives.
    
    Arguments:
    background -- a 10 second background audio recording
    activates -- a list of audio segments of the word "activate"
    negatives -- a list of audio segments of random words that are not "activate"
    Ty -- The number of time steps in the output

    Returns:
    x -- the spectrogram of the training example
    y -- the label at each time step of the spectrogram
    """

    y = 0
    x = None
    flag = np.random.randint(1, 121)
    if flag%5==0:
        x = (activates[np.random.choice(len(activates))])
        y = 1
    else:
        x = (negatives[np.random.choice(len(negatives))])
    
    return x, y

In [7]:
x, y = create_training_example(act, neg)
print(type(x))
print((y))
print(x.shape)

<class 'numpy.ndarray'>
0
(44032,)


In [8]:
nsamples = 2000
ntimes=5

In [9]:
X = []
Y = []
for i in range(0, nsamples):
    if i%100 == 0:
        print(i)
    x, y = create_training_example(act, neg)
    X.append(x)
    Y.append(y)
X = np.array(X)
Y = np.array(Y).reshape((nsamples, 1))
    # Save the data for further uses
np.save(f'./Act_train/XY_train/X_npy.npy', X)
np.save(f'./Act_train/XY_train/Y_npy.npy', Y)

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900


In [10]:
print(X.shape)
print(Y.shape)
print("True" if 1 in Y else "False")

(2000, 44032)
(2000, 1)
True


In [11]:
X_dev = []
Y_dev = []
for i in range(0, nsamples):
    if i%100 == 0:
        print(i)
    x, y = create_training_example(act, neg)
    X_dev.append(x)
    Y_dev.append(y)
X_dev = np.array(X_dev)
Y_dev = np.array(Y_dev).reshape((nsamples, 1))
np.save(f'./Act_train/XY_dev/X_dev_npy.npy', X_dev)
np.save(f'./Act_train/XY_dev/Y_dev_npy.npy', Y_dev)

0
100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900


In [None]:
X_dev = np.load("./Act_train/XY_dev/X_dev.npy")
Y_dev = np.load("./Act_train/XY_dev/Y_dev.npy")

In [12]:
print((Y_dev[0]))
print(type(Y_dev))
print(Y_dev.shape)
print(X_dev.shape)


[1]
<class 'numpy.ndarray'>
(2000, 1)
(2000, 44032)


In [13]:
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import Model, load_model, Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, Input, Masking, TimeDistributed, LSTM, Conv1D
from tensorflow.keras.layers import GRU, Bidirectional, BatchNormalization, Reshape, Flatten
from tensorflow.keras.optimizers import Adam

In [109]:
def modelf(input_shape):
    """
    
    Argument:
    input_shape -- shape of the model's input data (using Keras conventions)

    Returns:
    model -- Keras model instance
    """
    
    X_input = Input(shape = input_shape)
    
    X = Reshape((Tx, n_freq))(X_input)
    X = Conv1D(filters = 196, kernel_size=5, strides=2)(X)
    X = BatchNormalization()(X)
    X = Activation("relu")(X)
    X = Dropout(rate=0.85)(X)                                  

    X = GRU(units = 128, return_sequences=True)(X)
    X = Dropout(rate = 0.85)(X)
    X = BatchNormalization()(X)                           
    
    X = GRU(units = 128, return_sequences=True)(X)
    X = Dropout(rate = 0.85)(X)       
    X = BatchNormalization()(X)
    
    X = GRU(units = 128, return_sequences=True)(X)
    X = Dropout(rate = 0.85)(X)       
    X = BatchNormalization()(X) 
    
    X = GRU(units = 60, return_sequences=True)(X)
    X = Dropout(rate = 0.85)(X)       
    X = BatchNormalization()(X) 
    
    X = GRU(units = 60, return_sequences=True)(X)
    X = Dropout(rate = 0.85)(X)       
    X = BatchNormalization()(X) 
    
    X = GRU(units = 30, return_sequences=True)(X)
    X = Dropout(rate = 0.90)(X)       
    X = BatchNormalization()(X) 
    
    X = TimeDistributed(Dense(1, activation = "sigmoid"))(X)
    #X = BatchNormalization()(X)
    X = (Dense(1, activation = "sigmoid"))(Flatten()(X))

    model = Model(inputs = X_input, outputs = X)
    
    return model

In [110]:
model = modelf(input_shape = (X.shape[1]))

In [111]:
model.summary()

Model: "model_17"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_20 (InputLayer)       [(None, 44032)]           0         
                                                                 
 reshape_17 (Reshape)        (None, 688, 64)           0         
                                                                 
 conv1d_17 (Conv1D)          (None, 342, 196)          62916     
                                                                 
 batch_normalization_114 (Ba  (None, 342, 196)         784       
 tchNormalization)                                               
                                                                 
 activation_17 (Activation)  (None, 342, 196)          0         
                                                                 
 dropout_111 (Dropout)       (None, 342, 196)          0         
                                                          

In [112]:
opt = Adam(learning_rate=1e-16, beta_1=0.9, beta_2=0.999)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=["accuracy"])

In [114]:
model.fit(X, Y, batch_size = 5, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x28e2b0eeb00>

In [115]:
loss, acc, = model.evaluate(X_dev, Y_dev)
print("Dev set accuracy = ", acc)

Dev set accuracy =  0.7979999780654907


In [None]:
model = load_model("modelo")

In [65]:
X, Y= [], []

In [None]:
model.save("modelo")