In [1]:
import numpy as np
import random
import time
import pyaudio
import struct

In [2]:
# Audio settings shared by the capture and playback streams.
FORMAT = pyaudio.paFloat32   # sample format handed to both streams
CHANNELS = 1
RATE = 44100
CHUNK = 1024                 # frames per buffer, also the size of each read
RECORD_SECONDS = 1.01

p = pyaudio.PyAudio()

# One stream opened for input and one for output, with identical parameters.
inputs = p.open(rate=RATE, channels=CHANNELS, format=FORMAT,
                frames_per_buffer=CHUNK, input=True)
outputs = p.open(rate=RATE, channels=CHANNELS, format=FORMAT,
                 frames_per_buffer=CHUNK, output=True)

In [3]:
def play_sound(array):
    """Write each element of `array`, in order, to the module-level `outputs` stream."""
    for chunk in array:
        outputs.write(chunk)

In [4]:
# Load activation recording number 5, flatten it to 1-D, and play it back.
test = np.load("./Act_train/activation/5.npy").flatten()
play_sound(test)

In [5]:
# Load negative recording number 30, flatten it to 1-D, and play it back.
test = np.load("./Act_train/negative/30.npy").flatten()
play_sound(test)

In [6]:
# Read activation files 1..15 and reinterpret each flattened array's raw
# buffer as a 1-D numeric array.
# NOTE(review): np.frombuffer defaults to float64 while the audio streams are
# opened as paFloat32 — confirm this reinterpretation is intended.
act = [
    np.frombuffer(np.load(f"./Act_train/activation/{idx}.npy").flatten())
    for idx in range(1, 16)
]

In [7]:
# Read negative files 1..120 and reinterpret each flattened array's raw
# buffer as a 1-D numeric array.
# NOTE(review): np.frombuffer defaults to float64 while the audio streams are
# opened as paFloat32 — confirm this reinterpretation is intended.
neg = [
    np.frombuffer(np.load(f"./Act_train/negative/{idx}.npy").flatten())
    for idx in range(1, 121)
]

In [8]:
# Stack the per-file arrays into 2-D arrays.
act, neg = np.array(act), np.array(neg)
# Shapes of both stacks, then of a single clip from each.
print(act.shape, neg.shape, act[0].shape, neg[0].shape, sep="\n")

(15, 22016)
(120, 22016)
(22016,)
(22016,)


In [9]:
# Tx and n_freq are the two dimensions the model's Reshape layer folds each
# flat input vector into; Tx * n_freq = 22016, the length of one example.
Tx = 1376 # Number of time steps after the Reshape layer
n_freq = 16  # Number of features per time step

In [10]:
def create_training_example(activates, negatives):
    """
    Randomly pick one labelled example from the two pools.

    An integer is drawn uniformly from 1..120. When it is a multiple of 5
    (24 of the 120 values, i.e. probability 0.2) a random element of
    `activates` is returned with label 1; otherwise a random element of
    `negatives` is returned with label 0.

    Arguments:
    activates -- indexable collection of positive examples
    negatives -- indexable collection of negative examples

    Returns:
    x -- the selected element
    y -- 1 if x was taken from `activates`, 0 if taken from `negatives`
    """
    draw = np.random.randint(1, 121)

    # Decide the class first, then sample uniformly inside the chosen pool.
    if draw % 5 != 0:
        pool, y = negatives, 0
    else:
        pool, y = activates, 1

    x = pool[np.random.choice(len(pool))]
    return x, y

In [None]:
#print(len(act[0]))
#graph_spectrogram(act[0])

In [None]:
x, y = create_training_example(act, neg)
# Inspect one draw: container type, element type, label type, label, data.
print(type(x), type(x[0]), type(y), y, x, sep="\n")

In [None]:
# Number of examples generated by each dataset-building loop below.
nsamples = 3000

In [None]:
nsamples = 3000
X, Y = [], []
for i in range(nsamples):
    if not i % 100:
        print(i)  # progress marker every 100 examples
    x, y = create_training_example(act, neg)
    X.append(x)
    Y.append(y)
# Stack into arrays; the labels become a column of shape (nsamples, 1).
X, Y = np.array(X), np.array(Y).reshape(nsamples, 1)

In [12]:
print(X.shape, Y.shape, sep="\n")
# Report whether at least one positive label (1) is present in Y.
print(str(1 in Y))

(3000, 22016)
(3000, 1)
True


In [10]:
# To persist a freshly generated training set, uncomment the two np.save lines:
#np.save(f'./Act_train/XY_train/X.npy', X)
#np.save(f'./Act_train/XY_train/Y.npy', Y)
# Load the previously saved training examples; this rebinds X and Y,
# replacing whatever they held before.
X = np.load("./Act_train/XY_train/X.npy")
Y = np.load("./Act_train/XY_train/Y.npy")

In [None]:
# NOTE(review): the dev set is drawn from the same `act`/`neg` pools as the
# training set, so it may not measure generalisation to unseen recordings.
X_dev, Y_dev = [], []
for i in range(nsamples):
    if not i % 100:
        print(i)  # progress marker every 100 examples
    x, y = create_training_example(act, neg)
    X_dev.append(x)
    Y_dev.append(y)
X_dev = np.array(X_dev)
Y_dev = np.array(Y_dev).reshape(nsamples, 1)

# Persist the generated dev set to disk.
np.save('./Act_train/XY_dev/X_dev.npy', X_dev)
np.save('./Act_train/XY_dev/Y_dev.npy', Y_dev)

In [None]:
# Rebind both dev-set names to fresh empty lists.
X_dev, Y_dev = [], []

In [26]:
# Load the dev set from the .npy files under ./Act_train/XY_dev/.
X_dev = np.load("./Act_train/XY_dev/X_dev.npy")
Y_dev = np.load("./Act_train/XY_dev/Y_dev.npy")

In [38]:
# First label, container type, then label and feature array shapes.
print(Y_dev[0], type(Y_dev), Y_dev.shape, X_dev.shape, sep="\n")


[1]
<class 'numpy.ndarray'>
(3000, 1)
(3000, 22016)


In [13]:
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import Model, load_model, Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, Input, Masking, TimeDistributed, LSTM, Conv1D
from tensorflow.keras.layers import GRU, Bidirectional, BatchNormalization, Reshape, Flatten
from tensorflow.keras.optimizers import Adam

In [None]:
# Load the model stored at "modelo" (the same path model.save writes to
# later in this notebook).
model = load_model("modelo")

In [19]:
def modelf(input_shape):
    """
    Build the Keras model graph.

    Pipeline: Reshape to (Tx, n_freq) -> Conv1D block -> four GRU blocks ->
    per-time-step sigmoid -> Flatten -> single sigmoid unit.

    Argument:
    input_shape -- shape of one input sample, forwarded to keras Input

    Returns:
    model -- Keras Model instance (not compiled here)
    """
    model_input = Input(shape=input_shape)

    # Fold the flat input vector into a (Tx, n_freq) sequence.
    seq = Reshape((Tx, n_freq))(model_input)

    # Convolutional front end: conv -> batch norm -> ReLU -> dropout.
    seq = Conv1D(filters=196, kernel_size=15, strides=4)(seq)
    seq = BatchNormalization()(seq)
    seq = Activation("relu")(seq)
    seq = Dropout(rate=0.8)(seq)

    # Recurrent stack: every GRU is followed by dropout, then batch norm.
    for gru_units in (128, 128, 60, 60):
        seq = GRU(units=gru_units, return_sequences=True)(seq)
        seq = Dropout(rate=0.8)(seq)
        seq = BatchNormalization()(seq)

    # One sigmoid score per time step, then a single sigmoid unit over all steps.
    seq = TimeDistributed(Dense(1, activation="sigmoid"))(seq)
    output = Dense(1, activation="sigmoid")(Flatten()(seq))

    return Model(inputs=model_input, outputs=output)

In [20]:
# Build the network for flat input vectors of length X.shape[1].
# The shape is passed as a 1-tuple: `(X.shape[1])` without the trailing comma
# is a plain int, whereas Keras documents Input's `shape` as a shape tuple.
model = modelf(input_shape=(X.shape[1],))

In [21]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 22016)]           0         
                                                                 
 reshape_1 (Reshape)         (None, 1376, 16)          0         
                                                                 
 conv1d_1 (Conv1D)           (None, 341, 196)          47236     
                                                                 
 batch_normalization_4 (Batc  (None, 341, 196)         784       
 hNormalization)                                                 
                                                                 
 activation_1 (Activation)   (None, 341, 196)          0         
                                                                 
 dropout_4 (Dropout)         (None, 341, 196)          0         
                                                           

In [22]:
# Adam with a very small learning rate (1e-6); beta values are set explicitly.
opt = Adam(beta_1=0.9, beta_2=0.999, learning_rate=1e-6)
# Binary cross-entropy for the single sigmoid output, tracking accuracy.
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=["accuracy"])

In [23]:
# Train for 50 epochs on (X, Y) in mini-batches of 16.
model.fit(x=X, y=Y, epochs=50, batch_size=16)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x16682aed990>

In [27]:
# NOTE(review): create_training_example labels about 20% of its draws as
# positive; if the dev set was built that way, always predicting 0 would
# already score around 0.8 accuracy — consider checking precision/recall too.
loss, acc = model.evaluate(x=X_dev, y=Y_dev)
print("Dev set accuracy = ", acc)

Dev set accuracy =  0.8056666851043701


In [25]:
# Rebind X and Y to empty lists, dropping these names' references to the
# training arrays.
X, Y= [], []

In [None]:
# Save the current model under the path "modelo".
model.save("modelo")

In [32]:
# Number of CHUNK-sized reads covering RECORD_SECONDS of audio (truncated).
n_reads = int(RATE / CHUNK * RECORD_SECONDS)
frames = [inputs.read(CHUNK) for _ in range(n_reads)]
# Reinterpret the recorded buffers as one flat numeric array, the same way
# the stored clips are read with np.frombuffer earlier in this notebook.
aux = np.frombuffer(np.array(frames))

In [33]:
# Feed the recording to the model as a batch containing a single example.
predictions = model.predict(aux.reshape(1, -1))



In [34]:
# Display the complement of the model's sigmoid output.
# NOTE(review): presumably read as the "not activation" score — confirm why
# the complement is shown rather than the prediction itself.
1-predictions

array([[0.7733404]], dtype=float32)