In [None]:
import numpy as np
from pydub import AudioSegment
import random
import sys
import io
import os
from os import path
import glob
import IPython
import matplotlib.pyplot as plt
import soundfile as sf

%matplotlib inline

In [None]:
# Render an inline audio player for clip 5 of the "activation" folder.
IPython.display.Audio("./Act_train/activation/5.wav")

In [None]:
# Render an inline audio player for clip 15 of the "negative" folder.
IPython.display.Audio("./Act_train/negative/15.wav")

In [None]:
# Paths of the 15 "activation" clips, numbered 1.wav .. 15.wav.
act = [f"./Act_train/activation/{clip_no}.wav" for clip_no in range(1, 16)]

In [None]:
# Paths of the 15 "negative" clips, numbered 1.wav .. 15.wav.
neg = [f"./Act_train/negative/{clip_no}.wav" for clip_no in range(1, 16)]

In [None]:
# Sanity check: number of paths in each list, then the character length of the
# first path string in each list (these are path strings, not audio data).
print(len(act), len(neg), len(act[0]), len(neg[0]), sep="\n")


In [None]:
# Tx     -- the number of time steps input to the model from the spectrogram
#           (also passed as Fs to plt.specgram in graph_spectrogram)
# n_freq -- number of frequencies input to the model at each time step of the spectrogram
# Ty     -- the number of time steps in the output of our model
Tx, n_freq, Ty = 551, 101, 1375

In [None]:
def get_wav_info(wav_file):
    """
    Read an audio file with soundfile.

    Arguments:
    wav_file -- path (or other object accepted by sf.read) of the audio file

    Returns:
    samples -- first value returned by sf.read (the audio data)
    sample_rate -- second value returned by sf.read (the sample rate)
    """
    samples, sample_rate = sf.read(wav_file)
    return samples, sample_rate

def graph_spectrogram(wav_file):
    """
    Compute the spectrogram of an audio file with matplotlib's specgram.

    The file is read through get_wav_info. A 1-D sample array is used as is;
    for a 2-D sample array only column 0 is used. Because plt.specgram is a
    pyplot call, it also draws the spectrogram on the current axes.

    Arguments:
    wav_file -- path of the audio file to analyse

    Returns:
    pxx -- the first value returned by plt.specgram (the spectrum array)
    """
    samples, _rate = get_wav_info(wav_file)

    # Pick the 1-D signal to analyse (first column of a 2-D array).
    if samples.ndim == 1:
        signal = samples
    elif samples.ndim == 2:
        signal = samples[:, 0]

    # NFFT=200 is the window length and noverlap=120 the overlap between windows.
    # NOTE(review): Fs is set to Tx, not to the sample rate read from the
    # file -- confirm this is intended.
    spectrum, _freqs, _bins, _image = plt.specgram(signal, NFFT=200, Fs=Tx, noverlap=120)
    return spectrum

def create_training_example(activates, negatives):
    """
    Draw one random labelled example from the two clip pools.

    A die roll in 1..6 decides the class: rolls divisible by 3 (i.e. 3 and 6,
    one third of the time) pick a random entry of `activates` and label it 1;
    every other roll picks a random entry of `negatives` and labels it 0.

    Arguments:
    activates -- sequence of audio file paths for the positive class
    negatives -- sequence of audio file paths for the negative class

    Returns:
    x -- spectrogram of the chosen clip, as returned by graph_spectrogram
    y -- int label for the whole clip: 1 for an `activates` clip, 0 otherwise
    """
    roll = np.random.randint(1, 7)
    is_positive = roll % 3 == 0

    # Same RNG call order as before: class roll first, then the clip index.
    pool = activates if is_positive else negatives
    chosen_clip = pool[np.random.choice(len(pool))]

    x = graph_spectrogram(chosen_clip)
    y = 1 if is_positive else 0
    return x, y

In [None]:
# Generate one example and inspect the types/shape it comes back with.
x, y = create_training_example(act, neg)
print(type(x), x.shape, type(y), sep="\n")

In [None]:
# Generate the training set: one transposed spectrogram and one label per example.
# NOTE(review): only 5 training examples are generated here, while the dev-set
# cell generates 2000 -- confirm this is the intended size.
np.random.seed(4543)
nsamples = 5
spectrograms, labels = [], []
for i in range(nsamples):
    # Progress marker every 100 examples.
    if i % 100 == 0:
        print(i)
    x, y = create_training_example(act, neg)
    # Swap the two axes of the spectrogram before storing it.
    spectrograms.append(np.swapaxes(x, 0, 1))
    labels.append(y)
X = np.array(spectrograms)
Y = np.array(labels)

In [None]:
# Shape of the first training example.
print(X[0].shape)

In [None]:
# Quick look at the training arrays: sizes, container types, shapes and the first label.
print(len(X), type(Y), X.shape, Y.shape, type(X[0]), Y[0], sep="\n")

In [None]:
# Save the data for further uses
# np.save does not create missing directories, so make sure the target folder
# exists before writing (otherwise a fresh checkout fails with FileNotFoundError).
os.makedirs('./Act_train/XY_train', exist_ok=True)
np.save('./Act_train/XY_train/X.npy', X)
np.save('./Act_train/XY_train/Y.npy', Y)
# Load the preprocessed training examples
#X = np.load("./Act_train/XY_train/X.npy")
#Y = np.load("./Act_train/XY_train/Y.npy")

In [None]:
# Generate and save the dev set with the same generator used for the training set.
# NOTE(review): this reuses the training seed (4543) and the same act/neg clip
# lists, so the dev examples are not independent of the training examples --
# confirm whether a different seed / held-out clips are intended.
np.random.seed(4543)
nsamples = 2000
X_dev = []
Y_dev = []
for i in range(0, nsamples):
    # Progress marker every 100 examples.
    if i%100 == 0:
        print(i)
    x, y = create_training_example(act, neg)
    # Swap the two axes of the spectrogram before storing it.
    X_dev.append(x.swapaxes(0,1))
    Y_dev.append(y)
# "numpy.ndarray" is not a valid dtype name (np.array raises TypeError on it);
# let NumPy infer the dtype, exactly as the training-set arrays are built.
X_dev = np.array(X_dev)
Y_dev = np.array(Y_dev)
# np.save does not create missing directories, so create the target folder first.
os.makedirs('./Act_train/XY_dev', exist_ok=True)
np.save('./Act_train/XY_dev/X_dev.npy', X_dev)
np.save('./Act_train/XY_dev/Y_dev.npy', Y_dev)

In [None]:
# Reload the dev set from the folder the generation cell writes to
# (./Act_train/XY_dev -- the files are not stored under XY_train).
X_dev = np.load("./Act_train/XY_dev/X_dev.npy")
Y_dev = np.load("./Act_train/XY_dev/Y_dev.npy")

In [None]:
# First dev label and the container type of the dev labels.
print(Y_dev[0], type(Y_dev), sep="\n")

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import Model, load_model, Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, Input, Masking, TimeDistributed, LSTM, Conv1D
from tensorflow.keras.layers import GRU, Bidirectional, BatchNormalization, Reshape
from tensorflow.keras.optimizers import Adam

In [None]:
def modelf(input_shape):
    """
    Function creating the model's graph in Keras.

    The network is a strided Conv1D front end followed by two GRU layers and a
    single sigmoid unit. It produces ONE probability per input example, which
    matches the one-scalar-label-per-clip targets returned by
    create_training_example. (A per-time-step head -- return_sequences=True on
    the last GRU plus TimeDistributed(Dense(1)) -- yields a (batch, steps, 1)
    output that does not line up with those scalar labels.)

    Argument:
    input_shape -- shape of the model's input data (using Keras conventions),
                   i.e. without the batch dimension, e.g. (Tx, n_freq)

    Returns:
    model -- Keras model instance (not compiled) with output shape (batch, 1)
    """

    X_input = Input(shape = input_shape)

    # Convolutional front end: strides=4 shortens the sequence fed to the GRUs.
    X = Conv1D(filters = 196, kernel_size=15, strides=4)(X_input)
    X = BatchNormalization()(X)
    X = Activation("relu")(X)
    X = Dropout(rate=0.8)(X)

    # First recurrent layer keeps the full sequence for the next GRU.
    X = GRU(units = 128, return_sequences=True)(X)
    X = Dropout(rate = 0.8)(X)
    X = BatchNormalization()(X)

    # Last recurrent layer returns only its final output, so the clip is
    # summarised into a single 128-dimensional vector.
    X = GRU(units = 128, return_sequences=False)(X)
    X = Dropout(rate = 0.8)(X)
    X = BatchNormalization()(X)
    X = Dropout(rate = 0.8)(X)

    # One sigmoid unit: probability that the clip belongs to the positive class.
    X = Dense(1, activation = "sigmoid")(X)

    model = Model(inputs = X_input, outputs = X)

    return model

In [None]:
# Build the network for inputs of Tx time steps with n_freq features each.
model = modelf(input_shape = (Tx, n_freq))

In [None]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

In [None]:
# Adam optimizer with a learning rate of 1e-6; binary cross-entropy loss and
# accuracy as the reported metric.
opt = Adam(
    learning_rate=1e-6,
    beta_1=0.9,
    beta_2=0.999,
)
model.compile(
    optimizer=opt,
    loss='binary_crossentropy',
    metrics=["accuracy"],
)

In [None]:
# Check the shape of the training labels before fitting.
print(Y.shape)

In [None]:

# Train on the training arrays for 100 epochs with mini-batches of 16.
model.fit(X, Y, batch_size = 16, epochs=100)

In [None]:
# Container type of the training inputs.
print(type(X))

In [None]:
# Evaluate on the dev set; evaluate returns the loss followed by the compiled metric.
loss, acc = model.evaluate(X_dev, Y_dev)
print("Dev set accuracy = ", acc)

In [None]:
# The original cell was an unfinished assignment (`aux = ` with no right-hand
# side), which is a SyntaxError and stops the notebook from running end to end.
# TODO(review): dev-set predictions are assumed to be what was intended here,
# since the next cell only inspects type(aux) and len(aux) -- confirm.
aux = model.predict(X_dev)

In [None]:
# Type and length of `aux`.
print(type(aux), len(aux), sep="\n")