In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import numpy as np
from pydub import AudioSegment
import random
import sys
import io
import os
import glob
import IPython
from td_utils import *
%matplotlib inline

---

In [3]:
# Number of spectrogram time steps fed to the model for each example
Tx = 5511
# Number of time steps in the model's output; equals (Tx - 15) // 4 + 1 for
# the kernel-15, stride-4 Conv1D layer used in `model_gru`
Ty = 1375
# Number of frequency bins at each time step of the spectrogram
n_freq = 101

## Dataset Sequence Generator


In [6]:
from keras.utils import Sequence

In [7]:
class SpectrogramDataGenerator(Sequence):
    """Keras ``Sequence`` that serves (spectrogram, label) batches from .npy files.

    Only the file paths are kept on the instance; the arrays themselves are
    read from disk with ``np.load`` each time a batch is requested.

    Arguments:
    x_set -- list of paths to the spectrogram .npy files
    y_set -- list of paths to the associated truth-vector .npy files, in the
             same order as `x_set` (samples are paired by position)
    batch_size -- number of samples per batch (must be positive); the final
                  batch is shorter when the dataset size is not a multiple of it

    Raises:
    ValueError -- if `x_set` and `y_set` differ in length, or if `batch_size`
                  is not positive
    """

    def __init__(self, x_set, y_set, batch_size):
        # NOTE(review): recent Keras releases expect the base-class initialiser
        # to run; on releases where ``Sequence`` defines no ``__init__`` this
        # simply falls through to ``object.__init__``.
        super().__init__()

        # Samples are paired by position, so a length mismatch would silently
        # pair spectrograms with the wrong labels (or yield ragged batches).
        if len(x_set) != len(y_set):
            raise ValueError(
                'x_set and y_set must have the same length, got %d and %d'
                % (len(x_set), len(y_set)))
        # Fail here with a clear message rather than later inside __len__.
        if batch_size <= 0:
            raise ValueError('batch_size must be positive, got %r' % (batch_size,))

        self.x, self.y = x_set, y_set
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of batches per epoch (the last one may be partial)."""
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Load batch number `idx` and return it as an ``(X, y)`` tuple of arrays."""
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_x = self.x[start:stop]
        batch_y = self.y[start:stop]

        return (np.array([np.load(filename) for filename in batch_x]),
                np.array([np.load(filename) for filename in batch_y]))

    def on_epoch_end(self):
        """Hook called at the end of every epoch; the dataset is left unchanged."""
        pass

In [8]:
import glob
from sklearn.model_selection import train_test_split

In [9]:
# Gather the spectrogram and label paths. Both lists are put in the same
# lexicographic order (..._0, ..._1, ..._10, ...), so position i in one list
# lines up with position i in the other — this assumes every
# x_spectrogram_<n>.npy has a matching y_<n>.npy.
x_filenames = glob.glob('../data/dev_set/x_spectrogram_*.npy')
x_filenames.sort()
y_filenames = glob.glob('../data/dev_set/y_*.npy')
y_filenames.sort()

In [10]:
# Quick look at the dataset size and at the first spectrogram/label path pair.
print('number of samples =', len(x_filenames))
print(x_filenames[0], y_filenames[0], sep='\n')

number of samples = 10000
../data/dev_set/x_spectrogram_0.npy
../data/dev_set/y_0.npy


## Split into training and validation sets

In [11]:
# Hold out 20% of the (spectrogram, label) path pairs for validation.
# Splitting both lists in a single call keeps each x path paired with its
# y path; the fixed seed makes the split repeatable.
(X_filename_train, X_filename_val,
 Y_filename_train, Y_filename_val) = train_test_split(
    x_filenames,
    y_filenames,
    test_size=0.2,
    random_state=42,
)

In [12]:
# Report the training split size and spot-check two positions to confirm the
# x/y paths are still paired after the split.
print('number of samples =', len(X_filename_train))
print(X_filename_train[0], Y_filename_train[0], sep='\n')
print(X_filename_train[100], Y_filename_train[100], sep='\n')

number of samples = 8000
../data/dev_set/x_spectrogram_9327.npy
../data/dev_set/y_9327.npy
../data/dev_set/x_spectrogram_7297.npy
../data/dev_set/y_7297.npy


---

### Training Generator

In [22]:
# Number of samples each data generator loads per batch.
# NOTE(review): the execution counts show this cell ran as In [22], i.e. after
# the generator cells that follow it (In [19]-[21]) — restart the kernel and
# run all cells to confirm the generators are built with this value.
batch_size = 250

In [19]:
# Generator over the training file pairs; arrays are read from disk per batch.
training_generator = SpectrogramDataGenerator(
    x_set=X_filename_train,
    y_set=Y_filename_train,
    batch_size=batch_size,
)

In [14]:
# Load the first training batch and confirm the array shapes the model will see.
batch0_X, batch0_y = training_generator[0]

print('batch 0 - X shape=', batch0_X.shape)
print('batch 0 - y shape=', batch0_y.shape)

batch 0 - X shape= (250, 5511, 101)
batch 0 - y shape= (250, 1375, 1)


### Validation Generator

In [20]:
# Generator over the validation file pairs; same batch size as training.
val_generator = SpectrogramDataGenerator(
    x_set=X_filename_val,
    y_set=Y_filename_val,
    batch_size=batch_size,
)

In [21]:
# Load the first validation batch and confirm its shapes match the training batches.
# Note: this rebinds batch0_X / batch0_y from the training check.
batch0_X, batch0_y = val_generator[0]

print('batch 0 - X shape=', batch0_X.shape)
print('batch 0 - y shape=', batch0_y.shape)

batch 0 - X shape= (250, 5511, 101)
batch 0 - y shape= (250, 1375, 1)


---

# Build Model


The model uses a 1-D convolutional layer, followed by two GRU layers and a time-distributed dense output layer.

In [23]:
from keras.callbacks import ModelCheckpoint
from keras.models import Model, load_model, Sequential
from keras.layers import Dense, Activation, Dropout, Input, Masking, TimeDistributed, LSTM, Conv1D
from keras.layers import GRU, Bidirectional, BatchNormalization, Reshape
from keras.optimizers import Adam

In [24]:
def model_gru(input_shape):
    """
    Assemble the Conv1D -> GRU -> GRU -> per-time-step sigmoid network.

    Argument:
    input_shape -- shape of one input example (using Keras conventions)

    Returns:
    model -- uncompiled Keras model instance producing one sigmoid output
             per time step left after the convolution
    """

    inputs = Input(shape=input_shape)

    # Convolutional front end: 196 filters, kernel width 15, stride 4.
    # A Dropout(0.8) at this point was previously disabled and stays off.
    net = Conv1D(filters=196, kernel_size=15, strides=4)(inputs)
    net = BatchNormalization()(net)
    net = Activation('relu')(net)

    # First recurrent block: GRU (128 units, full sequence) -> dropout -> batch norm
    net = GRU(units=128, return_sequences=True)(net)
    net = Dropout(rate=0.5)(net)
    net = BatchNormalization()(net)

    # Second recurrent block: GRU -> dropout -> batch norm -> dropout
    net = GRU(units=128, return_sequences=True)(net)
    net = Dropout(rate=0.5)(net)
    net = BatchNormalization()(net)
    net = Dropout(rate=0.5)(net)

    # Output head: one sigmoid unit applied independently at every time step
    outputs = TimeDistributed(Dense(units=1, activation="sigmoid"))(net)

    return Model(inputs=inputs, outputs=outputs)

In [25]:
# Build the network for inputs of Tx time steps by n_freq frequency bins.
model = model_gru(input_shape=(Tx, n_freq))

## Fit the model

In [26]:
# Adam optimizer with a small learning rate; the model is trained with binary
# cross-entropy and reports accuracy.
# NOTE(review): `lr` and `decay` are legacy Keras argument names — confirm
# they are accepted by the installed Keras version.
opt = Adam(
    lr=1e-4,
    beta_1=0.9,
    beta_2=0.999,
    decay=0.01,
)
model.compile(
    optimizer=opt,
    loss='binary_crossentropy',
    metrics=["accuracy"],
)

---

In [27]:
# Train for two epochs, drawing every batch of each generator once per epoch.
# NOTE(review): `fit_generator` is deprecated in later Keras/TensorFlow
# releases in favour of `fit` — confirm against the installed version.
model.fit_generator(
    generator=training_generator,
    steps_per_epoch=len(training_generator),
    epochs=2,
    validation_data=val_generator,
    validation_steps=len(val_generator),
)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f6c3e0a6278>