In [2]:
import glob
import numpy as np
import tensorflow as tf

In [70]:
# Geometry constants shared by the input pipeline and the network.
KERNEL_SIZE = 15  # Conv1D kernel width used by seq_model
STRIDE = 4        # Conv1D stride used by seq_model
FRAME_RATE = 48000
NFFT = 512
# Tensor dimensions must be integers; the bare product is a float (936.0),
# so round it to the nearest whole number of time steps.
TX = round(FRAME_RATE * 0.0195)
# NFFT // 2 + 1 (presumably the one-sided FFT bin count -- confirm).
FX = NFFT // 2 + 1
# Output length of an unpadded ('valid') strided 1-D convolution:
# floor((TX - KERNEL_SIZE) / STRIDE) + 1. Rounding the quotient instead of
# flooring it over-counts by one for some values of TX.
TY = (TX - KERNEL_SIZE) // STRIDE + 1

In [71]:
def _extract_feature(record, feature):
    """
    Decode a serialized tf.train.Example and return one float feature.

    Arguments:
    record -- eager tensor whose .numpy() yields the serialized Example bytes
    feature -- key of the feature to read from the Example

    Returns:
    the float_list values stored under that key
    """
    serialized = record.numpy()
    parsed = tf.train.Example.FromString(serialized)
    selected = parsed.features.feature[feature]
    return selected.float_list.value

In [72]:
# Load tf record dataset
def parser(record, num_classes=1):
    """
    Decode one serialized TFRecord example into a (features, labels) pair.

    Arguments:
    record -- tensor holding one serialized tf.train.Example
    num_classes -- size of the last label axis; defaults to 1, matching the
                   seq_model(..., 1) call made in this notebook

    Returns:
    X -- float32 tensor reshaped to [TX, FX]
    Y -- float32 tensor reshaped to [TY, num_classes]
    """
    # Use the notebook-level constants (TX, FX, TY). The names used before
    # (Tx, n_freq, Ty, num_classes) are not defined in any cell, so the
    # function raised NameError on a fresh kernel.
    # Coerce to int because reshape targets must be integer dimensions.
    feature_shape = [int(round(TX)), int(FX)]
    label_shape = [int(round(TY)), int(num_classes)]

    # py_function is required because decoding calls record.numpy(), which
    # only exists on eager tensors.
    X = tf.reshape(
        tf.py_function(
            lambda r: _extract_feature(r, "X"),
            (record,),
            tf.float32
        ), feature_shape
    )

    Y = tf.reshape(
        tf.py_function(
            lambda r: _extract_feature(r, "Y"),
            (record,),
            tf.float32
        ), label_shape
    )

    return X, Y
    
def dataset_input_fn(filenames, batch_size, num_epochs, shuffle_buffer_size=10000):
    """
    Build the tf.data input pipeline from TFRecord files.

    Arguments:
    filenames -- TFRecord path(s) handed to tf.data.TFRecordDataset
    batch_size -- number of examples per batch
    num_epochs -- number of times to repeat the data (passed to
                  Dataset.repeat; None repeats indefinitely)
    shuffle_buffer_size -- size of the shuffle buffer (default 10000,
                           the value previously hard-coded)

    Returns:
    dataset -- tf.data.Dataset yielding batched (X, Y) pairs from parser
    """
    return (
        tf.data.TFRecordDataset(filenames)
        .map(parser)
        .shuffle(buffer_size=shuffle_buffer_size)
        .batch(batch_size)
        .repeat(num_epochs)
    )

In [73]:
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import Model, load_model, Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, Input, Masking, TimeDistributed, LSTM, Conv1D
from tensorflow.keras.layers import GRU, Bidirectional, BatchNormalization, Reshape
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

In [74]:
def seq_model(input_shape, n_classes):
    """
    Assemble the Conv1D -> GRU -> GRU -> per-step Dense network in Keras.

    Arguments:
    input_shape -- shape of one input example (Keras convention, no batch axis)
    n_classes -- number of sigmoid outputs produced at every time step

    Returns:
    model -- Keras model instance
    """
    inputs = Input(shape=input_shape)

    # Convolutional front end: strided Conv1D, batch norm, ReLU, dropout.
    # NOTE(review): Keras Dropout takes the fraction of units to DROP, so 0.8
    # discards 80% of activations -- confirm this is the intended rate.
    hidden = Conv1D(196, kernel_size=KERNEL_SIZE, strides=STRIDE)(inputs)
    hidden = BatchNormalization()(hidden)
    hidden = Activation('relu')(hidden)
    hidden = Dropout(0.8)(hidden)

    # Two identical recurrent stages: GRU (full sequence), dropout, batch norm.
    for _ in range(2):
        hidden = GRU(units=128, return_sequences=True)(hidden)
        hidden = Dropout(0.8)(hidden)
        hidden = BatchNormalization()(hidden)

    # Additional dropout applied after the second recurrent stage.
    hidden = Dropout(0.8)(hidden)

    # The same sigmoid Dense layer is applied independently at each time step.
    per_step_classifier = TimeDistributed(Dense(n_classes, activation="sigmoid"))
    outputs = per_step_classifier(hidden)

    return Model(inputs=inputs, outputs=outputs)

In [75]:
# Input dimensions must be integers; TX is derived from a float product, so
# coerce it explicitly instead of relying on implicit conversion.
keras_model = seq_model((int(round(TX)), FX), 1)

In [76]:
# Print the layer-by-layer summary (output shapes and parameter counts).
keras_model.summary()

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 936, 257)]        0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 231, 196)          755776    
_________________________________________________________________
batch_normalization_6 (Batch (None, 231, 196)          784       
_________________________________________________________________
activation_2 (Activation)    (None, 231, 196)          0         
_________________________________________________________________
dropout_8 (Dropout)          (None, 231, 196)          0         
_________________________________________________________________
gru_4 (GRU)                  (None, 231, 128)          124800    
_________________________________________________________________
dropout_9 (Dropout)          (None, 231, 128)          0   

In [78]:
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
opt = Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999, decay=0.01)
# Binary cross-entropy pairs with the per-step sigmoid outputs of seq_model.
keras_model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

In [79]:
# NOTE(review): tfrecord_path is not assigned in any visible cell -- define it
# (e.g. in the configuration cell) before running this on a fresh kernel.
# num_epochs=None makes the dataset repeat, so steps_per_epoch bounds each epoch.
training_set = dataset_input_fn(tfrecord_path, 16, None)

# Keras consumes a tf.data.Dataset directly; Dataset.make_one_shot_iterator()
# is a TF1-only method that is absent from TF2 datasets.
history = keras_model.fit(
    training_set,
    steps_per_epoch=10,
    epochs=5,
    verbose=1,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
