In [1]:
import numpy as np
import os
from os.path import isfile
from PIL import Image as Img
from data_util import *

import keras
from keras.models import Sequential, Model
from keras.utils import plot_model
from keras.layers import Input, Dense, TimeDistributed, LSTM, Dropout, Activation
from keras.layers import Conv1D, MaxPooling1D, Flatten, Conv2D, BatchNormalization, Lambda
from keras.layers.advanced_activations import ELU
from keras.callbacks import ModelCheckpoint, TensorBoard, ReduceLROnPlateau
from keras import backend
from keras.utils import np_utils
from keras.optimizers import Adam, RMSprop
from keras import regularizers

import librosa
import librosa.display
import matplotlib.pyplot as plt

import tensorflow as tf

Using TensorFlow backend.


In [5]:
# --- Data ------------------------------------------------------------------
# NOTE(review): `dir` shadows the Python builtin of the same name; the name is
# kept as-is because other cells may rely on it.
dir = "../data/debug/"
ds = load_dataset(dir)
x_train, y_train = ds.get_train_full()
x_val, y_val = ds.get_val_full()

# --- Training schedule -----------------------------------------------------
BATCH_SIZE = 32
batch_size = BATCH_SIZE  # lowercase alias of BATCH_SIZE
EPOCH_COUNT = 70

# --- Network architecture --------------------------------------------------
num_classes = 12          # width of the softmax output layer
N_LAYERS = 3              # number of Conv1D blocks
FILTER_LENGTH = 5         # Conv1D kernel size
CONV_FILTER_COUNT = 56    # filters per Conv1D block
LSTM_COUNT = 96           # LSTM units
NUM_HIDDEN = 64           # units in the 'dense1' layer
L2_regularization = 0.001  # L2 factor for the conv and 'dense1' kernels


def conv_recurrent_model_build(model_input):
    """Assemble and compile the Conv1D -> LSTM -> Dense classifier.

    The network is N_LAYERS blocks of (Conv1D, BatchNormalization, ReLU,
    MaxPooling1D, Dropout), followed by a single LSTM that returns only its
    last output, a regularized Dense layer ('dense1') and a softmax over
    ``num_classes`` outputs ('output_realtime').

    Args:
        model_input: Keras ``Input`` tensor the network is built on.

    Returns:
        A ``Model`` compiled with categorical cross-entropy, Adam (lr=0.001)
        and the 'accuracy' metric.
    """
    print('Building model...')
    tensor = model_input

    # Convolutional feature extractor; blocks are named convolution_1..N.
    for block_idx in range(1, N_LAYERS + 1):
        tensor = Conv1D(
            filters=CONV_FILTER_COUNT,
            kernel_size=FILTER_LENGTH,
            kernel_regularizer=regularizers.l2(L2_regularization),
            name='convolution_%d' % block_idx)(tensor)
        tensor = BatchNormalization(momentum=0.9)(tensor)
        tensor = Activation('relu')(tensor)
        tensor = MaxPooling1D(pool_size=2)(tensor)
        tensor = Dropout(rate=0.4)(tensor)

    # Recurrent summary: only the final LSTM output is passed on.
    tensor = LSTM(units=LSTM_COUNT, return_sequences=False)(tensor)
    tensor = Dropout(rate=0.4)(tensor)

    # Hidden dense layer.
    # NOTE(review): no activation is passed here, so 'dense1' is linear and is
    # followed (after dropout) by another linear Dense -- confirm this is
    # intended rather than a missing non-linearity.
    tensor = Dense(units=NUM_HIDDEN,
                   kernel_regularizer=regularizers.l2(L2_regularization),
                   name='dense1')(tensor)
    tensor = Dropout(rate=0.4)(tensor)

    # Classification head.
    tensor = Dense(units=num_classes)(tensor)
    model_output = Activation('softmax', name='output_realtime')(tensor)

    model = Model(inputs=model_input, outputs=model_output)
    model.compile(optimizer=Adam(lr=0.001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model


def train_model(x_train, y_train, x_val, y_val, checkpoint_name,
                use_tensorboard=False):
    """Build the conv-recurrent model and fit it on the given arrays.

    Args:
        x_train: Training inputs; an array whose first axis indexes samples.
            The remaining axes are used as the model's input shape
            (e.g. data of shape (766, 128, 256) gives an input of (128, 256)).
        y_train: Training targets passed straight to ``model.fit``.
        x_val: Validation inputs.
        y_val: Validation targets.
        checkpoint_name: Filename prefix for checkpoints written under
            '../models/'. The epoch number and validation loss are appended
            directly to it, so change it between runs to avoid mixing files
            from different experiments.
        use_tensorboard: When True, also log to TensorBoard under './logs/4'.
            Defaults to False, which matches the previous behaviour (the
            TensorBoard callback used to be created but never registered).

    Returns:
        Tuple ``(model, history)``: the trained Keras model and the object
        returned by ``model.fit``.
    """
    print(x_train.shape)

    # Derive the per-sample input shape from the data instead of hard-coding
    # the last axis to 256.
    input_shape = tuple(x_train.shape[1:])
    model_input = Input(input_shape, name='input')

    model = conv_recurrent_model_build(model_input)

    # Make sure the checkpoint directory exists up front, so the first save
    # cannot fail on a missing folder after an epoch has already been spent.
    checkpoint_dir = '../models/'
    if not os.path.isdir(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    # Keep only the checkpoints that improve validation accuracy; the file
    # name records the epoch and the validation loss at that epoch.
    checkpoint_callback = ModelCheckpoint(
        checkpoint_dir + checkpoint_name + '{epoch:02d}-{val_loss:.2f}.hdf5',
        monitor='val_acc',
        verbose=1,
        save_best_only=True,
        mode='max')

    # Halve the learning rate when val_acc has not moved by at least 0.01
    # for 10 consecutive epochs.
    reducelr_callback = ReduceLROnPlateau(monitor='val_acc',
                                          factor=0.5,
                                          patience=10,
                                          min_delta=0.01,
                                          verbose=1)

    callback_list = [checkpoint_callback, reducelr_callback]

    if use_tensorboard:
        callback_list.append(TensorBoard(log_dir='./logs/4',
                                         histogram_freq=1,
                                         batch_size=BATCH_SIZE,
                                         write_graph=True,
                                         write_grads=False,
                                         write_images=False,
                                         embeddings_freq=0,
                                         embeddings_layer_names=None,
                                         embeddings_metadata=None))

    # Fit the model and get training history.
    print('Training...')
    history = model.fit(x_train,
                        y_train,
                        batch_size=BATCH_SIZE,
                        epochs=EPOCH_COUNT,
                        validation_data=(x_val, y_val),
                        callbacks=callback_list,
                        verbose=1)

    return model, history

# Run one training session on the in-memory train/validation splits.
# The last argument is the checkpoint filename prefix: change it before each
# run so checkpoints from different experiments are not mixed together.
model, history = train_model(
    np.array(x_train),
    np.array(y_train),
    np.array(x_val),
    np.array(y_val),
    "trail_Run",
)
print("DONE!!!")

loaded dataset from ../data/debug/
(766, 128, 256)
Building model...
Training...
Train on 766 samples, validate on 226 samples
Epoch 1/70

Epoch 00001: val_acc improved from -inf to 0.18142, saving model to ../models/trail_Run01-2.59.hdf5
Epoch 2/70

Epoch 00002: val_acc did not improve from 0.18142
Epoch 3/70

Epoch 00003: val_acc did not improve from 0.18142
Epoch 4/70

Epoch 00004: val_acc did not improve from 0.18142
Epoch 5/70

Epoch 00005: val_acc improved from 0.18142 to 0.18584, saving model to ../models/trail_Run05-2.59.hdf5
Epoch 6/70

Epoch 00006: val_acc did not improve from 0.18584
Epoch 7/70

Epoch 00007: val_acc did not improve from 0.18584
Epoch 8/70

Epoch 00008: val_acc did not improve from 0.18584
Epoch 9/70

Epoch 00009: val_acc did not improve from 0.18584
Epoch 10/70

Epoch 00010: val_acc improved from 0.18584 to 0.19912, saving model to ../models/trail_Run10-2.69.hdf5
Epoch 11/70

Epoch 00011: val_acc improved from 0.19912 to 0.20354, saving model to ../models/tr


Epoch 00043: val_acc did not improve from 0.24779

Epoch 00043: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 44/70

Epoch 00044: val_acc did not improve from 0.24779
Epoch 45/70

Epoch 00045: val_acc did not improve from 0.24779
Epoch 46/70

Epoch 00046: val_acc did not improve from 0.24779
Epoch 47/70

Epoch 00047: val_acc did not improve from 0.24779
Epoch 48/70

Epoch 00048: val_acc did not improve from 0.24779
Epoch 49/70

Epoch 00049: val_acc did not improve from 0.24779
Epoch 50/70

Epoch 00050: val_acc did not improve from 0.24779
Epoch 51/70

Epoch 00051: val_acc did not improve from 0.24779
Epoch 52/70

Epoch 00052: val_acc did not improve from 0.24779
Epoch 53/70

Epoch 00053: val_acc did not improve from 0.24779

Epoch 00053: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 54/70

Epoch 00054: val_acc did not improve from 0.24779
Epoch 55/70

Epoch 00055: val_acc did not improve from 0.24779
Epoch 56/70

Epoch 00056: val_a

In [None]:
#voilà

In [3]:
# Settings for the TFRecord input pipeline defined below.
# NOTE(review): BATCH_SIZE is also assigned (to 32) in the training cell, and
# train_model reads the same global -- whichever cell ran last wins. Confirm
# the intended value before training after running this cell.
SHUFFLE_BUFFER = 1000
BATCH_SIZE = 16
NUM_CLASSES = 41

# Schema of one serialized tf.Example record:
#   'feature0' -- flat float32 vector of 32768 values (reshaped to 256 x 128
#                 and transposed by _parse_function)
#   'feature1' -- a single int64 (one-hot encoded as the label in
#                 create_dataset)
feature_description = dict(
    feature0=tf.FixedLenFeature([32768], tf.float32),
    feature1=tf.FixedLenFeature([1], tf.int64),
)

def _parse_function(example_proto):
    """Decode one serialized tf.Example according to ``feature_description``.

    'feature0' is reshaped from its flat form to (256, 128) and then
    transposed, so it leaves this function as a (128, 256) tensor.
    'feature1' is passed through unchanged.

    Args:
        example_proto: Scalar string tensor holding one serialized example.

    Returns:
        Dict with keys 'feature0' and 'feature1'.
    """
    example = tf.parse_single_example(example_proto, feature_description)
    unflattened = tf.reshape(example['feature0'], (256, 128))
    example["feature0"] = tf.transpose(unflattened)
    return example

def create_dataset(filepath, squeeze_labels=False):
    """Build an endlessly repeating, shuffled, batched TFRecord input pipeline.

    Records are decoded with ``_parse_function``, repeated indefinitely,
    shuffled with a buffer of SHUFFLE_BUFFER elements and grouped into batches
    of BATCH_SIZE. The static shapes of both returned tensors are printed.

    Args:
        filepath: Path (or paths) of the TFRecord file(s) to read.
        squeeze_labels: 'feature1' is parsed with shape [1], so the one-hot
            labels come out as (batch, 1, NUM_CLASSES). Pass True to drop the
            singleton axis and get (batch, NUM_CLASSES), the layout that lines
            up with a (batch, classes) softmax output. Defaults to False to
            keep the original return shape.

    Returns:
        Tuple ``(lmfcc, label)`` of tensors produced by a one-shot iterator:
        ``lmfcc`` has shape (batch, 128, 256); ``label`` is the one-hot
        encoding of 'feature1' (see ``squeeze_labels`` for its shape).
    """
    dataset = tf.data.TFRecordDataset(filepath)
    dataset = dataset.map(_parse_function)  # , num_parallel_calls=8)

    # This dataset will go on forever.
    # NOTE(review): repeat() is applied before shuffle(), so the shuffle
    # buffer can mix records from adjacent passes over the file -- confirm
    # that blurring pass boundaries is acceptable.
    dataset = dataset.repeat()
    dataset = dataset.shuffle(SHUFFLE_BUFFER)
    dataset = dataset.batch(BATCH_SIZE)

    # One-shot iterator: get_next() yields the tensors for one batch.
    iterator = dataset.make_one_shot_iterator()
    feature = iterator.get_next()

    # Pin the feature tensor to (batch, 128, 256).
    lmfcc = tf.reshape(feature["feature0"], [-1, 128, 256])

    # One-hot encode the integer labels.
    label = tf.one_hot(feature["feature1"], NUM_CLASSES)
    if squeeze_labels:
        # Collapse (batch, 1, NUM_CLASSES) to (batch, NUM_CLASSES).
        label = tf.reshape(label, [-1, NUM_CLASSES])

    print(lmfcc.shape)
    print(label.shape)

    return lmfcc, label

In [4]:
# Build the pipeline once on the debug sample; create_dataset prints the
# static shapes of the feature and label tensors, and the returned tensors
# are discarded.
_ = create_dataset("../data/debug/sample.tfrecords")

(?, 128, 256)
(?, 1, 41)
