In [2]:
import numpy as np
import os
from os.path import isfile
from PIL import Image as Img
from datetime import datetime

import keras
from keras.models import Sequential, Model
from keras.utils import plot_model
from keras.layers import Input, Dense, TimeDistributed, LSTM, Dropout, Activation
from keras.layers import Conv1D, MaxPooling1D, Flatten, MaxPooling2D, Reshape
from keras.layers import Conv2D, BatchNormalization, Lambda, Permute, GRU
from keras.layers.advanced_activations import ELU
from keras.callbacks import ModelCheckpoint, TensorBoard, ReduceLROnPlateau
from keras import backend
from keras.utils import np_utils
from keras.optimizers import Adam, RMSprop
from keras import regularizers

import librosa
import librosa.display
import matplotlib.pyplot as plt

import tensorflow as tf

In [3]:
SHUFFLE_BUFFER = 100
BATCH_SIZE = 32
NUM_CLASSES = 12

# Create a description of the features.  
feature_description = {
    'feature0': tf.FixedLenFeature([32768], tf.float32),
    'feature1': tf.FixedLenFeature([1], tf.int64)
}

def _parse_function(example_proto):
  # Parse the input tf.Example proto using the dictionary above.
    parsed_example = tf.parse_single_example(example_proto, feature_description)
    parsed_example["feature0"] = tf.transpose(tf.reshape(parsed_example['feature0'], (256,128)))
    return parsed_example

def create_dataset(filepath):
    
    dataset = tf.data.TFRecordDataset(filepath)
    
    dataset = dataset.map(_parse_function) #, num_parallel_calls=8)
    
    # This dataset will go on forever
    dataset = dataset.repeat()
    
    # Set the number of datapoints you want to load and shuffle 
    dataset = dataset.shuffle(SHUFFLE_BUFFER)
    dataset = dataset.batch(BATCH_SIZE)
    
    # Create an iterator
    iterator = dataset.make_one_shot_iterator()
    
    # Create your tf representation of the iterator
    feature = iterator.get_next()
    #print(feature)
    lmfcc = feature["feature0"]
    label = feature["feature1"]
    
    # Bring your picture back in shape
    lmfcc = tf.reshape(lmfcc, [-1,128, 256])
    
    # Create a one hot array for your labels
    label = tf.one_hot(label, NUM_CLASSES)
    print("inp",lmfcc.shape)
    print("inp",label.shape)

    return lmfcc, label

In [4]:
lmfcc, label = create_dataset("../data/debug/sample.tfrecords")

inp (?, 128, 256)
inp (?, 1, 12)


In [5]:
def get_callbacks(checkpoint_name):
    logDir = "./Graph/" + datetime.now().strftime("%Y%m%d-%H%M%S") + "/"
    tb = TensorBoard(log_dir=logDir,
                     histogram_freq=2,
                     write_graph=True,
                     write_images=True,
                     write_grads=True,
                     update_freq='epoch')

#     tb_callback = TensorBoard(
#         log_dir='../models/logs/',
#         histogram_freq=1,
#         batch_size=32,
#         write_graph=True,
#         write_grads=False,
#         write_images=False,
#         embeddings_freq=0,
#         embeddings_layer_names=None,
#         embeddings_metadata=None,
#     )

    checkpoint_callback = ModelCheckpoint('../models/' + checkpoint_name +
                                          '{epoch:02d}-{val_loss:.2f}.hdf5',
                                          monitor='val_acc',
                                          verbose=1,
                                          save_best_only=True,
                                          mode='max')

    reducelr_callback = ReduceLROnPlateau(monitor='val_acc',
                                          factor=0.5,
                                          patience=10,
                                          min_delta=0.01,
                                          verbose=1)

    callback_list = [checkpoint_callback, reducelr_callback]

    return callback_list

In [10]:
# Data iterator
lmfcc, label = create_dataset("../data/debug/train.tfrecords")
lmfcc_val, label_val = create_dataset("../data/debug/val.tfrecords")

#Build network
NUM_CLASSES = 12  # Must Change in the tf reader as well
N_LAYERS = 3
CONV_FILTER_COUNT = 64
FILTER_LENGTH = 5

POOL_SIZE = 2

GRU_COUNT = 64
NUM_HIDDEN = 128
L2_regularization = 0.001

# Input
model_input = keras.layers.Input(tensor=lmfcc)
print(model_input.shape)
layer = Permute((2, 1), input_shape=(128, 256))(model_input)
print(layer.shape)
# resize_shape = model.output_shape[2] * model.output_shape[3]
# model.add(Reshape((model.output_shape[1], resize_shape)))

# model.add(Permute((2, 1), input_shape=(10, 64)))
# now: model.output_shape == (None, 64, 10)

# Conv1D , input_shape=(10, 128) for time series sequences of 10 time steps with 128 features per step
# 1st conv
layer = Conv1D(filters=CONV_FILTER_COUNT,
               kernel_size=FILTER_LENGTH)(layer)  #(model_input)
layer = Activation('relu')(layer)
layer = MaxPooling1D(pool_size=POOL_SIZE, strides=POOL_SIZE)(layer)
layer = Dropout(0.2)(layer)

for i in range(N_LAYERS - 1):
    layer = Conv1D(filters=128, kernel_size=FILTER_LENGTH)(layer)
    layer = Activation('relu')(layer)
    layer = MaxPooling1D(pool_size=POOL_SIZE, strides=POOL_SIZE)(layer)
    layer = Dropout(0.4)(layer)

## LSTM Layer
layer = GRU(GRU_COUNT, return_sequences=True)(layer)
layer = GRU(GRU_COUNT, return_sequences=False)(layer)

layer = Dropout(0.4)(layer)

## Softmax Output
layer = Dense(NUM_CLASSES)(layer)
layer = Activation('softmax')(layer)
model_output = layer

#model_output = Dense(NUM_CLASSES, activation='relu')(model_output)

#Create your model
train_model = Model(inputs=model_input, outputs=model_output)

#compile
train_model.compile(loss='categorical_crossentropy',
                    optimizer=Adam(lr=0.01),
                    metrics=['accuracy'],
                    target_tensors=[label])

#Train the model
#steps per epoch could be viewed as dataset/batchsize
batch_size = 16
# Better to change checkpoint name before run
train_model.fit(epochs=70,
                steps_per_epoch=10,
                validation_data=(lmfcc_val, label_val),
                validation_steps=10,
                callbacks=get_callbacks(checkpoint_name="trail"))

inp (?, 128, 256)
inp (?, 1, 12)
inp (?, 128, 256)
inp (?, 1, 12)
(?, 128, 256)
(?, 256, 128)
Epoch 1/70

Epoch 00001: val_acc improved from -inf to 0.04688, saving model to ../models/trail01-4.57.hdf5
Epoch 2/70

Epoch 00002: val_acc did not improve from 0.04688
Epoch 3/70

Epoch 00003: val_acc did not improve from 0.04688
Epoch 4/70

Epoch 00004: val_acc improved from 0.04688 to 0.05000, saving model to ../models/trail04-3.16.hdf5
Epoch 5/70

Epoch 00005: val_acc did not improve from 0.05000
Epoch 6/70

Epoch 00006: val_acc did not improve from 0.05000
Epoch 7/70

Epoch 00007: val_acc improved from 0.05000 to 0.05937, saving model to ../models/trail07-2.83.hdf5
Epoch 8/70

Epoch 00008: val_acc did not improve from 0.05937
Epoch 9/70

Epoch 00009: val_acc did not improve from 0.05937
Epoch 10/70

Epoch 00010: val_acc did not improve from 0.05937
Epoch 11/70

Epoch 00011: val_acc did not improve from 0.05937
Epoch 12/70

Epoch 00012: val_acc did not improve from 0.05937
Epoch 13/70

Ep


Epoch 00042: val_acc improved from 0.32500 to 0.34063, saving model to ../models/trail42-2.25.hdf5
Epoch 43/70

Epoch 00043: val_acc did not improve from 0.34063
Epoch 44/70

Epoch 00044: val_acc did not improve from 0.34063
Epoch 45/70

Epoch 00045: val_acc did not improve from 0.34063
Epoch 46/70

Epoch 00046: val_acc did not improve from 0.34063
Epoch 47/70

Epoch 00047: val_acc did not improve from 0.34063
Epoch 48/70

Epoch 00048: val_acc did not improve from 0.34063
Epoch 49/70

Epoch 00049: val_acc did not improve from 0.34063
Epoch 50/70

Epoch 00050: val_acc improved from 0.34063 to 0.36250, saving model to ../models/trail50-1.92.hdf5
Epoch 51/70

Epoch 00051: val_acc did not improve from 0.36250
Epoch 52/70

Epoch 00052: val_acc did not improve from 0.36250
Epoch 53/70

Epoch 00053: val_acc did not improve from 0.36250
Epoch 54/70

Epoch 00054: val_acc did not improve from 0.36250
Epoch 55/70

Epoch 00055: val_acc did not improve from 0.36250
Epoch 56/70

Epoch 00056: val_ac

KeyboardInterrupt: 

In [11]:
train_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 128, 256)          0         
_________________________________________________________________
permute_4 (Permute)          (None, 256, 128)          0         
_________________________________________________________________
conv1d_7 (Conv1D)            (None, 252, 64)           41024     
_________________________________________________________________
activation_10 (Activation)   (None, 252, 64)           0         
_________________________________________________________________
max_pooling1d_7 (MaxPooling1 (None, 126, 64)           0         
_________________________________________________________________
dropout_9 (Dropout)          (None, 126, 64)           0         
_________________________________________________________________
conv1d_8 (Conv1D)            (None, 122, 128)          41088     
__________

In [8]:
#RNN
# Data iterator
lmfcc, label = create_dataset("../data/debug/train.tfrecords")
lmfcc_val, label_val = create_dataset("../data/debug/val.tfrecords")

#Build network
NUM_CLASSES = 12  # Must Change in the tf reader as well
N_LAYERS = 3
CONV_FILTER_COUNT = 64
FILTER_LENGTH = 5

POOL_SIZE = 2

GRU_COUNT = 64
NUM_HIDDEN = 128
L2_regularization = 0.001

# Input
model_input = keras.layers.Input(tensor=lmfcc)
print(model_input.shape)
layer = Permute((2, 1), input_shape=(128, 256))(model_input)
print(layer.shape)
# resize_shape = model.output_shape[2] * model.output_shape[3]
# model.add(Reshape((model.output_shape[1], resize_shape)))

# model.add(Permute((2, 1), input_shape=(10, 64)))
# now: model.output_shape == (None, 64, 10)



## LSTM Layer
layer = LSTM(GRU_COUNT, return_sequences=True)(layer)
layer = LSTM(GRU_COUNT, return_sequences=False)(layer)

## Softmax Output
layer = Dense(NUM_CLASSES)(layer)
layer = Activation('softmax')(layer)
model_output = layer

#model_output = Dense(NUM_CLASSES, activation='relu')(model_output)

#Create your model
train_model = Model(inputs=model_input, outputs=model_output)

#compile
train_model.compile(loss='categorical_crossentropy',
                    optimizer=Adam(lr=0.01),
                    metrics=['accuracy'],
                    target_tensors=[label])

#Train the model
#steps per epoch could be viewed as dataset/batchsize
batch_size = 16
# Better to change checkpoint name before run
train_model.fit(epochs=70,
                steps_per_epoch=10,
                validation_data=(lmfcc_val, label_val),
                validation_steps=10,
                callbacks=get_callbacks(checkpoint_name="trail"))

inp (?, 128, 256)
inp (?, 1, 12)
inp (?, 128, 256)
inp (?, 1, 12)
(?, 128, 256)
(?, 256, 128)
Epoch 1/70

Epoch 00001: val_acc improved from -inf to 0.05312, saving model to ../models/trail01-3.73.hdf5
Epoch 2/70

Epoch 00002: val_acc did not improve from 0.05312
Epoch 3/70

Epoch 00003: val_acc did not improve from 0.05312
Epoch 4/70

Epoch 00004: val_acc improved from 0.05312 to 0.05937, saving model to ../models/trail04-3.07.hdf5
Epoch 5/70

Epoch 00005: val_acc did not improve from 0.05937
Epoch 6/70

Epoch 00006: val_acc improved from 0.05937 to 0.17500, saving model to ../models/trail06-2.22.hdf5
Epoch 7/70

Epoch 00007: val_acc did not improve from 0.17500
Epoch 8/70

Epoch 00008: val_acc improved from 0.17500 to 0.19375, saving model to ../models/trail08-2.21.hdf5
Epoch 9/70

Epoch 00009: val_acc improved from 0.19375 to 0.30938, saving model to ../models/trail09-2.33.hdf5
Epoch 10/70

Epoch 00010: val_acc did not improve from 0.30938
Epoch 11/70

Epoch 00011: val_acc did not i


Epoch 00042: val_acc did not improve from 0.39062

Epoch 00042: ReduceLROnPlateau reducing learning rate to 0.0012499999720603228.
Epoch 43/70

Epoch 00043: val_acc did not improve from 0.39062
Epoch 44/70

Epoch 00044: val_acc did not improve from 0.39062
Epoch 45/70

Epoch 00045: val_acc did not improve from 0.39062
Epoch 46/70

Epoch 00046: val_acc did not improve from 0.39062
Epoch 47/70

Epoch 00047: val_acc did not improve from 0.39062
Epoch 48/70

Epoch 00048: val_acc did not improve from 0.39062
Epoch 49/70

Epoch 00049: val_acc did not improve from 0.39062
Epoch 50/70

Epoch 00050: val_acc did not improve from 0.39062
Epoch 51/70

Epoch 00051: val_acc did not improve from 0.39062
Epoch 52/70

Epoch 00052: val_acc did not improve from 0.39062

Epoch 00052: ReduceLROnPlateau reducing learning rate to 0.0006249999860301614.
Epoch 53/70

Epoch 00053: val_acc did not improve from 0.39062
Epoch 54/70

Epoch 00054: val_acc did not improve from 0.39062
Epoch 55/70

Epoch 00055: val_a

<keras.callbacks.History at 0x7ff04051a550>