In [78]:
import numpy as np
import os
from os.path import isfile
from PIL import Image as Img
from datetime import datetime

import sys
sys.path.append("../pianition/data_util.py")
from data_util import *

import keras
from keras.models import Sequential, Model
from keras.utils import plot_model
from keras.layers import Input, Dense, TimeDistributed, LSTM, Dropout, Activation
from keras.layers import Conv1D, MaxPooling1D, Flatten, MaxPooling2D, Reshape
from keras.layers import Conv2D, BatchNormalization, Lambda, Permute, GRU
from keras.layers.advanced_activations import ELU
from keras.callbacks import ModelCheckpoint, TensorBoard, ReduceLROnPlateau
from keras import backend
from keras.utils import np_utils
from keras.optimizers import Adam, RMSprop
from keras import regularizers

import librosa
import librosa.display
import matplotlib.pyplot as plt

import tensorflow as tf

In [79]:
SHUFFLE_BUFFER = 1000
BATCH_SIZE = 32
NUM_CLASSES = 12

# Create a description of the features.  
feature_description = {
    'feature0': tf.FixedLenFeature([32768], tf.float32),
    'feature1': tf.FixedLenFeature([1], tf.int64)
}

def _parse_function(example_proto):
  # Parse the input tf.Example proto using the dictionary above.
    parsed_example = tf.parse_single_example(example_proto, feature_description)
    parsed_example["feature0"] = tf.transpose(tf.reshape(parsed_example['feature0'], (256,128)))
    return parsed_example

def create_dataset(filepath):
    
    dataset = tf.data.TFRecordDataset(filepath)
    
    dataset = dataset.map(_parse_function) #, num_parallel_calls=8)
    
    # This dataset will go on forever
    dataset = dataset.repeat()
    
    # Set the number of datapoints you want to load and shuffle 
    dataset = dataset.shuffle(SHUFFLE_BUFFER)
    dataset = dataset.batch(BATCH_SIZE)
    
    # Create an iterator
    iterator = dataset.make_one_shot_iterator()
    
    # Create your tf representation of the iterator
    feature = iterator.get_next()
    #print(feature)
    lmfcc = feature["feature0"]
    label = feature["feature1"]
    
    # Bring your picture back in shape
    lmfcc = tf.reshape(lmfcc, [-1,128, 256, 1])
    
    # Create a one hot array for your labels
    label = tf.one_hot(label, NUM_CLASSES)
    print(lmfcc.shape)
    print(label.shape)

    return lmfcc, label

In [80]:
lmfcc, label = create_dataset("../data/debug/sample.tfrecords")

(?, 128, 256, 1)
(?, 1, 12)


In [81]:
def get_callbacks(checkpoint_name):
    logDir = "./Graph/" + datetime.now().strftime("%Y%m%d-%H%M%S") + "/"
    tb = TensorBoard(log_dir=logDir,
                     histogram_freq=2,
                     write_graph=True,
                     write_images=True,
                     write_grads=True,
                     update_freq='epoch')

#     tb_callback = TensorBoard(
#         log_dir='../models/logs/',
#         histogram_freq=1,
#         batch_size=32,
#         write_graph=True,
#         write_grads=False,
#         write_images=False,
#         embeddings_freq=0,
#         embeddings_layer_names=None,
#         embeddings_metadata=None,
#     )

    checkpoint_callback = ModelCheckpoint('../models/' + checkpoint_name +
                                          '{epoch:02d}-{val_loss:.2f}.hdf5',
                                          monitor='val_acc',
                                          verbose=1,
                                          save_best_only=True,
                                          mode='max')

    reducelr_callback = ReduceLROnPlateau(monitor='val_acc',
                                          factor=0.5,
                                          patience=10,
                                          min_delta=0.01,
                                          verbose=1)

    callback_list = [checkpoint_callback, reducelr_callback]

    return callback_list

In [82]:
# Data iterator
lmfcc, label = create_dataset("../data/debug/train.tfrecords")
lmfcc_val, label_val = create_dataset("../data/debug/val.tfrecords")

nb_classes = 12
nb_layers = 4  # number of convolutional layers
nb_filters = [64, 128, 128, 128]  # filter sizes
kernel_size = (3, 3)  # convolution kernel size
activation = 'elu'  # activation function to use after each layer
pool_size = [(2, 2), (4, 2), (4, 2), (4, 2), (4, 2)]  # size of pooling area

# shape of input data (frequency, time, channels)
#input_shape = (X_shape[1], X_shape[2], X_shape[3])
frequency_axis = 1
time_axis = 2
channel_axis = 3

# Create sequential model and normalize along frequency axis
model_input = keras.layers.Input(tensor=lmfcc)

x = BatchNormalization(axis=frequency_axis)(model_input)

# First convolution layer specifies shape
x = Conv2D(nb_filters[0],
           kernel_size=kernel_size,
           padding='same',
           data_format="channels_last")(x)
x = Activation(activation)(x)
x = BatchNormalization(axis=channel_axis)(x)
x = MaxPooling2D(pool_size=pool_size[0], strides=pool_size[0])(x)
x = Dropout(0.1)(x)

# Add more convolutional layers
for layer in range(nb_layers - 1):
    # Convolutional layer
    x = Conv2D(nb_filters[layer + 1], kernel_size=kernel_size,
               padding='same')(x)
    x = Activation(activation)(x)
    x = BatchNormalization(axis=channel_axis)(x)
    # Improves overfitting/underfitting
    x = MaxPooling2D(pool_size=pool_size[layer + 1],
                     strides=pool_size[layer + 1])(x)  # Max pooling
    x = Dropout(0.1)(x)

    # Reshaping input for recurrent layer
# (frequency, time, channels) --> (time, frequency, channel)
x = Permute((time_axis, frequency_axis, channel_axis))(x)
resize_shape = x._keras_shape[2] * x._keras_shape[3]
x = Reshape((x._keras_shape[1], resize_shape))(x)

# recurrent layer
x = GRU(32, return_sequences=True)(x)
x = GRU(32, return_sequences=False)(x)
x = Dropout(0.3)(x)

# Output layer
x = Dense(nb_classes)(x)
model_output = Activation("softmax")(x)

train_model = Model(inputs=model_input, outputs=model_output)


train_model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=0.0001),
              metrics=['accuracy'])

batch_size = 16
# Better to change checkpoint name before run
train_model.fit(epochs=70,
                steps_per_epoch=100,
                validation_data=(lmfcc_val, label_val),
                validation_steps=100,
                callbacks=get_callbacks(checkpoint_name="trail"))

(?, 128, 256, 1)
(?, 1, 12)
(?, 128, 256, 1)
(?, 1, 12)


ValueError: When feeding symbolic tensors to a model, we expect thetensors to have a static batch size. Got tensor with shape: (None, 1, 12)