In [17]:
import numpy as np
import os
from os.path import isfile
from datetime import datetime

import keras
from keras.models import Sequential, Model
from keras.utils import plot_model
from keras.layers import Input, Dense, TimeDistributed, LSTM, Dropout, Activation
from keras.layers import Conv1D, MaxPooling1D, Flatten, MaxPooling2D, Reshape
from keras.layers import Conv2D, BatchNormalization, Lambda, Permute, GRU
from keras.layers.advanced_activations import ELU
from keras.callbacks import ModelCheckpoint, TensorBoard, ReduceLROnPlateau
from keras import backend
from keras.utils import np_utils
from keras.optimizers import Adam, RMSprop
from keras import regularizers

import librosa
import librosa.display
import matplotlib.pyplot as plt

import tensorflow as tf

import sys
sys.path.insert(0, '..')
#sys.path.insert(0, '/home/nik/kth/y1p1/speech/project/')
from pianition.data_util import load_dataset

run_in_debug=False


In [18]:
SHUFFLE_BUFFER = 100
BATCH_SIZE = 32
NUM_CLASSES = 12

# Create a description of the features.  
feature_description = {
    'feature0': tf.FixedLenFeature([32768], tf.float32),
    'feature1': tf.FixedLenFeature([1], tf.int64)
}

def _parse_function(example_proto):
  # Parse the input tf.Example proto using the dictionary above.
    parsed_example = tf.parse_single_example(example_proto, feature_description)
    parsed_example["feature0"] = tf.transpose(tf.reshape(parsed_example['feature0'], (256,128)))
    return parsed_example

def create_dataset(filepath):
    
    dataset = tf.data.TFRecordDataset(filepath)
    
    dataset = dataset.map(_parse_function) #, num_parallel_calls=8)
    
    # This dataset will go on forever
    dataset = dataset.repeat()
    
    # Set the number of datapoints you want to load and shuffle 
    dataset = dataset.shuffle(SHUFFLE_BUFFER)
    dataset = dataset.batch(BATCH_SIZE)
    
    # Create an iterator
    iterator = dataset.make_one_shot_iterator()
    
    # Create your tf representation of the iterator
    feature = iterator.get_next()
    #print(feature)
    lmfcc = feature["feature0"]
    label = feature["feature1"]
    
    # Bring your picture back in shape
    lmfcc = tf.reshape(lmfcc, [-1,128, 256])
    
    # Create a one hot array for your labels
    label = tf.one_hot(label, NUM_CLASSES)
    print("inp",lmfcc.shape)
    print("inp",label.shape)

    return lmfcc, label

In [19]:
lmfcc, label = create_dataset("../data/full/sample.tfrecords")

inp (?, 128, 256)
inp (?, 1, 12)


In [20]:
def get_callbacks(checkpoint_name):
    logDir = "./Graph/" + datetime.now().strftime("%Y%m%d-%H%M%S") + "/"
    tb = TensorBoard(log_dir=logDir,
                     histogram_freq=2,
                     write_graph=True,
                     write_images=True,
                     write_grads=True,
                     update_freq='epoch')

#     tb_callback = TensorBoard(
#         log_dir='../models/logs/',
#         histogram_freq=1,
#         batch_size=32,
#         write_graph=True,
#         write_grads=False,
#         write_images=False,
#         embeddings_freq=0,
#         embeddings_layer_names=None,
#         embeddings_metadata=None,
#     )

    checkpoint_callback = ModelCheckpoint('../models/' + checkpoint_name +
                                          '{epoch:02d}-{val_loss:.2f}.hdf5',
                                          monitor='val_acc',
                                          verbose=1,
                                          save_best_only=True,
                                          mode='max')

    reducelr_callback = ReduceLROnPlateau(monitor='val_acc',
                                          factor=0.5,
                                          patience=10,
                                          min_delta=0.01,
                                          verbose=1)

    callback_list = [checkpoint_callback, reducelr_callback]

    return callback_list

In [21]:
debug_num_train_samples = 494
debug_num_val_samples = 150

full_num_train_samples = 21504
full_num_val_samples = 6969

if run_in_debug:
    lmfcc, label = create_dataset("../data/debug/train.tfrecords")
    lmfcc_val, label_val = create_dataset("../data/debug/val.tfrecords")
    num_train = debug_num_train_samples
    num_val = debug_num_val_samples
else:
    lmfcc, label = create_dataset("../data/debug/train.tfrecords")
    lmfcc_val, label_val = create_dataset("../data/debug/val.tfrecords")
    num_train = full_num_train_samples
    num_val = full_num_val_samples

inp (?, 128, 256)
inp (?, 1, 12)
inp (?, 128, 256)
inp (?, 1, 12)


In [26]:
#Build network
NUM_CLASSES = 12  # Must Change in the tf reader as well
N_LAYERS = 3
CONV_FILTER_COUNT = 64
FILTER_LENGTH = 5

POOL_SIZE = 2

GRU_COUNT = 64
NUM_HIDDEN = 128
L2_regularization = 0.001

BATCH_SIZE=32

# Input
model_input = keras.layers.Input(tensor=lmfcc)
print(model_input.shape)
layer = Permute((2, 1), input_shape=(128, 256))(model_input)
print(layer.shape)
# resize_shape = model.output_shape[2] * model.output_shape[3]
# model.add(Reshape((model.output_shape[1], resize_shape)))

# model.add(Permute((2, 1), input_shape=(10, 64)))
# now: model.output_shape == (None, 64, 10)

# Conv1D , input_shape=(10, 128) for time series sequences of 10 time steps with 128 features per step
# 1st conv
layer = Conv1D(filters=CONV_FILTER_COUNT,
               kernel_size=FILTER_LENGTH)(layer)  #(model_input)
layer = Activation('relu')(layer)
layer = MaxPooling1D(pool_size=POOL_SIZE, strides=POOL_SIZE)(layer)
layer = Dropout(0.2)(layer)

for i in range(N_LAYERS - 1):
    layer = Conv1D(filters=128, kernel_size=FILTER_LENGTH)(layer)
    layer = Activation('relu')(layer)
    layer = MaxPooling1D(pool_size=POOL_SIZE, strides=POOL_SIZE)(layer)
    layer = Dropout(0.4)(layer)

## LSTM Layer
layer = GRU(GRU_COUNT, return_sequences=True)(layer)
layer = GRU(GRU_COUNT, return_sequences=False)(layer)

layer = Dropout(0.4)(layer)

## Softmax Output
layer = Dense(NUM_CLASSES)(layer)
layer = Activation('softmax')(layer)
model_output = layer

#model_output = Dense(NUM_CLASSES, activation='relu')(model_output)

#Create your model
train_model = Model(inputs=model_input, outputs=model_output)

#compile
train_model.compile(loss='categorical_crossentropy',
                    optimizer=Adam(lr=0.001),
                    metrics=['accuracy'],
                    target_tensors=[label])

#Train the model
#steps per epoch could be viewed as dataset/batchsize
# Better to change checkpoint name before run
train_model.fit(epochs=70,
                steps_per_epoch=num_train//BATCH_SIZE,
                validation_data=(lmfcc_val, label_val),
                validation_steps=num_val//BATCH_SIZE,
                callbacks=get_callbacks(checkpoint_name="trail"))


(?, 128, 256)
(?, 256, 128)
Epoch 1/70

Epoch 00001: val_acc improved from -inf to 0.16777, saving model to ../models/trail01-2.28.hdf5
Epoch 2/70

Epoch 00002: val_acc improved from 0.16777 to 0.18044, saving model to ../models/trail02-2.27.hdf5
Epoch 3/70

Epoch 00003: val_acc did not improve from 0.18044
Epoch 4/70

Epoch 00004: val_acc did not improve from 0.18044
Epoch 5/70

Epoch 00005: val_acc did not improve from 0.18044
Epoch 6/70

Epoch 00006: val_acc improved from 0.18044 to 0.18203, saving model to ../models/trail06-2.27.hdf5
Epoch 7/70
 91/672 [===>..........................] - ETA: 1:01 - loss: 2.3003 - acc: 0.1816

KeyboardInterrupt: 

In [8]:
train_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 128, 256)          0         
_________________________________________________________________
permute_1 (Permute)          (None, 256, 128)          0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 252, 64)           41024     
_________________________________________________________________
activation_1 (Activation)    (None, 252, 64)           0         
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 126, 64)           0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 126, 64)           0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 122, 128)          41088     
__________

In [None]:
#RNN

#Build network
NUM_CLASSES = 12  # Must Change in the tf reader as well
N_LAYERS = 3
CONV_FILTER_COUNT = 64
FILTER_LENGTH = 5

POOL_SIZE = 2

GRU_COUNT = 64
NUM_HIDDEN = 128
L2_regularization = 0.001

# Input
model_input = keras.layers.Input(tensor=lmfcc)
print(model_input.shape)
layer = Permute((2, 1), input_shape=(128, 256))(model_input)
print(layer.shape)
# resize_shape = model.output_shape[2] * model.output_shape[3]
# model.add(Reshape((model.output_shape[1], resize_shape)))

# model.add(Permute((2, 1), input_shape=(10, 64)))
# now: model.output_shape == (None, 64, 10)

## LSTM Layer
layer = LSTM(GRU_COUNT, return_sequences=True)(layer)
layer = LSTM(GRU_COUNT, return_sequences=False)(layer)

## Softmax Outputstart with 0.001 and my guess by the way steps per epoch is happening, i dont think it is seeing all samples in each epoch and thus would take more epochs to decrease the decrease the loss
layer = Dense(NUM_CLASSES)(layer)
layer = Activation('softmax')(layer)
model_output = layer

#model_output = Dense(NUM_CLASSES, activation='relu')(model_output)

#Create your model
train_model = Model(inputs=model_input, outputs=model_output)

#compile
train_model.compile(loss='categorical_crossentropy',
                    optimizer=Adam(lr=0.01),
                    metrics=['accuracy'],
                    target_tensors=[label])

#Train the model
#steps per epoch could be viewed as dataset/batchsize
batch_size = 32
# Better to change checkpoint name before run
train_model.fit(epochs=70,
                steps_per_epoch=num_train//batch_size,
                validation_data=(lmfcc_val, label_val),
                validation_steps=num_val//batch_size,
                callbacks=get_callbacks(checkpoint_name="trail"))

inp (?, 128, 256)
inp (?, 1, 12)
inp (?, 128, 256)
inp (?, 1, 12)
(?, 128, 256)
(?, 256, 128)
Epoch 1/70

Epoch 00001: val_acc improved from -inf to 0.18030, saving model to ../models/trail01-2.27.hdf5
Epoch 2/70

Epoch 00002: val_acc did not improve from 0.18030
Epoch 3/70

Epoch 00003: val_acc did not improve from 0.18030
Epoch 4/70

Epoch 00004: val_acc did not improve from 0.18030
Epoch 5/70

In [None]:
train_model.summary()