# SAMPLE DS MODEL

## 0. IMPORTS

In [10]:
import numpy as np
import matplotlib.pyplot as plt

from IPython.core.display import HTML, display
from IPython.display import display, Audio

import sys
import base64
import struct  
import librosa
import librosa.display

## 6. KERAS

We can load this mfcc data into a DS1 NN

In [15]:
# imports
import keras
from keras import backend as K
from keras.models import Model, Sequential
from keras.layers.recurrent import SimpleRNN
from keras.layers import Dense, Activation, Bidirectional, Reshape, Lambda, Input
from keras.optimizers import SGD, adam

from keras.layers.merge import add, concatenate



In [16]:

def next_train():
    mfcc_nums = 26
    absolute_max_string_len = 126
    
    ## Singledata example
    x, fs = librosa.load("data/ldc93s1/LDC93S1.wav", sr=16000)
    d = librosa.feature.mfcc(x, sr=fs, n_mfcc=mfcc_nums)
    s = "She had your dark suit in greasy wash water all year."
    
    length = len(d)
    batch_size = 1
    
    X_data = np.ones([batch_size, mfcc_nums])
    
    labels = np.ones([batch_size, absolute_max_string_len])
    input_length = np.zeros([batch_size, 1])
    label_length = np.zeros([batch_size, 1])
    
    # todo loop and load in each datapoint
      
    inputs = {
         'the_input': X_data,
         'the_labels': labels,
         'input_length': input_length,
         'label_length': label_length
         }

    outputs = {'ctc': np.zeros([batch_size])}
    
    while 1:
        yield (inputs, outputs)


# Define CTC loss
def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    # the 2 is critical here since the first couple outputs of the RNN
    # tend to be garbage:
    y_pred = y_pred[:, 2:, :]
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)



## 7. MODEL

Let's build the model

In [18]:
# Network Params
fc_size = 2048
rnn_size = 512
rnn_steps = 16
act = 'relu'

# This returns shape of audio mfcc which is 26x92
x, fs = librosa.load("data/ldc93s1/LDC93S1.wav", sr=16000)
mfcc = librosa.feature.mfcc(x, sr=fs, n_mfcc=26)

input_shape = mfcc.shape # 26 x 92

# Creates a tensor there are always 26 MFCC's however we don't know how many secs
input_data = Input(name='the_input', shape=(input_shape[0],))

# First 3 FC layers
x = Dense(fc_size, name='fc1', activation='relu')(input_data)
x = Dense(fc_size, name='fc2', activation='relu')(x)
x = Dense(fc_size, name='fc3', activation='relu')(x)

# Layer 4 BiDirectional RNN
to_rnn_dims = (fc_size/rnn_steps, rnn_steps)
x = Reshape(target_shape=to_rnn_dims, name='reshape')(x)

rnn_1f = SimpleRNN(rnn_size, return_sequences=True, go_backwards=False, kernel_initializer='he_normal', name='rnn_f')(x)
rnn_1b = SimpleRNN(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='rnn_b')(x)
rnn_merged = add([rnn_1f, rnn_1b])
x = Activation('relu', name='birelu')(rnn_merged)

# Layer 5 FC Layer
x = Dense(fc_size, name='fc5', activation='relu')(x)

# Layer 6 Output via softmax
y_pred = Activation('softmax', name='softmax')(x)


In [19]:

# Change shape 
labels = Input(name='the_labels', shape=[126], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')

# Keras doesn't currently support loss funcs with extra parameters
# so CTC loss is implemented in a lambda layer
loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred,
                                                                   labels, 
                                                                   input_length, 
                                                                   label_length])

sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)

model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out)

# the loss calc occurs elsewhere, so use a dummy lambda func for the loss
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)

## What's the output of the model? 



In [20]:
model.summary(line_length=80)

config = model.get_config()


________________________________________________________________________________
Layer (type)              Output Shape      Param #  Connected to               
the_input (InputLayer)    (None, 26)        0                                   
________________________________________________________________________________
fc1 (Dense)               (None, 2048)      55296    the_input[0][0]            
________________________________________________________________________________
fc2 (Dense)               (None, 2048)      4196352  fc1[0][0]                  
________________________________________________________________________________
fc3 (Dense)               (None, 2048)      4196352  fc2[0][0]                  
________________________________________________________________________________
reshape (Reshape)         (None, 128, 16)   0        fc3[0][0]                  
________________________________________________________________________________
rnn_f (SimpleRNN)         (N


## 8. TRAIN




In [21]:
# test_func = K.function([inputs], [y_pred])

# model.fit(inputs, outputs, epochs=10, batch_size=1)  # starts training

model.fit_generator(generator=next_train(), 
                    steps_per_epoch=1,
                    epochs=2,
                    
                    callbacks=[], 
                    validation_data=next_train(),
                    validation_steps=1,
                    initial_epoch=1)


Epoch 2/2


InvalidArgumentError: slice index 0 of dimension 0 out of bounds.
	 [[Node: ctc/scan/strided_slice = StridedSlice[Index=DT_INT32, T=DT_INT32, begin_mask=0, ellipsis_mask=0, end_mask=0, new_axis_mask=0, shrink_axis_mask=1, _device="/job:localhost/replica:0/task:0/cpu:0"](ctc/scan/Shape, ctc/scan/strided_slice/stack, ctc/scan/strided_slice/stack_1, ctc/scan/strided_slice/stack_2)]]

Caused by op u'ctc/scan/strided_slice', defined at:
  File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/runpy.py", line 162, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/System/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/Users/rob/py27/lib/python2.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/Users/rob/py27/lib/python2.7/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Users/rob/py27/lib/python2.7/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/Users/rob/py27/lib/python2.7/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/Users/rob/py27/lib/python2.7/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/Users/rob/py27/lib/python2.7/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/rob/py27/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/Users/rob/py27/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/Users/rob/py27/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/Users/rob/py27/lib/python2.7/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/rob/py27/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/Users/rob/py27/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/Users/rob/py27/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/Users/rob/py27/lib/python2.7/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/rob/py27/lib/python2.7/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/rob/py27/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2705, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/rob/py27/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2809, in run_ast_nodes
    if self.run_code(code, result):
  File "/Users/rob/py27/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2869, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-19-09cbd6b805b2>", line 12, in <module>
    label_length])
  File "/Users/rob/py27/lib/python2.7/site-packages/keras/engine/topology.py", line 596, in __call__
    output = self.call(inputs, **kwargs)
  File "/Users/rob/py27/lib/python2.7/site-packages/keras/layers/core.py", line 647, in call
    return self.function(inputs, **arguments)
  File "<ipython-input-16-47e45d8e21f5>", line 41, in ctc_lambda_func
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)
  File "/Users/rob/py27/lib/python2.7/site-packages/keras/backend/tensorflow_backend.py", line 3648, in ctc_batch_cost
    sparse_labels = tf.to_int32(ctc_label_dense_to_sparse(y_true, label_length))
  File "/Users/rob/py27/lib/python2.7/site-packages/keras/backend/tensorflow_backend.py", line 3612, in ctc_label_dense_to_sparse
    initializer=init, parallel_iterations=1)
  File "/Users/rob/py27/lib/python2.7/site-packages/tensorflow/python/ops/functional_ops.py", line 526, in scan
    n = array_ops.shape(elems_flat[0])[0]
  File "/Users/rob/py27/lib/python2.7/site-packages/tensorflow/python/ops/array_ops.py", line 497, in _SliceHelper
    name=name)
  File "/Users/rob/py27/lib/python2.7/site-packages/tensorflow/python/ops/array_ops.py", line 655, in strided_slice
    shrink_axis_mask=shrink_axis_mask)
  File "/Users/rob/py27/lib/python2.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 3568, in strided_slice
    shrink_axis_mask=shrink_axis_mask, name=name)
  File "/Users/rob/py27/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
    op_def=op_def)
  File "/Users/rob/py27/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/Users/rob/py27/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
    self._traceback = _extract_stack()

InvalidArgumentError (see above for traceback): slice index 0 of dimension 0 out of bounds.
	 [[Node: ctc/scan/strided_slice = StridedSlice[Index=DT_INT32, T=DT_INT32, begin_mask=0, ellipsis_mask=0, end_mask=0, new_axis_mask=0, shrink_axis_mask=1, _device="/job:localhost/replica:0/task:0/cpu:0"](ctc/scan/Shape, ctc/scan/strided_slice/stack, ctc/scan/strided_slice/stack_1, ctc/scan/strided_slice/stack_2)]]


In [9]:
# score = model.evaluate(x_train, y_train, batch_size=1) # starts test

In [None]:
# # Create Model

# # Network Params
# fc_size = 2048
# rnn_size = 512
# rnn_steps = 16

# time_dense_size = 32

# model = Sequential()

# # First 3 FC layers
# model.add(Dense(fc_size, input_shape=(input_shape[0],), name='fc1', activation='relu'))
# model.add(Dense(fc_size, name='fc2', activation='relu'))
# model.add(Dense(fc_size, name='fc3', activation='relu'))

# # Layer 4 BiDirectional RNN
# model.add(Reshape((fc_size/rnn_steps, rnn_steps)))
# model.add(Bidirectional(SimpleRNN(rnn_size, return_sequences=True, go_backwards=False, name='rnn_f')))
# model.add(Bidirectional(SimpleRNN(rnn_size, return_sequences=True, go_backwards=True, name='rnn_b', activation='relu')))

# # Layer 5 FC Layer
# model.add(Dense(fc_size, name='fc5', activation='softmax'))