In [1]:
import wave
import os
import glob
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
import sklearn as sk
import scipy.io
import librosa
import librosa.display
%matplotlib inline
import tensorflow as tf

In [2]:
def load_sound_files(file_paths):
    raw_sounds = []
    
    
    items = os.listdir(file_paths)

    
    #searches through the input file for any files 
    #named .wav and adds them to the list
    
    newlist = []
    for names in items:
        if names.endswith(".wav"):
            newlist.append(names)
   
    #Loads the files found above in with librosa
    for fp in newlist:
        fp = os.path.join(path, fp)
        X,sr = librosa.load(fp,500)  
        raw_sounds.append(X)
    return raw_sounds



In [3]:
path = '/home/tim/Documents/Masters/Data/93-001-2321.ch13/'
raw_sounds =  load_sound_files(path)

In [4]:
"""
For testing purposes at the moment, code is here to allow you to view the input data.


"""
def plot_waves(sound_names,raw_sounds):
    i = 1
    fig = plt.figure(figsize=(25,60), dpi = 900)
    for n,f in zip(sound_names,raw_sounds):
        plt.subplot(10,1,i)
        librosa.display.waveplot(np.array(f),sr=500)
        plt.title(n.title())
        i += 1
    plt.suptitle('Figure 1: Waveplot',x=0.5, y=0.915,fontsize=18)
    plt.show()
    
def plot_specgram(sound_names,raw_sounds):
    i = 1
    fig = plt.figure(figsize=(25,60), dpi = 900)
    for n,f in zip(sound_names,raw_sounds):
        plt.subplot(10,1,i)
        f_ = librosa.stft(y = f, n_fft= 256, win_length= 256)
        librosa.display.specshow(librosa.amplitude_to_db(f_),
                                        sr = 500,
                                        y_axis = 'log',
                                        hop_length = 64 )
        #specgram(np.array(f), Fs=500, mode = 'psd')
        plt.title(n.title())
        #plt.colorbar(format='%+4.0f dB')
        i += 1
    plt.suptitle('Figure 2: Spectrogram',x=0.5, y=0.915,fontsize=18)
    plt.show()

def plot_log_power_specgram(sound_names,raw_sounds):
    i = 1
    fig = plt.figure(figsize=(25,60), dpi = 1200)
    for n,f in zip(sound_names,raw_sounds):
        plt.subplot(10,1,i)
        D = librosa.logamplitude(np.abs(librosa.stft(f))**2, ref_power=np.max)
        librosa.display.specshow(D,x_axis='time' ,y_axis='log', sr = 500)
        plt.title(n.title())
        i += 1
    plt.suptitle('Figure 3: Log power spectrogram',x=0.5, y=0.915,fontsize=18)
    plt.show()
 
plot_waves('minke',raw_sounds)
plot_specgram('minke',raw_sounds)
plot_log_power_specgram('minke',raw_sounds)

<matplotlib.figure.Figure at 0x7fc259cffef0>

<matplotlib.figure.Figure at 0x7fc259cffe10>

<matplotlib.figure.Figure at 0x7fc259cfffd0>

In [5]:
def extract_features(file_name):
    X, sample_rate = librosa.load(file_name)
    stft = np.abs(librosa.stft(X))
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0)
    #chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
    #mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
    #contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)
    #tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,axis=0)
    return X, sample_rate, mfccs#chroma,mel,contrast,tonnetz


In [6]:
def windows(data, window_size):
    start = 0
    while start < len(data):
        yield start, start + window_size
        start += int(window_size / 2)


In [7]:
# def parse_audio_files(parent_dir,sub_dirs,file_ext='*.wav'):
#     features, labels = np.empty((0, 193)), np.empty(0)

   
#     for label, sub_dir in enumerate(sub_dirs):
         
#         items = os.listdir(os.path.join(parent_dir, sub_dir))
#         labels = labels.itemset(label)
    
#         #searches through the input file for any files 
#         #named .wav and adds them to the list
    
#         files = []
#         for names in items:
            
#             if names.endswith(".wav"):
#                 #loc = os.path.join(items[1], names)
                
#                 files.append(names)
               
#                 #print(files)
        
                
#         for fn in files:
            
#             file = os.path.join(parent_dir, sub_dir, fn)
            
#             mfccs, chroma, mel, contrast,tonnetz = extract_feature(file)
#             ext_features = np.hstack([mfccs,chroma,mel,contrast,tonnetz])
#             features = np.vstack([features,ext_features])
        

        
#     return np.array(features), np.array(labels, dtype = np.float)



# def load_data(data_directory):
#     
#     """
#     Returns the features and labels of the wave data. 
#     """
#     directories = [d for d in os.listdir(data_directory) 
#                    if os.path.isdir(os.path.join(data_directory, d))]
# 
#     features, labels = np.empty((0, 193)), []
#     for d in directories:
#         label_directory = os.path.join(data_directory, d)
#         file_names = [os.path.join(label_directory, f) 
#                       for f in os.listdir(label_directory) 
#                       if f.endswith(".wav")]
#         
#         for f in file_names:
# #             images.append(skimage.data.imread(f))
#             X, sample_rate, mfccs = extract_feature(f)    #chroma, mel, mfccs, contrast,tonnetz - items removed for now.
#             ext_features = np.hstack([X,mfccs])
#             features = np.vstack([ext_features])
#             labels.append(int(d))
#             features = np.hstack(X, mfccs)
#               
#     return features, labels



def extract_features(parent_dir,sub_dirs,file_ext="*.wav",bands = 20, frames = 41):
    window_size = 512 * (frames - 1)

    mfccs = []
    labels = []

    for l, sub_dir in enumerate(sub_dirs):
        
        for fn in glob.glob(os.path.join(parent_dir, sub_dir, file_ext)):
            
            sound_clip, s = librosa.load(fn)
            label = l
            
            for (start, end) in windows(sound_clip, window_size):

                if(len(sound_clip[start:end]) == window_size):
                    signal = sound_clip[start:end]
                    mfcc = librosa.feature.mfcc(y=signal, sr=s, n_mfcc = bands).T.flatten()[:, np.newaxis].T
                    mfccs.append(mfcc)
                    labels.append(label)         
    features = np.asarray(mfccs).reshape(len(mfccs),frames,bands)
    return np.array(features), np.array(labels,dtype = np.int)

In [12]:
def get_directories(ROOT_PATH, directory):

    directories = [d for d in os.listdir(directory) 
                if os.path.isdir(os.path.join(directory, d))]
    print(directories)
    return directories

In [34]:

ROOT_PATH = "/home/tim/Documents/Masters/Data"
train_data_directory = os.path.join(ROOT_PATH, "Autoencoder test/Training")
test_data_directory = os.path.join(ROOT_PATH, "Autoencoder test/Testing")

train_directories = get_directories(ROOT_PATH, train_data_directory)
test_directories = get_directories(ROOT_PATH, test_data_directory)

#tr_features, tr_labels = load_data(train_data_directory)
#ts_features, ts_labels = load_data(train_data_directory)

tr_features, tr_labels = extract_features(train_data_directory, train_directories)
#ts_features, ts_labels = extract_features(train_data_directory, test_directories)


['001', '000', '002']
['001', '000']
20480
['001', '000', '002']


In [35]:
print(tr_features)

[[[ -6.33599509e+02   1.63328188e+02   7.74668508e+01 ...,   4.32764929e+00
     4.19227774e+00   3.31725863e+00]
  [ -6.63992239e+02   1.43559778e+02   9.25532866e+01 ...,   2.32071750e+00
     1.50304815e+00   2.30788872e-01]
  [ -7.29013189e+02   7.45636007e+01   7.28630575e+01 ...,  -7.09248750e+00
    -9.74986211e+00  -1.16289016e+01]
  ..., 
  [ -7.32994500e+02   6.90056216e+01   6.75202209e+01 ...,  -2.58279776e+00
    -4.95425237e+00  -6.65036992e+00]
  [ -6.81462055e+02   1.28940600e+02   9.67135580e+01 ...,   7.08909252e-01
    -5.37511813e-01  -1.32880500e+00]
  [ -6.52693318e+02   1.54232969e+02   9.39386972e+01 ...,   2.10378674e-01
    -6.98548065e-01  -1.37886549e+00]]

 [[ -6.62885641e+02   1.29182642e+02   9.72616213e+01 ...,   2.21115043e+00
     1.28083189e+00   6.93114484e-01]
  [ -6.82325950e+02   1.09364024e+02   9.45133528e+01 ...,   1.22745833e+00
    -9.30089423e-02  -9.26530290e-01]
  [ -7.11987698e+02   7.24798193e+01   7.09682979e+01 ...,  -3.29863108e+00
  

In [14]:
def one_hot_encode(labels):
    n_labels = len(labels)
    print('labels ', n_labels)
    n_unique_labels = len(np.unique(labels))
    print('unique labels ', n_unique_labels)
    one_hot_encode = np.eye(n_unique_labels)
    print('one Hot', one_hot_encode)
    return one_hot_encode

In [36]:
# This stuff needs to be moved from above to clean up the code. 
# ROOT_PATH = "/home/tim/Documents/Masters/Data"
# train_data_directory = os.path.join(ROOT_PATH, "Autoencoder test/Training")
# test_data_directory = os.path.join(ROOT_PATH, "Autoencoder test/Testing")

# tr_features, tr_labels = load_data(train_data_directory)
# ts_features, ts_labels = load_data(train_data_directory)

tr_labels = one_hot_encode(tr_labels)
#ts_labels = one_hot_encode(ts_labels)
print(tr_labels)

labels  19666
unique labels  2
one Hot [[ 1.  0.]
 [ 0.  1.]]
[[ 1.  0.]
 [ 0.  1.]]


In [37]:
tf.reset_default_graph()

learning_rate = 0.01
training_iters = 1000
batch_size = 50
display_step = 200

# Network Parameters
n_input = 20
number_of_layers = 2
n_steps = 41
n_hidden = 300
n_classes = 2 

x = tf.placeholder("float", [None, n_steps, n_input], name= 'x')
y = tf.placeholder("float", [None, n_classes], name = 'y')

weight = tf.Variable(tf.random_normal([n_hidden, n_classes]))
bias = tf.Variable(tf.random_normal([n_classes]))

In [38]:
def lstm_cell(n_hidden,state_is_tuple = True):
  return tf.contrib.rnn.BasicLSTMCell(n_hidden)

In [39]:
def RNN(x, weight, bias, number_of_layers):
    
    cell = tf.contrib.rnn.MultiRNNCell(
    [lstm_cell(n_hidden,state_is_tuple = True) for _ in range(number_of_layers)])
    output, state = tf.nn.dynamic_rnn(cell, x, dtype = tf.float32)
    output = tf.transpose(output, [1, 0, 2])
    last = tf.gather(output, int(output.get_shape()[0]) - 1)
    return tf.nn.softmax(tf.matmul(last, weight) + bias)




In [40]:
prediction = RNN(x, weight, bias, number_of_layers)

# Define loss and optimizer
loss_f = -tf.reduce_sum(y * tf.log(prediction))
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(loss_f)

# Evaluate model
correct_pred = tf.equal(tf.argmax(prediction,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
init = tf.global_variables_initializer()

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [42]:

with tf.Session() as session:
    session.run(init)
    
    for itr in range(training_iters):    
        offset = (itr *  batch_size) % (tr_labels.shape[0] - batch_size)
        batch_x = tr_features[None ,offset:(offset + batch_size), :]
        print('length of batch_x' , len(batch_x))
        batch_y = tr_labels[offset:(offset + batch_size), :]
        print('length of y' , len(batch_y))
        _, c = session.run([optimizer, loss_f],feed_dict={x: batch_x, y : batch_y})
            
        if itr % display_step == 0:
            # Calculate batch accuracy
            acc = session.run(accuracy, feed_dict={x: batch_x, y: batch_y})
            # Calculate batch loss
            loss = session.run(loss_f, feed_dict={x: batch_x, y: batch_y})
            print ("Iter " + str(itr) + ", Minibatch Loss= " + \
                  "{:.6f}".format(loss) + ", Training Accuracy= " + \
                  "{:.5f}".format(acc))
    
    print('Test accuracy: ',round(session.run(accuracy, feed_dict={x: ts_features, y: ts_labels}) , 3))

length of batch_x 1
length of y 2


ValueError: Cannot feed value of shape (1, 50, 41, 20) for Tensor 'x:0', which has shape '(?, 41, 20)'

In [45]:
with tf.Session() as session:
    session.run(init)
    
    for itr in range(training_iters):    
        offset = (itr * batch_size) % (tr_labels.shape[0] - batch_size)
        batch_x = tr_features[offset:(offset + batch_size), :, :]
        batch_y = tr_labels[offset:(offset + batch_size), :]
        _, c = session.run([optimizer, loss_f],feed_dict={x: batch_x, y : batch_y})
            
        if itr % display_step == 0:
            # Calculate batch accuracy
            acc = session.run(accuracy, feed_dict={x: batch_x, y: batch_y})
            # Calculate batch loss
            loss = session.run(loss_f, feed_dict={x: batch_x, y: batch_y})
            print ("Iter " + str(epoch) + ", Minibatch Loss= " + \
                  "{:.6f}".format(loss) + ", Training Accuracy= " + \
                  "{:.5f}".format(acc))
    
    print('Test accuracy: ',round(session.run(accuracy, feed_dict={x: ts_features, y: ts_labels}) , 3))

InvalidArgumentError: Incompatible shapes: [2,2] vs. [50,2]
	 [[Node: gradients/mul_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](gradients/mul_grad/Shape, gradients/mul_grad/Shape_1)]]

Caused by op 'gradients/mul_grad/BroadcastGradientArgs', defined at:
  File "/home/tim/anaconda3/lib/python3.5/runpy.py", line 184, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/tim/anaconda3/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/tim/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/home/tim/anaconda3/lib/python3.5/site-packages/traitlets/config/application.py", line 653, in launch_instance
    app.start()
  File "/home/tim/anaconda3/lib/python3.5/site-packages/ipykernel/kernelapp.py", line 474, in start
    ioloop.IOLoop.instance().start()
  File "/home/tim/anaconda3/lib/python3.5/site-packages/zmq/eventloop/ioloop.py", line 162, in start
    super(ZMQIOLoop, self).start()
  File "/home/tim/anaconda3/lib/python3.5/site-packages/tornado/ioloop.py", line 887, in start
    handler_func(fd_obj, events)
  File "/home/tim/anaconda3/lib/python3.5/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/tim/anaconda3/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/home/tim/anaconda3/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/tim/anaconda3/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/home/tim/anaconda3/lib/python3.5/site-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/tim/anaconda3/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/tim/anaconda3/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/tim/anaconda3/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 390, in execute_request
    user_expressions, allow_stdin)
  File "/home/tim/anaconda3/lib/python3.5/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/tim/anaconda3/lib/python3.5/site-packages/ipykernel/zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/tim/anaconda3/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/tim/anaconda3/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/tim/anaconda3/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-40-61bd6d811072>", line 5, in <module>
    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(loss_f)
  File "/home/tim/anaconda3/lib/python3.5/site-packages/tensorflow/python/training/optimizer.py", line 315, in minimize
    grad_loss=grad_loss)
  File "/home/tim/anaconda3/lib/python3.5/site-packages/tensorflow/python/training/optimizer.py", line 386, in compute_gradients
    colocate_gradients_with_ops=colocate_gradients_with_ops)
  File "/home/tim/anaconda3/lib/python3.5/site-packages/tensorflow/python/ops/gradients_impl.py", line 540, in gradients
    grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
  File "/home/tim/anaconda3/lib/python3.5/site-packages/tensorflow/python/ops/gradients_impl.py", line 346, in _MaybeCompile
    return grad_fn()  # Exit early
  File "/home/tim/anaconda3/lib/python3.5/site-packages/tensorflow/python/ops/gradients_impl.py", line 540, in <lambda>
    grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
  File "/home/tim/anaconda3/lib/python3.5/site-packages/tensorflow/python/ops/math_grad.py", line 663, in _MulGrad
    rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  File "/home/tim/anaconda3/lib/python3.5/site-packages/tensorflow/python/ops/gen_array_ops.py", line 395, in _broadcast_gradient_args
    name=name)
  File "/home/tim/anaconda3/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
    op_def=op_def)
  File "/home/tim/anaconda3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2506, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/home/tim/anaconda3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1269, in __init__
    self._traceback = _extract_stack()

...which was originally created as op 'mul', defined at:
  File "/home/tim/anaconda3/lib/python3.5/runpy.py", line 184, in _run_module_as_main
    "__main__", mod_spec)
[elided 18 identical lines from previous traceback]
  File "/home/tim/anaconda3/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-40-61bd6d811072>", line 4, in <module>
    loss_f = -tf.reduce_sum(y * tf.log(prediction))
  File "/home/tim/anaconda3/lib/python3.5/site-packages/tensorflow/python/ops/math_ops.py", line 838, in binary_op_wrapper
    return func(x, y, name=name)
  File "/home/tim/anaconda3/lib/python3.5/site-packages/tensorflow/python/ops/math_ops.py", line 1061, in _mul_dispatch
    return gen_math_ops._mul(x, y, name=name)
  File "/home/tim/anaconda3/lib/python3.5/site-packages/tensorflow/python/ops/gen_math_ops.py", line 1377, in _mul
    result = _op_def_lib.apply_op("Mul", x=x, y=y, name=name)
  File "/home/tim/anaconda3/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
    op_def=op_def)
  File "/home/tim/anaconda3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2506, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/home/tim/anaconda3/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1269, in __init__
    self._traceback = _extract_stack()

InvalidArgumentError (see above for traceback): Incompatible shapes: [2,2] vs. [50,2]
	 [[Node: gradients/mul_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](gradients/mul_grad/Shape, gradients/mul_grad/Shape_1)]]
