In [1]:
import wave
import os
import glob
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
import sklearn as sk
import scipy.io
import librosa
import librosa.display
%matplotlib inline
import tensorflow as tf

In [2]:
def load_sound_files(file_paths, sr=500):
    """Load every ``.wav`` file found directly inside one directory.

    Parameters
    ----------
    file_paths : str
        Path of the directory to scan. Despite the plural name this is a
        single directory, not a list of files.
    sr : int, optional
        Sampling rate handed to ``librosa.load``. Defaults to 500, the
        value this function used before it became a parameter.

    Returns
    -------
    list
        One entry per ``.wav`` file, in sorted file-name order: the first
        element of the tuple returned by ``librosa.load``.
    """
    # Sort so the result order does not depend on os.listdir's arbitrary order.
    wav_names = sorted(
        name for name in os.listdir(file_paths) if name.endswith(".wav")
    )

    raw_sounds = []
    for name in wav_names:
        # Join against the directory that was passed in, so the function
        # does not depend on any variable defined elsewhere in the notebook.
        X, _ = librosa.load(os.path.join(file_paths, name), sr=sr)
        raw_sounds.append(X)
    return raw_sounds



In [3]:
# Directory holding the .wav recordings to inspect.
# NOTE(review): absolute, machine-specific location - adjust before running elsewhere.
path = '/home/tim/Documents/Masters/Data/93-001-2321.ch13/'
# Load the recordings from that directory; raw_sounds is reused by the plotting cell.
raw_sounds =  load_sound_files(path)

In [4]:
"""
For testing purposes, the helper functions below let you view the input data
as waveforms and spectrograms.
"""
def plot_waves(sound_names, raw_sounds, sr=500, dpi=900):
    """Draw one waveform subplot per sound in a single tall figure.

    Parameters
    ----------
    sound_names : str or iterable of str
        Subplot titles. A single string is used as the title of every
        subplot; an iterable is paired with ``raw_sounds`` item by item
        (surplus items on either side are ignored, as with ``zip``).
    raw_sounds : iterable
        Audio signals; each is converted with ``np.array`` before plotting.
    sr : int, optional
        Sampling rate handed to ``librosa.display.waveplot`` (default 500).
    dpi : int, optional
        Figure resolution (default 900).
        NOTE(review): 25x60 inches at 900 dpi is a very large canvas;
        pass a smaller value if rendering is slow or runs out of memory.
    """
    sounds = list(raw_sounds)
    # A bare string would otherwise be zipped character by character,
    # giving one plot per letter instead of one plot per sound.
    if isinstance(sound_names, str):
        names = [sound_names] * len(sounds)
    else:
        names = list(sound_names)
    pairs = list(zip(names, sounds))

    # Keep the 10-row layout for small inputs, but grow the grid when there
    # are more sounds so plt.subplot never gets an index beyond the grid.
    n_rows = max(10, len(pairs))

    plt.figure(figsize=(25, 60), dpi=dpi)
    for i, (n, f) in enumerate(pairs, start=1):
        plt.subplot(n_rows, 1, i)
        librosa.display.waveplot(np.array(f), sr=sr)
        plt.title(n.title())
    plt.suptitle('Figure 1: Waveplot', x=0.5, y=0.915, fontsize=18)
    plt.show()
    
def plot_specgram(sound_names, raw_sounds, sr=500, dpi=900):
    """Draw one dB-scaled STFT spectrogram subplot per sound.

    Parameters
    ----------
    sound_names : str or iterable of str
        Subplot titles. A single string is used as the title of every
        subplot; an iterable is paired with ``raw_sounds`` item by item
        (surplus items on either side are ignored, as with ``zip``).
    raw_sounds : iterable
        Audio signals passed to ``librosa.stft``.
    sr : int, optional
        Sampling rate handed to ``librosa.display.specshow`` (default 500).
    dpi : int, optional
        Figure resolution (default 900).
        NOTE(review): 25x60 inches at 900 dpi is a very large canvas;
        pass a smaller value if rendering is slow or runs out of memory.
    """
    sounds = list(raw_sounds)
    # A bare string would otherwise be zipped character by character,
    # giving one plot per letter instead of one plot per sound.
    if isinstance(sound_names, str):
        names = [sound_names] * len(sounds)
    else:
        names = list(sound_names)
    pairs = list(zip(names, sounds))

    # Keep the 10-row layout for small inputs, but grow the grid when there
    # are more sounds so plt.subplot never gets an index beyond the grid.
    n_rows = max(10, len(pairs))

    plt.figure(figsize=(25, 60), dpi=dpi)
    for i, (n, f) in enumerate(pairs, start=1):
        plt.subplot(n_rows, 1, i)
        # 256-sample FFT and window; the STFT hop is left at librosa's
        # default, and specshow is told hop_length=64 for its time axis.
        f_ = librosa.stft(y=f, n_fft=256, win_length=256)
        librosa.display.specshow(librosa.amplitude_to_db(f_),
                                 sr=sr,
                                 y_axis='log',
                                 hop_length=64)
        plt.title(n.title())
    plt.suptitle('Figure 2: Spectrogram', x=0.5, y=0.915, fontsize=18)
    plt.show()

def plot_log_power_specgram(sound_names, raw_sounds, sr=500, dpi=900):
    """Draw one log-power spectrogram subplot per sound.

    Parameters
    ----------
    sound_names : str or iterable of str
        Subplot titles. A single string is used as the title of every
        subplot; an iterable is paired with ``raw_sounds`` item by item
        (surplus items on either side are ignored, as with ``zip``).
    raw_sounds : iterable
        Audio signals passed to ``librosa.stft``.
    sr : int, optional
        Sampling rate handed to ``librosa.display.specshow`` (default 500).
    dpi : int, optional
        Figure resolution. Defaults to 900 like the sibling plotting
        helpers: at 1200 dpi the 60-inch side is 72000 px, which is above
        the 2**16 pixels-per-side limit of Matplotlib's Agg renderer.
    """
    sounds = list(raw_sounds)
    # A bare string would otherwise be zipped character by character,
    # giving one plot per letter instead of one plot per sound.
    if isinstance(sound_names, str):
        names = [sound_names] * len(sounds)
    else:
        names = list(sound_names)
    pairs = list(zip(names, sounds))

    # Keep the 10-row layout for small inputs, but grow the grid when there
    # are more sounds so plt.subplot never gets an index beyond the grid.
    n_rows = max(10, len(pairs))

    plt.figure(figsize=(25, 60), dpi=dpi)
    for i, (n, f) in enumerate(pairs, start=1):
        plt.subplot(n_rows, 1, i)
        # power_to_db is the replacement for the deprecated logamplitude;
        # ref=np.max scales each spectrogram relative to its own peak power.
        D = librosa.power_to_db(np.abs(librosa.stft(f)) ** 2, ref=np.max)
        librosa.display.specshow(D, x_axis='time', y_axis='log', sr=sr)
        plt.title(n.title())
    plt.suptitle('Figure 3: Log power spectrogram', x=0.5, y=0.915, fontsize=18)
    plt.show()
 
# Render the three diagnostic figures for the recordings loaded earlier;
# the string 'minke' is what gets passed as the sound_names argument.
plot_waves('minke',raw_sounds)
plot_specgram('minke',raw_sounds)
plot_log_power_specgram('minke',raw_sounds)

<matplotlib.figure.Figure at 0x7ff48fb7eef0>

<matplotlib.figure.Figure at 0x7ff48fb7ee10>

<matplotlib.figure.Figure at 0x7ff48fb7ef98>

In [5]:
def extract_feature(file_name):
    """Load an audio file and return its samples and sampling rate.

    Parameters
    ----------
    file_name : str
        Path of the audio file handed to ``librosa.load``.

    Returns
    -------
    tuple
        ``(X, sample_rate)`` exactly as returned by ``librosa.load``.

    Notes
    -----
    No sampling rate is passed, so librosa's default applies.
    NOTE(review): ``load_sound_files`` loads at 500 Hz - confirm the
    difference between the two loaders is intended.
    """
    # The mfcc / chroma / mel / contrast / tonnetz features are disabled for
    # now, so no STFT is computed either: nothing would consume it.
    X, sample_rate = librosa.load(file_name)
    return X, sample_rate


In [6]:
# def parse_audio_files(parent_dir,sub_dirs,file_ext='*.wav'):
#     features, labels = np.empty((0, 193)), np.empty(0)

   
#     for label, sub_dir in enumerate(sub_dirs):
         
#         items = os.listdir(os.path.join(parent_dir, sub_dir))
#         labels = labels.itemset(label)
    
#         #searches through the input file for any files 
#         #named .wav and adds them to the list
    
#         files = []
#         for names in items:
            
#             if names.endswith(".wav"):
#                 #loc = os.path.join(items[1], names)
                
#                 files.append(names)
               
#                 #print(files)
        
                
#         for fn in files:
            
#             file = os.path.join(parent_dir, sub_dir, fn)
            
#             mfccs, chroma, mel, contrast,tonnetz = extract_feature(file)
#             ext_features = np.hstack([mfccs,chroma,mel,contrast,tonnetz])
#             features = np.vstack([features,ext_features])
        

        
#     return np.array(features), np.array(labels, dtype = np.float)



def load_data(data_directory):
    """Return the features and labels of the wave data.

    Each immediate sub-directory of ``data_directory`` is treated as one
    class; its name is converted with ``int()`` and used as the label of
    every ``.wav`` file inside it.

    Parameters
    ----------
    data_directory : str
        Directory whose sub-directories hold the ``.wav`` files.

    Returns
    -------
    features : numpy.ndarray
        One row per ``.wav`` file, in the same order as ``labels``. Each row
        holds the samples returned by ``extract_feature``; shorter recordings
        are zero-padded at the end to the length of the longest one. When no
        file is found the array has shape ``(0, 193)``.
    labels : list of int
        The integer label of each row.

    Raises
    ------
    ValueError
        If a sub-directory that contains ``.wav`` files has a name that
        ``int()`` cannot parse.
    """
    # Sorted so row order is reproducible regardless of os.listdir's order.
    label_dirs = sorted(
        d for d in os.listdir(data_directory)
        if os.path.isdir(os.path.join(data_directory, d))
    )

    signals, labels = [], []
    for d in label_dirs:
        label_directory = os.path.join(data_directory, d)
        wav_paths = sorted(
            os.path.join(label_directory, f)
            for f in os.listdir(label_directory)
            if f.endswith(".wav")
        )
        for wav_path in wav_paths:
            # Only the raw samples are kept; the spectral features
            # (chroma, mel, mfccs, contrast, tonnetz) are disabled for now.
            X, _sample_rate = extract_feature(wav_path)
            signals.append(np.asarray(X).ravel())
            labels.append(int(d))

    if not signals:
        return np.empty((0, 193)), labels

    # Collect first and build the matrix once, so every file contributes a
    # row (features and labels stay aligned) without re-stacking per file.
    width = max(len(s) for s in signals)
    features = np.zeros((len(signals), width), dtype=signals[0].dtype)
    for row, s in zip(features, signals):
        row[:len(s)] = s
    return features, labels

# Root of the data set; the Training and Testing directories live under it.
# NOTE(review): absolute, machine-specific location - adjust before running elsewhere.
ROOT_PATH = "/home/tim/Documents/Masters/Data"
train_data_directory = os.path.join(ROOT_PATH, "Autoencoder test/Training")
test_data_directory = os.path.join(ROOT_PATH, "Autoencoder test/Testing")

tr_features, tr_labels = load_data(train_data_directory)
# The test split must come from the Testing directory; loading the training
# directory here would make the reported test accuracy a training accuracy.
ts_features, ts_labels = load_data(test_data_directory)

In [None]:
def one_hot_encode(labels):
    """One-hot encode a sequence of class labels.

    Parameters
    ----------
    labels : array-like
        Class labels, one per sample.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(labels), n_unique_labels)``. Row ``i``
        contains a single 1 in the column of sample ``i``'s class; columns
        follow the sorted order of the distinct labels (``np.unique``).
    """
    labels = np.asarray(labels).ravel()
    n_labels = len(labels)
    print('labels ', n_labels)

    # return_inverse gives, for every sample, the column of its class.
    classes, class_index = np.unique(labels, return_inverse=True)
    n_unique_labels = len(classes)
    print('unique labels ', n_unique_labels)

    # One row per sample - an identity matrix would only have one row per
    # class and would not line up with the feature rows.
    encoded = np.zeros((n_labels, n_unique_labels))
    encoded[np.arange(n_labels), class_index] = 1
    print('one Hot', encoded)
    return encoded

In [None]:
# TODO: move the data-loading statements from the earlier cell down here to tidy up the notebook.
# ROOT_PATH = "/home/tim/Documents/Masters/Data"
# train_data_directory = os.path.join(ROOT_PATH, "Autoencoder test/Training")
# test_data_directory = os.path.join(ROOT_PATH, "Autoencoder test/Testing")

# tr_features, tr_labels = load_data(train_data_directory)
# ts_features, ts_labels = load_data(train_data_directory)

# Replace each label list with the array returned by one_hot_encode.
# NOTE(review): tr_labels / ts_labels are reassigned in place, so running this
# cell a second time would encode the already-encoded arrays.
# NOTE(review): the two splits are encoded independently; the number of columns
# follows the distinct labels in each split - confirm both splits contain every class.
tr_labels = one_hot_encode(tr_labels)
ts_labels = one_hot_encode(ts_labels)
print(tr_labels, ts_labels)

In [None]:
# Reset the default graph before (re)defining the model below.
tf.reset_default_graph()

# --- Training hyper-parameters ---
learning_rate, training_iters = 0.01, 1000
batch_size, display_step = 10, 200

# --- Network parameters ---
n_input, n_steps = 1, 50
number_of_layers, n_hidden = 2, 300
n_classes = 2

# Inputs are declared as [batch, n_steps, n_input]; targets as [batch, n_classes].
x = tf.placeholder(tf.float32, shape=[None, n_steps, n_input], name='x')
y = tf.placeholder(tf.float32, shape=[None, n_classes], name='y')

# Output projection applied to the RNN output: n_hidden -> n_classes.
weight = tf.Variable(tf.random_normal(shape=[n_hidden, n_classes]))
bias = tf.Variable(tf.random_normal(shape=[n_classes]))

In [None]:
def lstm_cell(n_hidden, state_is_tuple=True):
    """Create a single ``BasicLSTMCell``.

    Parameters
    ----------
    n_hidden : int
        Number of units, passed as the first argument of ``BasicLSTMCell``.
    state_is_tuple : bool, optional
        Forwarded to ``BasicLSTMCell`` (default ``True``), so the value
        given by the caller actually takes effect.

    Returns
    -------
    The object constructed by ``tf.contrib.rnn.BasicLSTMCell``.
    """
    return tf.contrib.rnn.BasicLSTMCell(n_hidden, state_is_tuple=state_is_tuple)

In [None]:
def RNN(x, weight, bias, number_of_layers):
    """Build a stacked-LSTM classifier and return its softmax output.

    Parameters
    ----------
    x : tensor
        Input fed to ``tf.nn.dynamic_rnn``.
    weight, bias : tensors
        Output projection applied as ``matmul(last, weight) + bias``.
    number_of_layers : int
        How many LSTM cells are stacked in the ``MultiRNNCell``.

    Returns
    -------
    The result of ``tf.nn.softmax`` over the projected last output.

    Notes
    -----
    The size of each cell is read from the module-level ``n_hidden``.
    """
    layers = []
    for _ in range(number_of_layers):
        layers.append(lstm_cell(n_hidden, state_is_tuple=True))
    stacked = tf.contrib.rnn.MultiRNNCell(layers)

    # dynamic_rnn returns (outputs, final_state); only the outputs are used.
    outputs, _final_state = tf.nn.dynamic_rnn(stacked, x, dtype=tf.float32)

    # Swap the first two axes so the final index along axis 0 can be gathered.
    swapped = tf.transpose(outputs, [1, 0, 2])
    final_index = int(swapped.get_shape()[0]) - 1
    last = tf.gather(swapped, final_index)

    logits = tf.matmul(last, weight) + bias
    return tf.nn.softmax(logits)




In [None]:
# Network output: RNN() already applies the softmax, so these are class probabilities.
prediction = RNN(x, weight, bias, number_of_layers)

# Define loss and optimizer
# Cross-entropy summed over the batch. The probabilities are clipped away from
# zero first: log(0) is -inf and would turn the loss and its gradients into NaN.
loss_f = -tf.reduce_sum(y * tf.log(tf.clip_by_value(prediction, 1e-10, 1.0)))
optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(loss_f)

# Evaluate model
# A sample counts as correct when the arg-max class of the prediction matches the target's.
correct_pred = tf.equal(tf.argmax(prediction,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
init = tf.global_variables_initializer()

In [None]:

with tf.Session() as session:
    session.run(init)

    n_train = tr_labels.shape[0]
    # Batch offsets cycle over [0, n_train - batch_size). Clamp the modulus to
    # at least 1 so that having batch_size or fewer samples neither divides by
    # zero nor produces a negative offset.
    offset_span = max(n_train - batch_size, 1)

    for itr in range(training_iters):
        offset = (itr * batch_size) % offset_span
        # NOTE(review): indexing with None prepends a length-1 axis, so batch_x
        # is (1, rows, cols) while `x` is declared as [None, n_steps, n_input]
        # - confirm the intended layout of tr_features before training.
        batch_x = tr_features[None, offset:(offset + batch_size), :]
        batch_y = tr_labels[offset:(offset + batch_size), :]
        feed = {x: batch_x, y: batch_y}
        session.run(optimizer, feed_dict=feed)

        if itr % display_step == 0:
            # Batch accuracy and loss after this step's update, fetched in one run.
            acc, loss = session.run([accuracy, loss_f], feed_dict=feed)
            print ("Iter " + str(itr) + ", Minibatch Loss= " + \
                  "{:.6f}".format(loss) + ", Training Accuracy= " + \
                  "{:.5f}".format(acc))

    # NOTE(review): ts_features is fed as-is and must already match the shape
    # declared for `x` - TODO confirm.
    print('Test accuracy: ',round(session.run(accuracy, feed_dict={x: ts_features, y: ts_labels}) , 3))