In [1]:
import os
import time
import librosa
import numpy as np
import tensorflow as tf
import cPickle as pickle
from datetime import datetime
from datetime import timedelta
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support


%matplotlib inline
plt.style.use('ggplot')

plt.rcParams['font.family'] = 'serif'
plt.rcParams['font.serif'] = 'Ubuntu'
plt.rcParams['font.monospace'] = 'Ubuntu Mono'
plt.rcParams['font.size'] = 12
plt.rcParams['axes.labelsize'] = 11
plt.rcParams['axes.labelweight'] = 'bold'
plt.rcParams['axes.titlesize'] = 14
plt.rcParams['xtick.labelsize'] = 10
plt.rcParams['ytick.labelsize'] = 10
plt.rcParams['legend.fontsize'] = 11
plt.rcParams['figure.titlesize'] = 13

In [2]:
pickle_file = "../../my_features/US8K_logmel128_patchslice.pickle"

# NOTE: unpickling can execute arbitrary code -- only load feature files that
# were produced by a trusted pipeline.
with open(pickle_file, "rb") as f:
    dataset = pickle.load(f)

# Features of every split are scaled down by a factor of 10; labels are taken
# as stored.
X_train_all, Y_train_all = dataset["X_train"]/10, dataset["Y_train"]
X_valid_all, Y_valid_all = dataset["X_valid"]/10, dataset["Y_valid"]
X_test1_all, Y_test1_all = dataset["X_test1"]/10, dataset["Y_test1"]
X_test2_all, Y_test2_all = dataset["X_test2"]/10, dataset["Y_test2"]

# Drop the reference to the container dict; only the split arrays are needed.
del dataset

In [3]:
# Sanity checks on the full training labels (one row per sample, one column
# per class).
# Column sums: number of training samples per class.
print np.sum(Y_train_all, axis = 0)
# Grand total of the label matrix (equals the sample count for one-hot rows).
print np.sum(Y_train_all)
# Same per-class counts, derived from the arg-max class index of each row.
print np.unique(np.argmax(Y_train_all, axis=1), return_counts=True)
# Shapes of the feature and label matrices.
print ('training data: ' , X_train_all.shape, Y_train_all.shape)
# Per-class counts again (duplicate of the first print).
print (np.sum(Y_train_all, axis=0))

[ 2793.   765.  2763.  2027.  2321.  2704.   213.  2625.  2717.  2800.]
21728.0
(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([2793,  765, 2763, 2027, 2321, 2704,  213, 2625, 2717, 2800]))
('training data: ', (21728, 16384), (21728, 10))
[ 2793.   765.  2763.  2027.  2321.  2704.   213.  2625.  2717.  2800.]


In [4]:
## Use 2 classes as training data
# Class 0 ("AirCon") and class 4 ("Drilling") in the full label set.
TWO_CLASS_IDS = (0, 4)


def select_classes(X_all, Y_all, class_ids):
    """Restrict a split to the samples belonging to `class_ids`.

    Args:
        X_all: feature matrix, one row per sample.
        Y_all: label matrix with one column per class; the class of a row is
            the index of its largest entry.
        class_ids: class indices to keep; also the order of the label columns
            in the returned label matrix.

    Returns:
        (X_subset, Y_subset): the rows whose class is in `class_ids`, with the
        labels reduced to the `class_ids` columns.
    """
    labels = np.argmax(Y_all, axis=1)
    keep = np.logical_or.reduce([labels == class_id for class_id in class_ids])
    return X_all[keep], Y_all[keep][:, list(class_ids)]


X_train, Y_train = select_classes(X_train_all, Y_train_all, TWO_CLASS_IDS)

X_valid, Y_valid = select_classes(X_valid_all, Y_valid_all, TWO_CLASS_IDS)

X_test1, Y_test1 = select_classes(X_test1_all, Y_test1_all, TWO_CLASS_IDS)

X_test2, Y_test2 = select_classes(X_test2_all, Y_test2_all, TWO_CLASS_IDS)

In [5]:

## Use 6 classes (2, 3, 4, 5, 7, 8) as training data
# X_train = X_train_all[(np.argmax(Y_train_all, axis=1) == 4)+(np.argmax(Y_train_all, axis=1) == 3)+
#                       (np.argmax(Y_train_all, axis=1) == 2)+(np.argmax(Y_train_all, axis=1) == 8)+
#                       (np.argmax(Y_train_all, axis=1) == 7)+(np.argmax(Y_train_all, axis=1) == 5)]
# Y_train = Y_train_all[(np.argmax(Y_train_all, axis=1) == 4)+(np.argmax(Y_train_all, axis=1) == 3)+
#                       (np.argmax(Y_train_all, axis=1) == 2)+(np.argmax(Y_train_all, axis=1) == 8)+
#                       (np.argmax(Y_train_all, axis=1) == 7)+(np.argmax(Y_train_all, axis=1) == 5)][:,(2,3,4,5,8,7)]

# X_valid = X_valid_all[(np.argmax(Y_valid_all, axis=1) == 4)+(np.argmax(Y_valid_all, axis=1) == 3)+
#                       (np.argmax(Y_valid_all, axis=1) == 2)+(np.argmax(Y_valid_all, axis=1) == 8)+ 
#                       (np.argmax(Y_valid_all, axis=1) == 7)+(np.argmax(Y_valid_all, axis=1) == 5)]
# Y_valid = Y_valid_all[(np.argmax(Y_valid_all, axis=1) == 4)+(np.argmax(Y_valid_all, axis=1) == 3)+
#                       (np.argmax(Y_valid_all, axis=1) == 2)+(np.argmax(Y_valid_all, axis=1) == 8)+ 
#                       (np.argmax(Y_valid_all, axis=1) == 7)+(np.argmax(Y_valid_all, axis=1) == 5)][:,(2,3,4,5,8,7)]

# X_test1 = X_test1_all[(np.argmax(Y_test1_all, axis=1) == 4)+(np.argmax(Y_test1_all, axis=1) == 3)+
#                       (np.argmax(Y_test1_all, axis=1) == 2)+(np.argmax(Y_test1_all, axis=1) == 8)+
#                       (np.argmax(Y_test1_all, axis=1) == 7)+(np.argmax(Y_test1_all, axis=1) == 5)]
# Y_test1 = Y_test1_all[(np.argmax(Y_test1_all, axis=1) == 4)+(np.argmax(Y_test1_all, axis=1) == 3)+
#                       (np.argmax(Y_test1_all, axis=1) == 2)+(np.argmax(Y_test1_all, axis=1) == 8)+
#                       (np.argmax(Y_test1_all, axis=1) == 7)+(np.argmax(Y_test1_all, axis=1) == 5)][:,(2,3,4,5,8,7)]

# X_test2 = X_test2_all[(np.argmax(Y_test2_all, axis=1) == 4)+(np.argmax(Y_test2_all, axis=1) == 3)+
#                       (np.argmax(Y_test2_all, axis=1) == 2)+(np.argmax(Y_test2_all, axis=1) == 8)+
#                       (np.argmax(Y_test2_all, axis=1) == 7)+(np.argmax(Y_test2_all, axis=1) == 5)]
# Y_test2 = Y_test2_all[(np.argmax(Y_test2_all, axis=1) == 4)+(np.argmax(Y_test2_all, axis=1) == 3)+
#                       (np.argmax(Y_test2_all, axis=1) == 2)+(np.argmax(Y_test2_all, axis=1) == 8)+
#                       (np.argmax(Y_test2_all, axis=1) == 7)+(np.argmax(Y_test2_all, axis=1) == 5)][:,(2,3,4,5,8,7)]


## Use the whole data set as training data

# X_train = X_train_all
# Y_train = Y_train_all

# X_valid = X_valid_all
# Y_valid = Y_valid_all

# X_test1 = X_test1_all
# Y_test1 = Y_test1_all

# X_test2 = X_test2_all
# Y_test2 = Y_test2_all


In [8]:
# Every sample is fed to the graph as a flat vector of
# IMG_WIDTH * IMG_HEIGHT values.
IMG_SIZE = 128
IMG_WIDTH = IMG_HEIGHT = IMG_SIZE
IMG_SHAPE = (IMG_WIDTH, IMG_HEIGHT)
IMG_FLAT_SIZE = IMG_SHAPE[0] * IMG_SHAPE[1]
# Number of output classes = number of label columns in the selected subset.
N_LABELS = Y_train.shape[1]

In [9]:
# Sanity checks on the selected training subset.
# Label matrix shape: (n_samples, N_LABELS).
print (Y_train.shape)
# Samples per (re-indexed) class in the subset.
print (np.unique(np.argmax(Y_train, axis=1), return_counts=True))
# Feature matrix shape: (n_samples, flattened feature size).
print (X_train.shape)

(5114, 2)
(array([0, 1]), array([2793, 2321]))
(5114, 16384)


## 2. Recurrent Neural Net

In [10]:
## RNN hyper-parameters
n_chunks = 128     # sequence length: steps fed to the RNN per sample (n_frames)
chunk_size = 128   # features per step (n_bands)
n_layers = 2       # number of stacked LSTM layers
hidden_size = 300  # units in each LSTM layer
batch_size = 50    # SGD mini-batch size

In [11]:
## helper function creating layers

def new_weights(shape, stddev):
    """Create a trainable float32 weight variable.

    The initial values are drawn from a truncated normal distribution with
    the given `stddev`; choosing a Xavier-style stddev is the caller's job.

    Args:
        shape: shape of the weight tensor.
        stddev: standard deviation of the truncated normal initialiser.

    Returns:
        A `tf.Variable` of the requested shape.
    """
    return tf.Variable(
        tf.truncated_normal(shape=shape, stddev=stddev, dtype=tf.float32))
## Biases initialization
def new_biases(length):
    """Create a trainable float32 bias vector of `length` zeros."""
    zeros = tf.constant(0, dtype=tf.float32, shape=[length])
    return tf.Variable(zeros)

def new_layer(in_size, out_size):
    """Create the parameters of a fully-connected layer.

    Weights use a Xavier/Glorot-style standard deviation,
    sqrt(2 / (in_size + out_size)); biases start at zero.

    Args:
        in_size: number of input units.
        out_size: number of output units.

    Returns:
        dict with keys 'weights' (shape [in_size, out_size]) and
        'biases' (shape [out_size]), both `tf.Variable`s.
    """
    # A float literal replaces the `np.float` alias, which newer NumPy
    # releases no longer provide; it also keeps the division a true division
    # under Python 2.
    stddev = np.sqrt(2.0 / (in_size + out_size))
    weights = new_weights([in_size, out_size], stddev)
    biases = new_biases(out_size)
    return {'weights': weights, 'biases': biases}

In [12]:
## Placeholder variables to hold input data
# X receives a batch of flattened samples; the batch dimension is left open
# (None) so any batch size can be fed.
X = tf.placeholder(tf.float32, [None, IMG_FLAT_SIZE]) # Input data
# View each flat sample as a sequence of n_chunks steps with chunk_size
# features per step, the layout passed to tf.nn.dynamic_rnn below.
X_rnn = tf.reshape(X, [-1, n_chunks, chunk_size]) # Reshape input data
# Y receives the label rows (one column per class) matching X.
Y = tf.placeholder(tf.float32, [None, N_LABELS]) # Labels
# Show the resulting static shapes.
print X
print X_rnn
print Y

Tensor("Placeholder:0", shape=(?, 16384), dtype=float32)
Tensor("Reshape:0", shape=(?, 128, 128), dtype=float32)
Tensor("Placeholder_1:0", shape=(?, 2), dtype=float32)


In [13]:
layer = new_layer(hidden_size, N_LABELS) # Softmax layer, outmost layer of the network
print ('* Layer: {0}'.format(layer))

# hidden_1_layer = new_layer(hidden_size, 100) # Hidden layer 1, outmost layer of the network
# print ('* Hidden Layer 1: {0}'.format(hidden_1_layer))

# softmax_layer = new_layer(100, N_LABELS) # Softmax layer, outmost layer of the network
# print ('* Hidden Layer 1: {0}'.format(softmax_layer))


def new_lstm_cell():
    """Return a fresh LSTM cell with `hidden_size` units."""
    return tf.nn.rnn_cell.LSTMCell(hidden_size, state_is_tuple=True)


# Multi-layer LSTM.  Every layer gets its OWN cell object: repeating a single
# instance (`[cell] * n_layers`) makes newer TensorFlow releases try to reuse
# one set of weights for layers whose input widths differ (chunk_size for the
# first layer, hidden_size for the others), which fails at graph build time.
rnn_cells = tf.nn.rnn_cell.MultiRNNCell(
    [new_lstm_cell() for layer_idx in range(n_layers)])
print ('* rnn_cells: {0}'.format(rnn_cells))
# dynamic_rnn is batch-major by default: outputs_T is (batch, steps, hidden).
outputs_T, states = tf.nn.dynamic_rnn(rnn_cells, X_rnn, dtype=tf.float32)
print ('* output transpose: {0}'.format(outputs_T))

# ## Single rnn cell
# cell = new_lstm_cell()
# initial_state = cell.zero_state(batch_size, tf.float32)
# print ('* single cell: {0}'.format(cell))
# outputs_T, states = tf.nn.dynamic_rnn(cell=cell, inputs=X_rnn, dtype=tf.float32)
# print ('* outputs transpose: {0}'.format(outputs_T))

# Switch to time-major layout: (steps, batch, hidden).
outputs = tf.transpose(outputs_T, [1,0,2])
print ('* outputs: {0}'.format(outputs))

# Use output of last step as input for softmax layer
# last_step = tf.gather(outputs, int(outputs.get_shape()[0]) - 1)
# print ('* last_step: {0}'.format(last_step))
# input_softmax = tf.matmul(last_step, layer['weights']) + layer['biases']
# print ('* input_softmax: {0}'.format(input_softmax))
# y_rnn_softmax = tf.nn.softmax(input_softmax)
# print ('* y_rnn_softmax: {0}'.format(y_rnn_softmax))


## Use mean of output of all steps as input for softmax layer
# Averaging over axis 0 (the step axis after the transpose) gives one
# hidden_size-wide vector per sample.
mean_step = tf.reduce_mean(input_tensor=outputs, axis=0)
print ('* mean_step: {0}'.format(mean_step))

# # Hidden layer 1:

# hidden_1 = tf.nn.relu(tf.matmul(mean_step, hidden_1_layer['weights']) + hidden_1_layer['biases'])
# print hidden_1
# input_softmax = tf.matmul(hidden_1, softmax_layer['weights']) + softmax_layer['biases']

# Logits of the output layer; the softmax below is only used for prediction,
# the loss consumes the raw logits.
input_softmax = tf.matmul(mean_step, layer['weights']) + layer['biases']
print ('* input_softmax: {0}'.format(input_softmax))
y_rnn_softmax = tf.nn.softmax(input_softmax)
print ('* y_rnn_softmax: {0}'.format(y_rnn_softmax))


* Layer: {'weights': <tensorflow.python.ops.variables.Variable object at 0x7f411875c790>, 'biases': <tensorflow.python.ops.variables.Variable object at 0x7f411c0bead0>}
* rnn_cells: <tensorflow.python.ops.rnn_cell.MultiRNNCell object at 0x7f411c0beb50>
* output transpose: Tensor("RNN/transpose:0", shape=(?, 128, 300), dtype=float32)
* outputs: Tensor("transpose_1:0", shape=(128, ?, 300), dtype=float32)
* mean_step: Tensor("Mean:0", shape=(?, 300), dtype=float32)
* input_softmax: Tensor("add:0", shape=(?, 2), dtype=float32)
* y_rnn_softmax: Tensor("Softmax:0", shape=(?, 2), dtype=float32)


In [14]:
# Step size for the Adam optimizer.
LEARNING_RATE = 1e-3
# NOTE(review): BETA is not referenced anywhere else in the visible notebook;
# presumably a regularisation weight that was never wired into the cost.
BETA = 1e-3
# Per-sample cross entropy computed from the raw logits (not from
# y_rnn_softmax); the op applies the softmax internally.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=input_softmax,labels=Y)
print cross_entropy
# Scalar training objective: mean cross entropy over the batch.
cost = tf.reduce_mean(cross_entropy)
print cost
optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)

# Training op; running it performs one optimisation step on the fed batch.
minimize_cost = optimizer.minimize(cost)
# actual_grads = [grad for grad, _ in optimizer.compute_gradients(cost,[layer['weights']])]
# print actual_grads
# num_grads = tf.gradients(cost, [layer['weights']])
# print num_grads
# errors = tf.contrib.losses.mean_squared_error(actual_grads[0], num_grads[0])
# Class indices of the ground truth and of the prediction, taken as the
# arg-max over the label columns; used by the evaluation helper.
y_true = tf.argmax(Y, dimension=1)
y_pred = tf.argmax(y_rnn_softmax, dimension=1)

Tensor("Reshape_3:0", shape=(?,), dtype=float32)
Tensor("Mean_1:0", shape=(), dtype=float32)


In [15]:
# #slice data for testing model
# X_train = X_train[:4000]
# Y_train = Y_train[:4000]

# print (X_train.shape)
# print (Y_train.shape)
# print (np.unique(np.argmax(Y_train, axis=1), return_counts=True))

In [22]:
# Class index -> display name for the ten classes of the full label set.
classID = dict(enumerate([
    "AirCon",
    "Car horn",
    "Children playing",
    "Dog bark",
    "Drilling",
    "Engine idling",
    "Gun shot",
    "Jackhammer",
    "Siren",
    "Street music",
]))

# Training configuration.
BATCH_SIZE = 50
TRAINING_EPOCHS = 1

# Fix NumPy's global RNG so the shuffling during training is repeatable.
np.random.seed(2017)

## Helper function for optimization
def optimize(train_x, train_y, n_epochs, batch_size, session, saver):
    """Train the network with mini-batches, checkpoint it and plot the cost.

    Relies on the graph objects defined at notebook level: the placeholders
    `X` and `Y`, the training op `minimize_cost` and the scalar `cost`.

    Args:
        train_x: training features, one row per sample.
        train_y: training labels, row-aligned with `train_x`.
        n_epochs: epoch budget; see the note on the epoch loop below.
        batch_size: number of samples per optimisation step (> 0).
        session: open `tf.Session` whose variables are already initialised.
        saver: `tf.train.Saver` used to write the checkpoint.

    Raises:
        ValueError: if there are no training samples or `batch_size` is not
            positive.

    Side effects:
        Prints progress, writes a checkpoint under ./rnn_model/ and shows a
        plot of the per-epoch cost.
    """
    n_samples = train_x.shape[0]
    if n_samples == 0:
        raise ValueError("optimize() needs at least one training sample")
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    # Report about ten times per run; clamp to 1 so short runs (fewer than
    # ten epochs) do not end up computing `epoch % 0`.
    report_every = max(1, n_epochs // 10)

    start_time = time.time()
    # One entry per epoch: the cost of the last batch of that epoch.  A plain
    # list avoids plotting an uninitialised first element.
    cost_history = []
    print ("Training.......")
    print ("-- Elapsed time -- Epoch -- Cost value -- ")

    # NOTE(review): the loop covers epochs 0..n_epochs inclusive, i.e. it
    # trains for n_epochs + 1 passes, as the original code did -- confirm
    # whether that is intended.
    for epoch in range(n_epochs + 1):
        # Fresh sample order for every pass over the data.
        order = np.random.permutation(n_samples)
        train_x_p, train_y_p = train_x[order], train_y[order]

        # Walk the shuffled data front to back; the last batch may be
        # smaller than batch_size, so every sample is used once per epoch.
        for start in range(0, n_samples, batch_size):
            stop = start + batch_size
            feed_dict_train = {X: train_x_p[start:stop], Y: train_y_p[start:stop]}
            _, c = session.run([minimize_cost, cost], feed_dict=feed_dict_train)

        if epoch % report_every == 0:
            print ("-- {:12.6f} -- {:5d} -- {:10.5f} -- ".format(
                time.time() - start_time, epoch, c))
        cost_history.append(c)

    # Save model in folder rnn_model, using the session that was passed in
    # instead of whatever global `sess` happens to exist.
    dir_path = './rnn_model/'
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
    saver.save(session, os.path.join(dir_path, 'new_cnn'))

    # Print running time and output cost value graph
    print ("---Running time: %s seconds ---" % (time.time() - start_time))
    print ('*'*50)
    fig = plt.figure(figsize=(10,5))
    plt.plot(cost_history)
    # x spans the recorded epochs; keep a non-empty range for a single point.
    plt.axis([0, max(len(cost_history) - 1, 1), 0, np.max(cost_history)])
    plt.show()

def output_log_file(train, valid, test1, test2):
    """Append a summary of the current run to log/logfile.txt.

    Reads the notebook-level configuration (`pickle_file`, `IMG_FLAT_SIZE`,
    `n_chunks`, `chunk_size`, `n_layers`, `hidden_size`, `N_LABELS`,
    `TRAINING_EPOCHS`, `BATCH_SIZE`) to describe the model.

    Args:
        train, valid, test1, test2: `(accuracy, f_score)` pairs as returned
            by `make_prediction` for the respective split.
    """
    file_name = pickle_file.split('/')[-1].split('.')[0]

    # Make sure the log directory exists before appending to the file.
    log_dir = "log"
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    # Text mode: only str data is written.
    with open(os.path.join(log_dir, "logfile.txt"), "a") as text_file:
        text_file.write('='*60)
        text_file.write('\n')
        text_file.write("Time: {0}\n".format(datetime.now()))
        text_file.write("Data: " + file_name +"\n")

        # Describe the recurrent model built in this notebook (the previous
        # version logged convolutional-network settings that are not defined
        # here).
        text_file.write("Number of input features: {:5d}\n".format(IMG_FLAT_SIZE))
        text_file.write("Sequence: {0} steps x {1} features\n".format(n_chunks, chunk_size))
        text_file.write("Number of LSTM layers: {0}\n".format(n_layers))
        text_file.write("Hidden units: {:3d}  - {:3d}\n".format(hidden_size, N_LABELS))
        text_file.write("Training epochs: {:5d}\n".format(TRAINING_EPOCHS))
        text_file.write("Batch size: {:3d}\n".format(BATCH_SIZE))

        text_file.write('\tTrain\tValid\tTest1\tYoutube\n')
        # make_prediction returns (accuracy, f_score): index 1 is the F-score,
        # index 0 the accuracy.
        text_file.write("Fscore\t {:1.2f} \t {:1.2f} \t {:1.2f} \t {:1.2f} \n".format(
            train[1], valid[1], test1[1], test2[1]))
        text_file.write("Acc\t {:1.2f} \t {:1.2f} \t {:1.2f} \t {:1.2f} \n".format(
            train[0], valid[0], test1[0], test2[0]))
        
        

## Helper function to run predictions and print accuracy, F-score and confusion matrix
def make_prediction(test_x, test_y, session, batch_size):
    """Predict every sample of a split and report classification metrics.

    Relies on the graph objects defined at notebook level: the placeholders
    `X` and `Y` and the arg-max ops `y_pred` and `y_true`.

    Args:
        test_x: features, one row per sample.
        test_y: labels, row-aligned with `test_x`.
        session: open `tf.Session` holding the trained variables.
        batch_size: number of samples evaluated per `session.run` (> 0).

    Returns:
        (accuracy, f_score).  The F-score uses `average='binary'` when there
        are two labels and `average='macro'` otherwise.

    Side effects:
        Prints accuracy, F-score, the confusion matrix and the running time.
    """
    print ("Making prediction.......")
    start_time = time.time()
    n_samples = test_x.shape[0]
    pred = np.zeros(n_samples)
    true = np.zeros(n_samples)

    # Walk the data front to back so that EVERY sample is evaluated exactly
    # once; the last batch may be shorter.  (Wrapping the start index with a
    # modulo left the tail samples at their initial 0/0 values, which were
    # then counted as correct class-0 predictions.)
    for start in range(0, n_samples, batch_size):
        stop = min(start + batch_size, n_samples)
        # Feed the input placeholder `X` of this notebook's graph.
        feed_dict_test = {X: test_x[start:stop], Y: test_y[start:stop]}
        pred[start:stop], true[start:stop] = session.run([y_pred, y_true],
                                                         feed_dict=feed_dict_test)

    ## Performance Evaluation metrics
    ## Accuracy
    accuracy = accuracy_score(true, pred)
    print ("Accuracy: {:3.2f}".format(accuracy))
    ## F-score
    if(N_LABELS == 2):
        p,r,f,s = precision_recall_fscore_support(true, pred, average='binary')
    else:
        p,r,f,s = precision_recall_fscore_support(true, pred, average='macro')
    print ("F-Score: {:3.2f}".format(f))

    ## Confusion matrix
    print ("Confusion Matrix")
    print (confusion_matrix(true, pred))
#     print ("Analyzing result")
#     for ID, name in classID.items():
#         x = pred[(true == ID)*(pred != ID)]
#         unique_class, unique_count = np.unique(x, return_counts=True)
#         print ('-'*50)
#         print " Class {0} is mistaken with: ".format(name)
#         for c, y in zip(unique_class, unique_count):
#             print "# {0}: {1} samples".format(classID[c], y)
#     print ('-'*50)
    print ("---Running time: {0} seconds ---".format((time.time() - start_time)))
    print ('*'*50)
    return accuracy, f
                    

In [23]:
with tf.Session() as sess:
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())
    optimize(X_train, Y_train, TRAINING_EPOCHS, batch_size, sess, saver)

    # (banner, features, labels) of each split, evaluated in this order.
    eval_splits = [
        ("Making prediction on training set", X_train, Y_train),
        ("Making prediction on validation set", X_valid, Y_valid),
        ("Making training prediction on test 1 set", X_test1, Y_test1),
        ("Making training prediction on test 2 set", X_test2, Y_test2),
    ]
    scores = []
    for banner, split_x, split_y in eval_splits:
        print (banner)
        scores.append(make_prediction(split_x, split_y, sess, batch_size))

    # Keep the per-split results under their usual names.
    train, valid, test1, test2 = scores
#     output_log_file(train, valid, test1, test2)

Training.......
-- Elapsed time -- Epoch -- Cost value -- 


KeyboardInterrupt: 