# Notebook to run classification on the ESC50 or UrbanSound8K datasets. The inputs are spectrogram images generated from the audio samples. See the DataPrep folder for the functions that prepare the spectrograms.

### First we load the images, then arrange them into numpy arrays

In [1]:
%matplotlib notebook

import numpy as np
import os
import re
import tensorflow as tf
import time
from datetime import datetime
import matplotlib.pyplot as plt
from params import *

import model_single2 as m #import single-task learning CNN model

import utils.pickledModel as pickledModel
import utils.spectreader as spectreader

#np.set_printoptions(threshold=np.nan)  #if want to see full output of big arrays in Jupyter notebook

In [2]:
def time_taken(elapsed):
    """Format a duration in seconds as an h:mm:ss string.

    elapsed: number of seconds, e.g. the difference between two
             time.monotonic() readings.
    Returns a string; the %d conversions truncate any fractional part.
    """
    total_minutes, seconds = divmod(elapsed, 60)
    hours, minutes = divmod(total_minutes, 60)
    fields = (hours, minutes, seconds)
    return "%d:%02d:%02d" % fields

In [3]:
#functions to save/load numpy arrays to/from file

def save_sets(sets,name):
    """Save every array in `sets` to its own .npy file below save_path.

    sets: iterable of arrays to be written
    name: sequence of file-name stems, indexed in the same order as `sets`
    Each array ends up at save_path + '/<stem>.npy'.
    """
    for position, array in enumerate(sets):
        destination = save_path + '/{}.npy'.format(name[position])
        np.save(destination, array)

def load_set(sets):
    """Load one array from '<sets>.npy' and return it.

    sets: path of the file without the '.npy' extension. The path is used
          as given; save_path is NOT prepended here.
    Use this when the data already exists on disk or when the dataset should
    not be reshuffled.
    """
    filename = '{}.npy'.format(sets)
    return np.load(filename)

In [4]:
# Decide where the frequency bins live in the network input:
#   "2D" - frequency bins form the image height, with a single input channel
#   "1D" - the image has height 1 and every frequency bin is an input channel
# Strings are compared with ==; `is` tests object identity and only appears to
# work when the interpreter happens to intern both strings.
if FRE_ORIENTATION == "2D":
    k_height = K_FREQBINS
    k_inputChannels = 1
elif FRE_ORIENTATION == "1D":
    k_height = 1
    k_inputChannels = K_FREQBINS
else:
    raise ValueError("please only enter '1D' or '2D'")


# Dictionary of parameters to serialize, so that the network can be properly
# reconstructed later; it also documents the configuration of this run.
parameters = dict([
    ('k_height', k_height),
    ('k_numFrames', K_NUMFRAMES),
    ('k_inputChannels', k_inputChannels),
    ('K_NUMCONVLAYERS', m.K_NUMCONVLAYERS),
    ('L1_CHANNELS', L1_CHANNELS),
    ('L2_CHANNELS', m.L2_CHANNELS),
    ('FC_SIZE', FC_SIZE),
    ('K_ConvRows', m.K_ConvRows),
    ('K_ConvCols', m.K_ConvCols),
    ('k_ConvStrideRows', m.k_ConvStrideRows),
    ('k_ConvStrideCols', m.k_ConvStrideCols),
    ('k_poolRows', m.k_poolRows),
    ('k_poolStrideRows', m.k_poolStrideRows),
    ('k_downsampledHeight', m.k_downsampledHeight),
    ('k_downsampledWidth', m.k_downsampledWidth),
    ('freqorientation', FRE_ORIENTATION),
])

In [5]:
def getImage(fnames, fre_orientation, nepochs=None) :
    """ Reads data from the prepared *list* of TFRecord files in fnames and does some preprocessing.
    params:
    fnames - list of filenames to read data from
    fre_orientation - "1D" moves the frequency bins into the channel dimension, so the image
        becomes [NUM_CHANNELS, K_NUMFRAMES, K_FREQBINS]; any other value leaves the image as
        [K_FREQBINS, K_NUMFRAMES, NUM_CHANNELS]
    nepochs - An integer (optional). Passed through to spectreader.getImage. Per the original note it is
        fed to tf.string_input_producer(): reads through all data num_epochs times before generating
        an OutOfRange error. None means read forever.
    returns:
    (label, image) - label is a one-hot vector of depth N_LABELS, image is shaped as described above
    """
    label, image = spectreader.getImage(fnames, nepochs)

    #no need to flatten - must just be explicit about shape so that shuffle_batch will work
    image = tf.reshape(image,[K_FREQBINS,K_NUMFRAMES,NUM_CHANNELS])
    # compare by value; `is` on a string literal tests identity and is not reliable
    if fre_orientation == "1D":
        # The image is rank 3 at this point (no batch axis yet), so the permutation must have
        # three entries: (freqbins, frames, channels) -> (channels, frames, freqbins).
        # This matches the "1D" placeholder layout [batch, NUM_CHANNELS, K_NUMFRAMES, K_FREQBINS].
        image = tf.transpose(image, perm=[2,1,0]) #moves freqbins from height to channel dimension

    # re-define label as a "one-hot" vector of length N_LABELS
    # label-1 shifts the stored label before encoding - presumably the records use
    # 1-based class ids; confirm against the DataPrep scripts
    label=tf.stack(tf.one_hot(label-1, N_LABELS))
    print ("getImage returning")
    return label, image

def get_TFR_folds(a_dir, foldnumlist):
    """ Returns a list of file paths in a_dir whose names start with foldX, where X is in foldnumlist.

    a_dir: directory to scan (not recursive)
    foldnumlist: iterable of fold numbers
    The fold number must not be followed by another digit, so asking for fold 1
    does not also return the files of fold 10, 11, ...
    Paths are returned as a_dir + '/' + name, in os.listdir order per fold.
    """
    lis = []
    for num in foldnumlist :
        prefix = "fold" + str(num)
        cut = len(prefix)
        # name[cut:cut + 1] is the character right after the prefix ('' at end of name)
        lis.extend(a_dir + '/' + name for name in os.listdir(a_dir)
            if name.startswith(prefix) and not name[cut:cut + 1].isdigit())
    return lis


# Smoke test of the input pipeline: start the queue runners, pull a single
# batch from imageBatch/labelBatch and print the labels.
# NOTE(review): imageBatch and labelBatch are not assigned anywhere above this
# point in the notebook as shown; they are created by the tf.train.shuffle_batch
# call further down. This section presumably has to run after that one - confirm
# the intended cell order.
sess = tf.Session()
# global and local variables are both initialised (the local ones presumably hold
# the epoch counters of the input producers - confirm)
sess.run(tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()))

coord = tf.train.Coordinator()
enqueue_threads = tf.train.start_queue_runners(sess=sess,coord=coord)

try:
    if coord.should_stop():
        print("coord should stop")

    print("labelBatch is " + str(labelBatch))
    X_batch, Y_batch = sess.run([imageBatch, labelBatch])
    print("Y_Batch is " + str(Y_batch))

except (tf.errors.OutOfRangeError) as e:
    # input queue ran out of data: hand the exception to the coordinator
    coord.request_stop(e)

finally :
    # always stop and wait for the enqueue threads
    coord.request_stop()
    coord.join(enqueue_threads)    

sess.close()
    
print("done")
# NOTE(review): the function is only referenced here, not called (no parentheses),
# so this line has no effect. Verify whether a reset is actually wanted before
# adding the call: ops built earlier (e.g. m.weights) would presumably not be part
# of a fresh default graph.
tf.reset_default_graph 

### Now time to train and test the CNN


# Build the input pipeline for one train/validation split: `fold` is held out
# for validation and all remaining folds are used for training.
foldlist=[1,2,3,4,5]
fold=5
datanumlist=[n for n in foldlist if n != fold]
validatenumlist=[fold]

# getImage takes the frequency orientation as its required second argument;
# leaving it out raises a TypeError.
datafnames=get_TFR_folds("../DataPrep/stft_png", datanumlist)
target, data = getImage(datafnames, FRE_ORIENTATION, nepochs=EPOCHS)

# no nepochs for the validation reader: None means it can be read repeatedly
validatefnames=get_TFR_folds("../DataPrep/stft_png", validatenumlist)
vtarget, vdata = getImage(validatefnames, FRE_ORIENTATION)

NUM_THREADS=2
#k_batchsize = batch_size
#k_vbatchsize = 2

# training batches are drawn in shuffled order
imageBatch, labelBatch = tf.train.shuffle_batch(
    [data, target], batch_size=BATCH_SIZE,
    num_threads=NUM_THREADS,
    allow_smaller_final_batch=True, #want to finish an epoch even if datasize doesn't divide by batchsize
    enqueue_many=False, #IMPORTANT to get right, default=False -
    capacity=1000,  #1000,
    min_after_dequeue=500) #500

# validation batches are not shuffled
vimageBatch, vlabelBatch = tf.train.batch(
    [vdata, vtarget], batch_size=BATCH_SIZE,
    num_threads=NUM_THREADS,
    allow_smaller_final_batch=True, #want to finish an epoch even if datasize doesn't divide by batchsize
    enqueue_many=False, #IMPORTANT to get right, default=False -
    capacity=1000)

In [6]:
# Path for tf.summary.FileWriter and to store model checkpoints
filewriter_path = save_path + "/filewriter/"
checkpoint_path = save_path + "/checkpoint/"

# Create the checkpoint directory together with any missing parent directory.
# os.mkdir only creates the last path component and fails when save_path itself
# does not exist yet; exist_ok makes re-running this cell harmless.
os.makedirs(checkpoint_path, exist_ok=True)

#********************************************************************

# tf Graph input placeholders
# The image layout depends on the frequency orientation. Strings are compared
# with == because `is` tests object identity, not equality.
if FRE_ORIENTATION == "2D":
    x = tf.placeholder(tf.float32, [BATCH_SIZE, K_FREQBINS, K_NUMFRAMES, NUM_CHANNELS])
elif FRE_ORIENTATION == "1D":
    x = tf.placeholder(tf.float32, [BATCH_SIZE, NUM_CHANNELS, K_NUMFRAMES, K_FREQBINS])
else:
    # fail here with a clear message instead of a NameError on `x` further down
    raise ValueError("please only enter '1D' or '2D'")

# one-hot labels, one row per example
y = tf.placeholder(tf.int32, [None, N_LABELS])
keep_prob = tf.placeholder(tf.float32, (), name="keepProb") #dropout (keep probability)

# Forward pass of the CNN; `pred` is used as the logits below
pred = m.conv_net(x, m.weights, m.biases, keep_prob)

# L2 penalties, each scaled by beta
all_weight_norms = [tf.nn.l2_loss(w) for w in m.weights.values()]
lossL2 = tf.add_n(all_weight_norms) * beta #L2 reg on all weight layers
dense_weight_norms = [tf.nn.l2_loss(m.weights[key]) for key in ('wd1', 'wout')]
lossL2_onlyfull = tf.add_n(dense_weight_norms) * beta #L2 reg on dense layers

# Loss: mean softmax cross-entropy, optionally with one of the L2 penalties added
with tf.name_scope("cross_ent"):
    xent = tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y)
    if not l2reg:
        loss = tf.reduce_mean(xent)
    elif l2regfull:
        loss = tf.reduce_mean(xent + lossL2_onlyfull)
    else:
        loss = tf.reduce_mean(xent + lossL2)

# Training step: Adam minimising the loss
with tf.name_scope("train"):
    adam = tf.train.AdamOptimizer(epsilon=epsilon)
    optimizer = adam.minimize(loss)

# Log the loss (including any L2 term) to the summary
tf.summary.scalar('cross_entropy', loss)

# Class probabilities
prob = tf.nn.softmax(pred)

# Accuracy in percent: share of examples whose arg-max prediction equals the label
with tf.name_scope("accuracy"):
    predicted_class = tf.argmax(prob, 1)
    true_class = tf.argmax(y, 1)
    correct_pred = tf.equal(predicted_class, true_class)
    accuracy = 100*tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Log the accuracy to the summary
tf.summary.scalar('accuracy', accuracy)

# Single op that evaluates every summary registered so far
merged_summary = tf.summary.merge_all()

# Saver used to write model checkpoints
saver = tf.train.Saver()

In [7]:
# Cross-validation driver: every fold in foldlist is held out once as the test
# set while the network is trained on the remaining folds. Progress is printed,
# written to a text log and summarised for TensorBoard.
NUM_THREADS = 4
foldlist = [1,2,3,4,5]
max_acc = []     # best test accuracy of each fold (None if the fold was never evaluated)
max_epochs = []  # 1-based position of that best evaluation within the fold

start_time_long = time.monotonic()

# `with` makes sure the log file is flushed and closed even when a fold raises
with open(save_path + "/stft-double_v2.txt", "w") as text_file: #save training data
    print("{} Open Tensorboard at --logdir {}".format(datetime.now(), filewriter_path))

    for fold in foldlist:

        test_acc_list = []

        datanumlist=[n for n in foldlist if n != fold]
        validatenumlist=[fold]

        # NOTE(review): every iteration adds new reader/queue ops to the same default
        # graph, and start_queue_runners below starts all runners registered so far,
        # including those of earlier folds. Consider isolating the input pipeline per fold.
        datafnames=get_TFR_folds(INDIR, datanumlist)
        target, data = getImage(datafnames, FRE_ORIENTATION, nepochs=EPOCHS)

        validatefnames=get_TFR_folds(INDIR, validatenumlist)
        vtarget, vdata = getImage(validatefnames, FRE_ORIENTATION)

        imageBatch, labelBatch = tf.train.shuffle_batch(
            [data, target], batch_size=BATCH_SIZE,
            num_threads=NUM_THREADS,
            allow_smaller_final_batch=True, #want to finish an epoch even if datasize doesn't divide by batchsize
            enqueue_many=False, #IMPORTANT to get right, default=False -
            capacity=1000,  #1000,
            min_after_dequeue=500) #500

        vimageBatch, vlabelBatch = tf.train.batch(
            [vdata, vtarget], batch_size=BATCH_SIZE,
            num_threads=NUM_THREADS,
            allow_smaller_final_batch=True, #want to finish an epoch even if datasize doesn't divide by batchsize
            enqueue_many=False, #IMPORTANT to get right, default=False -
            capacity=1000)

        text_file.write('*** Initializing fold #%u as test set ***\n' % fold)
        print('*** Initializing fold #%u as test set ***' % fold)

        # One FileWriter per fold; it is closed in the `finally` below
        writer = tf.summary.FileWriter(filewriter_path + str(fold))

        with tf.Session() as sess:

            # Initialize all variables
            sess.run(tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()))
            # Add the model graph to TensorBoard
            writer.add_graph(sess.graph)

            coord = tf.train.Coordinator()
            enqueue_threads = tf.train.start_queue_runners(sess=sess,coord=coord)

            print("{} Start training...".format(datetime.now()))
            start_time = time.monotonic()

            try:
                if coord.should_stop():
                    print("coord should stop")

                e = 1
                step = 1
                print("{} Epoch number: {}".format(datetime.now(), e))

                while True:  # for each minibatch until data runs out after specified number of epochs
                    if coord.should_stop():
                        print("data feed done, quitting")
                        break

                    #create training mini-batch here
                    batch_data, batch_labels = sess.run([imageBatch, labelBatch])
                    #train and backprop
                    sess.run(optimizer, feed_dict= {x:batch_data, y:batch_labels, keep_prob:dropout})

                    #run merged_summary to display progress on Tensorboard
                    if (step % display_step == 0):
                        s = sess.run(merged_summary, feed_dict={x: batch_data, y: batch_labels, keep_prob: 1.})
                        writer.add_summary(s, step)

                    if (step % testNSteps == 0):
                        test_acc = 0.
                        test_count = 0
                        for j in range(test_batches_per_epoch):
                            try:
                                #prepare test mini-batch
                                test_batch, label_batch = sess.run([vimageBatch, vlabelBatch])

                                acc = sess.run(accuracy, feed_dict={x: test_batch, y: label_batch, keep_prob: 1.})
                                # weight by the number of examples actually in this batch
                                batch_n = len(test_batch)
                                test_acc += acc*batch_n
                                test_count += batch_n
                            except (Exception) as ex: #triggered if we run out of validation data to feed queue
                                print(ex)

                        #calculate total test accuracy
                        if test_count:
                            test_acc /= test_count
                            print("{} Test Accuracy = {:.4f}".format(datetime.now(),test_acc))
                            text_file.write("{} Test Accuracy = {:.4f}\n".format(datetime.now(),test_acc))
                            test_acc_list.append(test_acc)
                        else:
                            # every validation batch failed: nothing to average
                            print("{} No validation batch could be evaluated".format(datetime.now()))

                    if (step % train_batches_per_epoch == 0):
                        e += 1
                        print("{} Epoch number: {}".format(datetime.now(), e))
                        #save checkpoint of the model
                        if (e % checkpoint_epoch == 0):
                            checkpoint_name = os.path.join(checkpoint_path, dataset_name+'model_fold'+str(fold)+'_epoch'+str(e)+'.ckpt')
                            saver.save(sess, checkpoint_name)
                            print("{} Model checkpoint saved at {}".format(datetime.now(), checkpoint_name))

                    step += 1

            except (tf.errors.OutOfRangeError) as ex:
                # training queue is exhausted after the requested number of epochs
                coord.request_stop(ex)

            finally :
                coord.request_stop()
                coord.join(enqueue_threads)
                # flush and close this fold's event file
                writer.close()

            # find the max test score and the evaluation it belongs to
            # NOTE(review): index+1 equals the epoch number only if the test runs exactly
            # once per epoch (testNSteps == train_batches_per_epoch) - confirm in params
            if test_acc_list:
                best_acc = max(test_acc_list)
                max_acc.append(best_acc)
                max_epochs.append(test_acc_list.index(best_acc)+1)
            else:
                # keep both lists aligned with foldlist even without any evaluation
                max_acc.append(None)
                max_epochs.append(None)

            elapsed_time = time.monotonic() - start_time
            print(elapsed_time)
            text_file.write("--- Training time taken: {} ---\n".format(time_taken(elapsed_time)))
            print("--- Training time taken:",time_taken(elapsed_time),"---")
            print("------------------------")

            # show the max accuracies of the folds so far and their respective evaluations
            print(max_acc)
            print(max_epochs)

        #tf.reset_default_graph()

    elapsed_time_long = time.monotonic() - start_time_long
    print("*** All runs completed ***")
    text_file.write("Total time taken:")
    text_file.write(time_taken(elapsed_time_long))
    print("Total time taken:",time_taken(elapsed_time_long))

2017-07-25 14:05:23.419506 Open Tensorboard at --logdir ../Results/filewriter/
getImage ['../DataPrep/stft_png/fold2-00003-of-00004', '../DataPrep/stft_png/fold2-00000-of-00004', '../DataPrep/stft_png/fold2-00001-of-00004', '../DataPrep/stft_png/fold2-00002-of-00004', '../DataPrep/stft_png/fold3-00000-of-00004', '../DataPrep/stft_png/fold3-00002-of-00004', '../DataPrep/stft_png/fold3-00001-of-00004', '../DataPrep/stft_png/fold3-00003-of-00004', '../DataPrep/stft_png/fold4-00002-of-00004', '../DataPrep/stft_png/fold4-00003-of-00004', '../DataPrep/stft_png/fold4-00001-of-00004', '../DataPrep/stft_png/fold4-00000-of-00004', '../DataPrep/stft_png/fold5-00001-of-00004', '../DataPrep/stft_png/fold5-00002-of-00004', '../DataPrep/stft_png/fold5-00000-of-00004', '../DataPrep/stft_png/fold5-00003-of-00004']
TFRecordReader produced Tensor("ReaderNumRecordsProducedV2:0", shape=(), dtype=int64) records
getImage returning
getImage ['../DataPrep/stft_png/fold1-00002-of-00004', '../DataPrep/stft_png/f

TFRecordReader produced Tensor("ReaderNumRecordsProducedV2_9:0", shape=(), dtype=int64) records
getImage returning
*** Initializing fold #5 as test set ***
2017-07-25 14:08:13.144403 Start training...
2017-07-25 14:08:13.144536 Epoch number: 1
2017-07-25 14:08:25.288163 Test Accuracy = 5.5000
2017-07-25 14:08:25.288854 Epoch number: 2
2017-07-25 14:08:33.271746 Test Accuracy = 7.0000
2017-07-25 14:08:33.271954 Epoch number: 3
2017-07-25 14:08:41.129375 Test Accuracy = 9.5000
2017-07-25 14:08:41.131456 Epoch number: 4
2017-07-25 14:08:49.084533 Test Accuracy = 9.7500
2017-07-25 14:08:49.084848 Epoch number: 5
2017-07-25 14:08:56.858710 Test Accuracy = 14.2500
2017-07-25 14:08:56.859165 Epoch number: 6
43.97479528400072
--- Training time taken: 0:00:43 ---
------------------------
[19.500000524520875, 21.000000572204591, 16.000000286102296, 15.250000190734863, 14.250000476837158]
[5, 5, 5, 5, 5]
*** All runs completed ***
Total time taken: 0:03:33


In [None]:
# NOTE(review): scratch cell. `a` is used as the dictionary key before this
# statement assigns it, and no earlier assignment to `a` is visible in this
# notebook, so this presumably raises NameError on a fresh kernel - confirm
# the intent or delete the cell.
a = {a:1}

In [14]:
# Scratch cell for inspecting the model weights.
# Rebinds `sess` to a new interactive session.
sess = tf.InteractiveSession()

# Wraps the 'wc1' weight tensor in a tf.Print op; the op is only built here and
# is not evaluated in this cell.
a = tf.Print(m.weights['wc1'],[m.weights['wc1']])
#print(a)
#         print(m.weights)
#         print(m.biases)
# Disabled draft that collects the values of the trainable variables into a dict:
#     trainable = [m.weights,m.biases]
#     state={}
#     for v in trainable[0] :
#         state[v.name] = sess.run(v)
#     print(state)

In [None]:
# Export the graph definition to OUTDIR and pickle the network state together
# with the `parameters` dictionary.
meta_graph_def = tf.train.export_meta_graph(filename=OUTDIR + '/my-model.meta')
# NOTE(review): `trainable` is only assigned inside commented-out code in the
# previous cell, so this call presumably raises NameError as written. `sess` is
# whichever session was bound last - confirm that it is still open and holds the
# trained variables.
pickledModel.saveState(sess, trainable, parameters, OUTDIR + '/state.pickle') 

# Train and evaluate on one split only: `fold` is held out as the test set and
# the remaining folds are used for training.
foldlist=[1,2,3,4,5]
fold=4
datanumlist=[n for n in foldlist if n != fold]
validatenumlist=[fold]

# getImage takes the frequency orientation as its required second argument;
# leaving it out raises a TypeError.
datafnames=get_TFR_folds("../DataPrep/stft_png", datanumlist)
target, data = getImage(datafnames, FRE_ORIENTATION, nepochs=EPOCHS)

validatefnames=get_TFR_folds("../DataPrep/stft_png", validatenumlist)
vtarget, vdata = getImage(validatefnames, FRE_ORIENTATION)

NUM_THREADS=2
#k_batchsize = batch_size
#k_vbatchsize = 2

imageBatch, labelBatch = tf.train.shuffle_batch(
    [data, target], batch_size=BATCH_SIZE,
    num_threads=NUM_THREADS,
    allow_smaller_final_batch=True, #want to finish an epoch even if datasize doesn't divide by batchsize
    enqueue_many=False, #IMPORTANT to get right, default=False -
    capacity=1000,  #1000,
    min_after_dequeue=500) #500

vimageBatch, vlabelBatch = tf.train.batch(
    [vdata, vtarget], batch_size=BATCH_SIZE,
    num_threads=NUM_THREADS,
    allow_smaller_final_batch=True, #want to finish an epoch even if datasize doesn't divide by batchsize
    enqueue_many=False, #IMPORTANT to get right, default=False -
    capacity=1000)

max_acc = []     # best test accuracy (None if the fold was never evaluated)
max_epochs = []  # 1-based position of that best evaluation
test_acc_list = []

start_time_long = time.monotonic()

# NOTE(review): this opens the same log file name, in "w" mode, as the
# cross-validation cell, so an existing log of that run is overwritten.
# `with` makes sure the file is flushed and closed even when training raises.
with open(save_path + "/stft-double_v2.txt", "w") as text_file: #save training data
    print("{} Open Tensorboard at --logdir {}".format(datetime.now(), filewriter_path))

    text_file.write('*** Initializing fold #%u as test set ***\n' % fold)
    print('*** Initializing fold #%u as test set ***' % fold)

    # Initialize the FileWriter; it is closed in the `finally` below
    writer = tf.summary.FileWriter(filewriter_path + str(fold))

    with tf.Session() as sess:

        # Initialize all variables
        sess.run(tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()))
        # Add the model graph to TensorBoard
        writer.add_graph(sess.graph)

        coord = tf.train.Coordinator()
        enqueue_threads = tf.train.start_queue_runners(sess=sess,coord=coord)

        print("{} Start training...".format(datetime.now()))
        start_time = time.monotonic()

        try:
            if coord.should_stop():
                print("coord should stop")

            e = 1
            step = 1
            print("{} Epoch number: {}".format(datetime.now(), e))

            while True:  # for each minibatch until data runs out after specified number of epochs
                if coord.should_stop():
                    print("data feed done, quitting")
                    break

                #create training mini-batch here
                batch_data, batch_labels = sess.run([imageBatch, labelBatch])
                #train and backprop
                sess.run(optimizer, feed_dict= {x:batch_data, y:batch_labels, keep_prob:dropout})

                #run merged_summary to display progress on Tensorboard
                if (step % display_step == 0):
                    s = sess.run(merged_summary, feed_dict={x: batch_data, y: batch_labels, keep_prob: 1.})
                    writer.add_summary(s, step)

                if (step % testNSteps == 0):
                    test_acc = 0.
                    test_count = 0
                    for j in range(test_batches_per_epoch):
                        try:
                            #prepare test mini-batch
                            test_batch, label_batch = sess.run([vimageBatch, vlabelBatch])

                            acc = sess.run(accuracy, feed_dict={x: test_batch, y: label_batch, keep_prob: 1.})
                            # weight by the number of examples actually in this batch
                            batch_n = len(test_batch)
                            test_acc += acc*batch_n
                            test_count += batch_n
                        except (Exception) as ex: #triggered if we run out of validation data to feed queue
                            print(ex)

                    #calculate total test accuracy
                    if test_count:
                        test_acc /= test_count
                        print("{} Test Accuracy = {:.4f}".format(datetime.now(),test_acc))
                        text_file.write("{} Test Accuracy = {:.4f}\n".format(datetime.now(),test_acc))
                        test_acc_list.append(test_acc)
                    else:
                        # every validation batch failed: nothing to average
                        print("{} No validation batch could be evaluated".format(datetime.now()))

                if (step % train_batches_per_epoch == 0):
                    e += 1
                    print("{} Epoch number: {}".format(datetime.now(), e))
                    #save checkpoint of the model
                    if (e % checkpoint_epoch == 0):
                        checkpoint_name = os.path.join(checkpoint_path, dataset_name+'model_fold'+str(fold)+'_epoch'+str(e)+'.ckpt')
                        saver.save(sess, checkpoint_name)
                        print("{} Model checkpoint saved at {}".format(datetime.now(), checkpoint_name))

                step += 1

        except (tf.errors.OutOfRangeError) as ex:
            # training queue is exhausted after the requested number of epochs
            coord.request_stop(ex)

        finally :
            coord.request_stop()
            coord.join(enqueue_threads)
            # flush and close the event file
            writer.close()

    # find the max test score and the evaluation it belongs to
    # NOTE(review): index+1 equals the epoch number only if the test runs exactly
    # once per epoch (testNSteps == train_batches_per_epoch) - confirm in params
    if test_acc_list:
        best_acc = max(test_acc_list)
        max_acc.append(best_acc)
        max_epoch = test_acc_list.index(best_acc)+1
    else:
        max_acc.append(None)
        max_epoch = None
    max_epochs.append(max_epoch)

    elapsed_time = time.monotonic() - start_time
    print(elapsed_time)
    text_file.write("--- Training time taken: {} ---\n".format(time_taken(elapsed_time)))
    print("--- Training time taken:",time_taken(elapsed_time),"---")
    print("------------------------")

    # show the max accuracy of the fold and the evaluation it was reached at
    print(max_acc)
    print(max_epochs)

    elapsed_time_long = time.monotonic() - start_time_long
    print("*** Fold completed ***")
    text_file.write("Total time taken:")
    text_file.write(time_taken(elapsed_time_long))
    print("Total time taken:",time_taken(elapsed_time_long))