# Notebook to run classification on the ESC50 or UrbanSound8K datasets. The inputs are spectrogram images generated from the audio samples. See the DataPrep folder for the functions that prepare the spectrograms.

### First we load the images, then arrange them into numpy arrays

In [1]:
%matplotlib notebook

import numpy as np
import os
import re
import tensorflow as tf
from scipy import ndimage
import time
from datetime import datetime
import matplotlib.pyplot as plt
import random
from params import *

import model_single2 as m #import single-task learning CNN model

#np.set_printoptions(threshold=np.nan)  #if want to see full output of big arrays in Jupyter notebook

In [2]:
def time_taken(elapsed):
    """Render a duration given in seconds as an h:mm:ss string.

    elapsed: number of seconds, e.g. the difference between two
             time.monotonic() readings. Fractions of a second are dropped
             by the integer formatting.
    """
    total_minutes, seconds = divmod(elapsed, 60)
    hours, minutes = divmod(total_minutes, 60)
    parts = (hours, minutes, seconds)
    return "%d:%02d:%02d" % parts


In [3]:
#functions to save/load numpy arrays to/from file

def save_sets(sets,name):
    """Write each array in `sets` to its own .npy file under save_path.

    sets: iterable of arrays to be saved
    name: sequence of file-name stems, in the same order as `sets`;
          element k of `sets` is written to save_path + '/<name[k]>.npy'
    """
    for position, array in enumerate(sets):
        destination = save_path + '/{}.npy'.format(name[position])
        np.save(destination, array)

def load_set(sets):
    """Load one previously saved array from '<sets>.npy'.

    Useful when the data already exists on disk, or when you don't want to
    reshuffle the dataset. `sets` is the path of the file without the
    '.npy' extension.
    """
    filename = '{}.npy'.format(sets)
    return np.load(filename)

In [4]:
import utils.spectreader as spectreader

def getImage(fnames, nepochs=None) :
    """ Builds the (label, image) tensors for the prepared *list* of TFRecord files in fnames.
    params:
    fnames - list of filenames to read data from
    nepochs - An integer (optional), passed straight through to spectreader.getImage.
              Reads through all data nepochs times before generating an OutOfRange error.
              None means read forever.
    returns:
    (label, image) - label as a one-hot vector of length N_LABELS, image flattened to 1-D
    """
    raw_label, raw_image = spectreader.getImage(fnames, nepochs)

    # Flatten the image (same idea as np.flatten); the original author could not
    # get shuffle_batch to work on images in their native shape.
    flat_length = k_freqbins*k_numFrames
    flat_image = tf.reshape(raw_image, [flat_length])

    # Re-express the label as a one-hot vector. The label is shifted down by one
    # before encoding (presumably the stored labels are 1-based -- confirm
    # against the DataPrep code).
    onehot_label = tf.stack(tf.one_hot(raw_label-1, N_LABELS))
    print ("getImage returning")
    return onehot_label, flat_image



def get_TFR_folds(a_dir, foldnumlist):
    """ Returns a list of file names in a_dir that belong to the folds in foldnumlist.

    A file belongs to fold X when its name starts with "foldX" and the next
    character (if any) is not a digit, so fold 1 does not pick up the files
    of fold 10, 11, ...

    a_dir: directory to scan (not recursive)
    foldnumlist: iterable of fold numbers
    returns: list of a_dir + '/' + name strings, grouped in foldnumlist order
    """
    lis = []
    for num in foldnumlist :
        # (?!\d) is a digit boundary: "fold1" must not be followed by another digit.
        in_fold = re.compile("fold" + re.escape(str(num)) + r"(?!\d)").match
        lis.extend([a_dir + '/' + name for name in os.listdir(a_dir)
            if in_fold(name)])
    return lis



In [5]:
# Build the queue-based input pipeline: folds 1-4 feed the training batches,
# fold 5 feeds the validation batches.
datanumlist=[1,2,3,4]
validatenumlist=[5]

# getImage returns (label, image), hence the (target, data) unpacking order.
datafnames=get_TFR_folds("../DataPrep/stft_png", datanumlist)
target, data = getImage(datafnames, nepochs=1)

validatefnames=get_TFR_folds("../DataPrep/stft_png", validatenumlist)
vtarget, vdata = getImage(validatefnames, nepochs=1)

NUM_THREADS=2
k_batchsize = batch_size
k_vbatchsize = 2

# Training batches are shuffled.
imageBatch, labelBatch = tf.train.shuffle_batch(
    [data, target], batch_size=k_batchsize,
    num_threads=NUM_THREADS,
    allow_smaller_final_batch=True, # finish the epoch even if the data size doesn't divide by the batch size
    enqueue_many=False, # IMPORTANT to get right (default=False): each enqueue is a single example
    capacity=1000,  # queue capacity
    min_after_dequeue=500) # minimum number of queued elements kept for shuffling

# Validation batches keep the reading order (no shuffling).
vimageBatch, vlabelBatch = tf.train.batch(
    [vdata, vtarget], batch_size=k_vbatchsize,
    num_threads=NUM_THREADS,
    allow_smaller_final_batch=False, # NOTE: False here, so (per the tf.train.batch docs) a trailing partial batch is NOT emitted
    enqueue_many=False, # IMPORTANT to get right (default=False): each enqueue is a single example
    capacity=1000)

getImage ['../DataPrep/stft_png/fold1-00000-of-00001', '../DataPrep/stft_png/fold2-00000-of-00001', '../DataPrep/stft_png/fold3-00000-of-00001', '../DataPrep/stft_png/fold4-00000-of-00001']
TFRecordReader produced Tensor("ReaderNumRecordsProducedV2:0", shape=(), dtype=int64) records
getImage returning
getImage ['../DataPrep/stft_png/fold5-00000-of-00001']
TFRecordReader produced Tensor("ReaderNumRecordsProducedV2_1:0", shape=(), dtype=int64) records
getImage returning


In [8]:


    
# Smoke test of the input pipeline: fetch one training batch and print its labels.
# The session is managed by `with` so it is closed even when an unexpected
# exception (anything other than OutOfRangeError) escapes the block.
with tf.Session() as sess:
    # Local variables are initialised as well as global ones (presumably the
    # epoch-limited input producer keeps its epoch counter in a local variable
    # -- confirm against utils.spectreader).
    sess.run(tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()))

    coord = tf.train.Coordinator()
    enqueue_threads = tf.train.start_queue_runners(sess=sess,coord=coord)

    try:
        if coord.should_stop():
            print("coord should stop")

        print("labelBatch is " + str(labelBatch))
        X_batch, Y_batch = sess.run([imageBatch, labelBatch])
        print("Y_Batch is " + str(Y_batch))

    except (tf.errors.OutOfRangeError) as e:
        # Input exhausted: hand the exception to the coordinator.
        coord.request_stop(e)

    finally :
        # Always stop and join the queue-runner threads before the session closes.
        coord.request_stop()
        coord.join(enqueue_threads)

print("done")

labelBatch is Tensor("shuffle_batch:1", shape=(?, 2), dtype=float32)
Y_Batch is [[ 1.  0.]
 [ 0.  1.]]
done


In [9]:
# Rebuild the queue-based input pipeline with a different split: folds 1, 2, 3
# and 5 feed the training batches, fold 4 feeds the validation batches.
# This rebinds the batch tensors created by the earlier pipeline cell.
datanumlist=[1,2,3,5]
validatenumlist=[4]

# getImage returns (label, image), hence the (target, data) unpacking order.
datafnames=get_TFR_folds("../DataPrep/stft_png", datanumlist)
target, data = getImage(datafnames, nepochs=1)

validatefnames=get_TFR_folds("../DataPrep/stft_png", validatenumlist)
vtarget, vdata = getImage(validatefnames, nepochs=1)

NUM_THREADS=2
k_batchsize = batch_size
k_vbatchsize = 2

# Training batches are shuffled.
imageBatch, labelBatch = tf.train.shuffle_batch(
    [data, target], batch_size=k_batchsize,
    num_threads=NUM_THREADS,
    allow_smaller_final_batch=True, # finish the epoch even if the data size doesn't divide by the batch size
    enqueue_many=False, # IMPORTANT to get right (default=False): each enqueue is a single example
    capacity=1000,  # queue capacity
    min_after_dequeue=500) # minimum number of queued elements kept for shuffling

# Validation batches keep the reading order (no shuffling).
vimageBatch, vlabelBatch = tf.train.batch(
    [vdata, vtarget], batch_size=k_vbatchsize,
    num_threads=NUM_THREADS,
    allow_smaller_final_batch=False, # NOTE: False here, so (per the tf.train.batch docs) a trailing partial batch is NOT emitted
    enqueue_many=False, # IMPORTANT to get right (default=False): each enqueue is a single example
    capacity=1000)

getImage ['../DataPrep/stft_png/fold1-00000-of-00001', '../DataPrep/stft_png/fold2-00000-of-00001', '../DataPrep/stft_png/fold3-00000-of-00001', '../DataPrep/stft_png/fold5-00000-of-00001']
TFRecordReader produced Tensor("ReaderNumRecordsProducedV2_2:0", shape=(), dtype=int64) records
getImage returning
getImage ['../DataPrep/stft_png/fold4-00000-of-00001']
TFRecordReader produced Tensor("ReaderNumRecordsProducedV2_3:0", shape=(), dtype=int64) records
getImage returning


In [10]:
# Smoke test of the rebuilt input pipeline: fetch one training batch and print its labels.
# The session is managed by `with` so it is closed even when an unexpected
# exception (anything other than OutOfRangeError) escapes the block.
with tf.Session() as sess:
    # Local variables are initialised as well as global ones (presumably the
    # epoch-limited input producer keeps its epoch counter in a local variable
    # -- confirm against utils.spectreader).
    sess.run(tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()))

    coord = tf.train.Coordinator()
    enqueue_threads = tf.train.start_queue_runners(sess=sess,coord=coord)

    try:
        if coord.should_stop():
            print("coord should stop")

        print("labelBatch is " + str(labelBatch))
        X_batch, Y_batch = sess.run([imageBatch, labelBatch])
        print("Y_Batch is " + str(Y_batch))

    except (tf.errors.OutOfRangeError) as e:
        # Input exhausted: hand the exception to the coordinator.
        coord.request_stop(e)

    finally :
        # Always stop and join the queue-runner threads before the session closes.
        coord.request_stop()
        coord.join(enqueue_threads)

print("done")

labelBatch is Tensor("shuffle_batch_1:1", shape=(?, 2), dtype=float32)
Y_Batch is [[ 0.  1.]
 [ 1.  0.]]
done


### Now it is time to train and test the CNN

In [None]:
# Build the training graph: placeholders, model, loss (optionally L2-regularised),
# optimizer, accuracy metric, TensorBoard summaries and checkpoint saver.

# Path for tf.summary.FileWriter and to store model checkpoints
filewriter_path = save_path + "/filewriter/"
checkpoint_path = save_path + "/checkpoint/"

# Create the checkpoint directory, including any missing parent directories
# (os.mkdir would fail if save_path itself did not exist yet).
os.makedirs(checkpoint_path, exist_ok=True)

#********************************************************************

# tf Graph input placeholders
# NOTE(review): x has a fixed leading dimension of batch_size, so every feed
# must contain exactly batch_size images, while y accepts any number of rows.
x = tf.placeholder(tf.float32, [batch_size, IMAGE_HEIGHT, IMAGE_WIDTH, NUM_CHANNELS])
y = tf.placeholder(tf.int32, [None, N_LABELS])
keep_prob = tf.placeholder(tf.float32, (), name="keepProb") #dropout (keep probability)

# Construct model
pred = m.conv_net(x, m.weights, m.biases, keep_prob)

#L2 regularization
lossL2 = tf.add_n([tf.nn.l2_loss(val) for val in m.weights.values()]) * beta #L2 reg on all weight layers
lossL2_onlyfull = tf.add_n([tf.nn.l2_loss(m.weights['wd1']),tf.nn.l2_loss(m.weights['out'])]) * beta #L2 reg on dense layers

# Op for calculating the loss
with tf.name_scope("cross_ent"):
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y)
    if l2reg:
        # l2regfull selects the penalty restricted to the dense layers,
        # otherwise all weight layers are penalised.
        l2_penalty = lossL2_onlyfull if l2regfull else lossL2
        loss = tf.reduce_mean(cross_entropy + l2_penalty)
    else:
        loss = tf.reduce_mean(cross_entropy)

# Train op
with tf.name_scope("train"):
    optimizer = tf.train.AdamOptimizer(epsilon=epsilon).minimize(loss)

# Add the loss to summary
tf.summary.scalar('cross_entropy', loss)

# Predictions
prob = tf.nn.softmax(pred)

# Evaluation op: Accuracy of the model, as a percentage
with tf.name_scope("accuracy"):
    correct_pred = tf.equal(tf.argmax(prob, 1), tf.argmax(y, 1))
    accuracy = 100*tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Add the accuracy to the summary
tf.summary.scalar('accuracy', accuracy)

# Merge all summaries together
merged_summary = tf.summary.merge_all()

# Initialize a saver to store model checkpoints
saver = tf.train.Saver()

In [None]:
# Cross-validation driver: each fold in turn is held out as the test set, the
# model is trained on the remaining folds, and the sweep is repeated
# TOTAL_RUNS times. Progress is printed and mirrored to a text log.
max_acc = []      # best test accuracy recorded for each (run, fold)
max_epochs = []   # position of that best accuracy inside test_acc_list
fold_list = list(range(1, max(folds) + 1))
print("Fold List is " + str(fold_list))

total_runs = TOTAL_RUNS

start_time_long = time.monotonic()
# The log file is opened with a context manager so it is closed even if
# training raises part-way through.
with open(save_path + "/stft-double_v2.txt", "w") as text_file: #save training data
    print("{} Open Tensorboard at --logdir {}".format(datetime.now(), filewriter_path))

    run = 0
    while run < total_runs:
        for i in fold_list:

            test_acc_list = []
            trainfolds=[value for value in fold_list if value != i]
            testfolds=[i]

            text_file.write('*** Initializing fold #%u as test set ***\n' % i)
            print('*** Initializing fold #%u as test set ***' % i)

            print("train_set " + str([folds!=i]))

            # One FileWriter per fold; it is closed in the `finally` below so
            # that no writer is left open when moving on to the next fold.
            writer = tf.summary.FileWriter(filewriter_path + str(i))
            try:
                with tf.Session() as sess:

                    # Initialize all variables
                    tf.global_variables_initializer().run()

                    # Add the model graph to TensorBoard
                    writer.add_graph(sess.graph)

                    print("{} Start training...".format(datetime.now()))

                    start_time = time.monotonic()

                    # NOTE(review): `flist` is not defined in any visible cell of this
                    # notebook -- confirm where it comes from (params?) before running.
                    # NOTE(review): the pipeline smoke-test cells start queue runners and
                    # run the local-variable initializer before fetching from these batch
                    # ops; this session does neither -- confirm the fetch does not block.
                    # NOTE(review): getImage flattens every image to 1-D, yet the slices
                    # below index four dimensions -- a reshape is presumably needed here.
                    train_set, train_label = sess.run([imageBatch, labelBatch], feed_dict={flist : trainfolds })
                    test_set, test_label = sess.run([vimageBatch, vlabelBatch], feed_dict={flist : testfolds })

                    # The per-epoch loop is currently disabled, so the epoch index stays at 0.
                    e = 0
                    step = 1

                    # Number of distinct start offsets to wrap around; clamped to 1 so the
                    # modulo below cannot divide by zero when the set holds exactly
                    # batch_size samples.
                    train_span = max(train_label.shape[0] - batch_size, 1)

                    while step < train_batches_per_epoch:
                        print("step = " + str(step))
                        #create training mini-batch here
                        offset = (step * batch_size) % train_span
                        batch_data = train_set[offset:(offset + batch_size), :, :, :]
                        batch_labels = train_label[offset:(offset + batch_size),:]

                        #train and backprop
                        sess.run(optimizer, feed_dict= {x:batch_data, y:batch_labels, keep_prob:dropout})

                        #run merged_summary to display progress on Tensorboard
                        print("run summary")
                        if (step % display_step == 0):
                            s = sess.run(merged_summary, feed_dict={x: batch_data, y: batch_labels, keep_prob: 1.})
                            writer.add_summary(s, step)
                        step += 1

                    print("now test")
                    test_acc = 0.
                    test_count = 0
                    test_span = max(test_label.shape[0] - batch_size, 1)
                    for bi in range(test_batches_per_epoch):
                        print("test step = " + str(bi))
                        #prepare test mini-batch
                        offset = (bi * batch_size) % test_span
                        test_batch = test_set[offset:(offset + batch_size), :, :, :]
                        label_batch = test_label[offset:(offset + batch_size),:]

                        acc = sess.run(accuracy, feed_dict={x: test_batch, y: label_batch, keep_prob: 1.})

                        # weight each batch accuracy by the number of samples it covers
                        test_acc += acc*batch_size
                        test_count += 1*batch_size

                    #sometimes we get leftovers if batch_size is not a factor of the training/test set
                    if test_extra != 0:
                        # Take the LAST test_extra samples (slice, not a single index).
                        # NOTE(review): x is declared with a fixed batch_size leading
                        # dimension, so this smaller feed presumably needs a None batch dim.
                        test_batch = test_set[-test_extra:,:,:,:]
                        label_batch = test_label[-test_extra:,:]
                        # Fetch only the scalar accuracy so it can be weighted and summed.
                        acc = sess.run(accuracy, feed_dict={x: test_batch, y: label_batch, keep_prob: 1.})

                        test_acc += acc*test_extra
                        test_count += 1*test_extra

                    #calculate total test accuracy (left at 0 if nothing was scored)
                    if test_count:
                        test_acc /= test_count
                    print("{} Test Accuracy = {:.4f}".format(datetime.now(),test_acc))
                    text_file.write("{} Test Accuracy = {:.4f}\n".format(datetime.now(),test_acc))
                    test_acc_list.append(test_acc)

                    #save checkpoint of the model
                    if ((e+1) % checkpoint_epoch == 0):
                        checkpoint_name = os.path.join(checkpoint_path, dataset_name+'model_fold'+str(i)+'_epoch'+str(e+1)+'.ckpt')
                        # Do not rebind the global save_path: it is still needed to build
                        # output paths elsewhere in the notebook.
                        saved_checkpoint = saver.save(sess, checkpoint_name)
                        print("{} Model checkpoint saved at {}".format(datetime.now(), checkpoint_name))
            finally:
                writer.close()

            # find the max test score and the epoch it belongs to
            max_acc.append(max(test_acc_list))
            max_epoch = test_acc_list.index(max(test_acc_list))
            max_epochs.append(max_epoch)

            elapsed_time = time.monotonic() - start_time
            text_file.write("--- Training time taken: {} ---\n".format(time_taken(elapsed_time)))
            print("--- Training time taken:",time_taken(elapsed_time),"---")
            print("------------------------")

            # return the max accuracies of each fold and their respective epochs
            print(max_acc)
            print(max_epochs)
        run += 1

    elapsed_time_long = time.monotonic() - start_time_long
    print("*** All runs completed ***")
    text_file.write("Total time taken:")
    text_file.write(time_taken(elapsed_time_long))
    print("Total time taken:",time_taken(elapsed_time_long))