## Separates spectrograms into training and test set, then convert them to Tensorflow-friendly numpy arrays to be fed into our CNN.

In [1]:
%matplotlib notebook

import numpy as np
import pandas as pd
import os
from scipy import ndimage
import tensorflow as tf
import matplotlib.pyplot as plt
from IPython import display
import time

image_width = 50  #pixel width ie. time bins
image_height = 37 #pixel height ie. frequency bins.


In [2]:
#functions to save/load numpy arrays to/from file

def save_sets(sets,name):
    """Writes the data array to .npy file. Can be loaded using load_set.
    sets: arrays to be saved. can take a list
    name: string to name the file. follow same order as in sets 
    """ 
    ind = 0
    for x in sets:
        np.save('{}.npy'.format(name[ind]), x)
        ind += 1

def load_set(sets):
    """Load existing data arrays from .npy files. Use if have preexisting data or when you don't to reshuffle the dataset"""
    return np.load('{}.npy'.format(sets))

def time_taken(elapsed):
    """To measure time taken"""
    m, s = divmod(elapsed, 60)
    h, m = divmod(m, 60)
    return "%d:%02d:%02d" % (h, m, s)

In [3]:
#Here we load spectrograms from file and index by class into python dics. Indices can be saved to be reused 
#!!If already have an array to load, skip and directly load that instead (further down)
import random

def get_subdirs(a_dir):
    """ Returns a list of sub directory names in a_dir """ 
    return [name for name in os.listdir(a_dir)
            if (os.path.isdir(os.path.join(a_dir, name)) and not (name.startswith('.')))]

def fetch_files(spec_dir):
    """Returns a python dictionary of classes as keys and the (short)filename as values"""
    data_files = {} 
    i = 0
    class_folders = get_subdirs(spec_dir)
    for folder in class_folders:
        class_files = os.listdir(spec_dir + "/" + folder)
        data_files[i] = class_files #each spectrogram is associated with its class via dictionary key
        i += 1
    return data_files, spec_dir

def getFold(string):
    """get fold no. from ESC-50 dataset using the filename. Labels #1-5"""
    label_pos = string.index("-")
    return string[label_pos-1]

data, data_dir = fetch_files("C:/Users/Huz/Documents/python_scripts/ESC50_multitask/ESC-50-cqt")

In [4]:
#Split the test from training set by proportion specified before (in get_ind).
#Can save the datasets as numpy arrays for future use.
#!!If already have an array to load, skip and directly load that instead (further down)
import collections as c

fold1,fold2,fold3,fold4,fold5 = [],[],[],[],[] #create 5 folds to be used for cross-val
fold1_L,fold2_L,fold3_L,fold4_L,fold5_L = [],[],[],[],[] #mirror with 5 folds for labels

def create_folds(data_files,spec_dir):
    """"Formats test and training data and labels in numpy arrays."""

    class_folders = get_subdirs(spec_dir)
    
    for key, value in data_files.items():
        for filename in value:
            try:
                image_data = np.array(ndimage.imread(spec_dir + "/" + class_folders[key] + "/" 
                                                     + filename).astype(float)) - 0.5
                if image_data.shape != (image_height, image_width):
                    raise Exception('Unexpected image shape: %s' % str(image_data.shape))
                
                fold = getFold(filename) #use Piczak's prearranged folds
                if int(fold) == 1:
                    fold1.append(image_data)
                    fold1_L.append(key)
                elif int(fold) == 2:
                    fold2.append(image_data)
                    fold2_L.append(key)
                elif int(fold) == 3:
                    fold3.append(image_data)
                    fold3_L.append(key)
                elif int(fold) == 4:
                    fold4.append(image_data)
                    fold4_L.append(key)
                elif int(fold) == 5:
                    fold5.append(image_data)
                    fold5_L.append(key)
            except IOError as e:
                print('Could not read:', data_files[data_class][j], ':', e, '- it\'s ok, skipping.')

def shuffle(dataset, labels):
    """Randomizes order of elements in input arrays"""
    permutation = np.random.permutation(labels.shape[0])
    shuffled_dataset = dataset[permutation,:,:]
    shuffled_labels = labels[permutation]
    return shuffled_dataset, shuffled_labels

create_folds(data,data_dir)

fold1, fold1_L = shuffle(np.array(fold1),np.array(fold1_L))
fold2, fold2_L = shuffle(np.array(fold1),np.array(fold1_L))
fold3, fold3_L = shuffle(np.array(fold1),np.array(fold1_L))
fold4, fold4_L = shuffle(np.array(fold1),np.array(fold1_L))
fold5, fold5_L = shuffle(np.array(fold1),np.array(fold1_L))

# print(c.Counter(test_label))

In [5]:
n_labels = 50 #no. of classes
n_channels = 1 #intensity

def reformat(dataset, labels):
    """Reformats to appropriate shape for covnet. Use 1-hot encoding for labels"""
    dataset = dataset.reshape((-1, image_height, image_width, n_channels)).astype(np.float32)
    labels = (np.arange(n_labels) == labels[:,None]).astype(np.float32)
    return dataset, labels

fold1, fold1_L = reformat(fold1, fold1_L)
fold2, fold2_L = reformat(fold2, fold2_L)
fold3, fold3_L = reformat(fold3, fold3_L)
fold4, fold4_L = reformat(fold4, fold4_L)
fold5, fold5_L = reformat(fold5, fold5_L)
print('Fold1 size:', fold1.shape, fold1_L.shape)
print('Fold2 size:', fold2.shape, fold2_L.shape)
print('Fold3 size:', fold3.shape, fold3_L.shape)
print('Fold4 size:', fold4.shape, fold4_L.shape)
print('Fold5 size:', fold5.shape, fold5_L.shape)

Fold1 size: (400, 37, 50, 1) (400, 50)
Fold2 size: (400, 37, 50, 1) (400, 50)
Fold3 size: (400, 37, 50, 1) (400, 50)
Fold4 size: (400, 37, 50, 1) (400, 50)
Fold5 size: (400, 37, 50, 1) (400, 50)


### Now train and test CNN

In [26]:
# input_queue = tf.train.slice_input_producer([fold1, fold1_L],shuffle=True)
# image_batch, label_batch = tf.train.batch([input_queue[0], input_queue[1]],
#                                           batch_size=batch_size)
# with tf.Session() as sess:
  
#   # initialize the variables
#     tf.global_variables_initializer().run()
  
#     a = sess.run(input_queue)
#     print(a)
big_fold = np.empty([2000,37,50,1])
big_fold[0:400,:,:,:]=fold1
big_fold[400:800,:,:,:]=fold2
big_fold[800:1200,:,:,:]=fold3
big_fold[1200:1600,:,:,:]=fold4
big_fold[1600:2000,:,:,:]=fold5
print(a.shape)

In [6]:
cwd = os.getcwd() #current working folder

# Parameters
#if want to use gradient descent tune below else use different optimizer
global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.05
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                           10000, 0.96, staircase=True)
#learning_rate = 0.01
batch_size = 16
num_steps = 101
display_step = 10
graph_step = 50

# Network Parameters
dropout = .5 # Dropout, probability to keep units

def conv2d(x, W, b, strides=1):
    # Conv2D wrapper, with bias and relu activation
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

def maxpool2d(x, k=2):
    # MaxPool2D wrapper
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')

def conv_net(x, weights, biases, dropout):
    # Convolution Layer
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    #w_h = tf.summary.histogram("conv1",conv1)
    # Max Pooling (down-sampling)
    conv1 = maxpool2d(conv1, k=2)
    conv1 = tf.nn.dropout(conv1, dropout)
    
    # Convolution Layer
    #conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
    # Max Pooling (down-sampling)
    #conv2 = maxpool2d(conv2, k=2)
    
    # Convolution Layer
    #conv3 = conv2d(conv2, weights['wc3'], biases['bc3'])
    # Max Pooling (down-sampling)
    #conv3 = maxpool2d(conv3, k=2)

    # Fully connected layer
    # Reshape conv3 output to fit fully connected layer input
    fc1 = tf.reshape(conv1, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    # Apply Dropout
    fc1 = tf.nn.dropout(fc1, dropout)
    
    # Fully connected layer
    #fc2 = tf.add(tf.matmul(fc1, weights['wd2']), biases['bd2'])
    #fc2 = tf.nn.relu(fc2)
    # Apply Dropout
    #fc2 = tf.nn.dropout(fc2, dropout)

    # Output, class prediction
    out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
    return out


# tf Graph input
x = tf.placeholder(tf.float32)
y = tf.placeholder(tf.float32)
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)
    
# Store layers weight & bias
weights = {
    # 5x5 conv, 1 input, 24 outputs
    'wc1': tf.Variable(tf.truncated_normal([37, 5, 1, 128], stddev=0.1)),
    # 5x5 conv, 24 inputs, 48 outputs
    'wc2': tf.Variable(tf.truncated_normal([19, 5, 128, 180], stddev=0.1)),
    # 5x5 conv, 48 inputs, 48 outputs
    'wc3': tf.Variable(tf.truncated_normal([5, 5, 48, 48], stddev=0.1)),
    # fully connected, (1025//4=257)*(196//4=49)*64 inputs, 1024 outputs
    'wd1': tf.Variable(tf.truncated_normal([10*13*180, 1200], stddev=0.1)),
    # fully connected, (1025//4=257)*(196//4=49)*64 inputs, 1024 outputs
    'wd2': tf.Variable(tf.truncated_normal([1000, 1000], stddev=0.1)),
    # 1024 inputs, 50 outputs (class prediction)
    'out': tf.Variable(tf.truncated_normal([1200, n_labels], stddev=0.1))
}

biases = {
    'bc1': tf.Variable(tf.zeros([128])),
    'bc2': tf.Variable(tf.constant(1.0,shape=[180])),
    'bc3': tf.Variable(tf.constant(1.0,shape=[48])),
    'bd1': tf.Variable(tf.constant(1.0,shape=[1200])),
    'bd2': tf.Variable(tf.constant(1.0,shape=[1200])),
    'out': tf.Variable(tf.constant(1.0,shape=[n_labels]))
}
       
# Construct model
pred = conv_net(x, weights, biases, keep_prob)
      
# Define cost function
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
    
# Optimizer.
#optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost,global_step=global_step)
optimizer = tf.train.AdamOptimizer().minimize(cost)
  
# Predictions for the training, validation, and test data.
prob = tf.nn.softmax(pred)
#valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
#test_prediction = tf.nn.softmax(conv_net(x, weights, biases, keep_prob))
    
# Evaluate model
correct_pred = tf.equal(tf.argmax(prob, 1), tf.argmax(y, 1))
accuracy = 100*tf.reduce_mean(tf.cast(correct_pred, tf.float32))

save_all = tf.train.Saver() #create saver handle
#filter_summary = tf.summary.image("conv1",weights['wc1'])
#w_h = tf.summary.histogram("weights", tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1']))
#b_h = tf.summary.histogram("biases", biases['bd1'])
cost_h = tf.summary.scalar("cost_function", cost)



In [7]:
steps = []
accs = []
ls = []

with tf.Session() as session:
    # Merge all the summaries and write them out to /tmp/mnist_logs (by default)
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(cwd + '/results/stft/train',session.graph)
    test_writer = tf.summary.FileWriter(cwd + '/results/stft/test')
    tf.global_variables_initializer().run()
    print('Initialized')
    start_time = time.monotonic()
    
    for step in range(num_steps):
        offset = (step * batch_size) % (train_label.shape[0] - batch_size)
        batch_data = train_set[offset:(offset + batch_size), :, :, :]
        batch_labels = train_label[offset:(offset + batch_size), :]

        l, acc, train_summary = session.run([cost, accuracy, merged],
                                                feed_dict= {x:batch_data, y:batch_labels,keep_prob:dropout})
        
        #_, l, acc = session.run([optimizer, cost, accuracy],
        #                        feed_dict= {x:batch_data, y:batch_labels,keep_prob:dropout})
        if (step % display_step == 0):
            train_writer.add_summary(train_summary, step)
            print('loss at step %d: %f' % (step, l))
            print('training accuracy: %.1f%%' % acc)
        
        if (step % graph_step == 0):
            steps.append(step)
            accs.append(acc)
            ls.append(l)
    
    #save_all.save(session, cwd + '/model3/data-all.chkp') #save model
    elapsed_time = time.monotonic() - start_time
    print("Training time taken:",time_taken(elapsed_time))
        
    test_acc, test_summary = session.run([accuracy,merged], 
                                    feed_dict={x:test_set, y:test_label, keep_prob:1.}) #run model on test set
    test_writer.add_summary(test_summary)
    print("Testing Accuracy:",test_acc,"%")
        
    #Plot graph of accuracy and loss against no. of steps
    fig, ax1 = plt.subplots()
    ax1.set_xlabel('steps')
    ax1.set_ylabel('accuracy', color='b')
    ax1.tick_params('y', colors='b')
    ax1.set_ylim(0, 100)
    ax1.plot(steps,accs,'b-')
    ax2 = ax1.twinx()
    ax2.plot(steps,ls,'r-')
    ax2.set_ylabel('loss', color='r')
    ax2.tick_params('y', colors='r')
    ax1.plot(step,test_acc,'go')
    fig.tight_layout()
    #fig.savefig('C:/Users/Huz/Documents/python_scripts/ESC50/results/kernel_size/test2.png', dpi=fig.dpi)

Initialized


TypeError: The value of a feed cannot be a tf.Tensor object. Acceptable feed values include Python scalars, strings, lists, or numpy ndarrays.

END OF CODE (IGNORE BELOW)

In [None]:
from sklearn.model_selection import KFold, cross_val_score
num_steps = 500

def accuracy(predictions, labels):
      return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
              / predictions.shape[0])

with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print('Initialized')
    k_fold = KFold(n_splits=n_splits,shuffle=True)
    for train_ind, valid_ind in k_fold.split(train_set):
        batch_data = train_set[train_ind, :, :, :]
        batch_label = train_label[train_ind, :]
        valid_data = train_set[valid_ind, :, :, :]
        valid_label = train_label[valid_ind, :]
        for step in range(num_steps):
            #offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
            #batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
            #batch_labels = train_labels[offset:(offset + batch_size), :]
        
            feed_dict = {tf_train_dataset:batch_data, tf_train_labels:batch_label, tf_valid_dataset:valid_data}
            _, l, predictions = session.run([optimizer, loss, train_prediction], feed_dict=feed_dict)
            if (step % 50 == 0):
                print('Minibatch loss at step %d: %f' % (step, l))
                print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_label))
                print('Validation accuracy: %.1f%%' % accuracy(valid_prediction.eval(), valid_label))
        print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_label))

In [None]:
from sklearn.model_selection import KFold, cross_val_score   
    
k_fold = KFold(n_splits=4,shuffle=True)

for train_indices, test_indices in k_fold.split(train_set):
    #print('Train: %s | test: %s' % (train_indices, test_indices))
    #print(test_indices.shape)
    break

for train_ind, valid_ind in k_fold.split(train_set):
    batch_data = train_set[train_ind, :, :, :]
    batch_label = train_label[train_ind, :]
    valid_data = train_set[valid_ind, :, :, :]
    valid_label = train_label[valid_ind, :]
    print(batch_data.shape)
    print(batch_label.shape)
    print(valid_data.shape)
    print(valid_label.shape)
    #test_set = np.array([data_files[data_class][i] for i in test_indices])  