In [4]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import argparse, os, sys

from __future__ import division, print_function, absolute_import
from util_functions import process_files, random_mini_batches

FLAGS = None

# load the data
x_train, y_train, f_train = process_files(
                                    dataset='training', 
                                    features=['Mel', 'Mel_deltas'], 
                                    shape='flat')

x_test, y_test, f_test = process_files(
                                    dataset='test', 
                                    features=['Mel', 'Mel_deltas'], 
                                    shape='flat')

x_train = x_train.astype(np.float32)
x_test = x_test.astype(np.float32)

tf.logging.set_verbosity(tf.logging.INFO)

In [5]:
#Seeding
seed = 3

# input image dimensions
input_d = x_train.shape[1] #Depth
input_h = x_train.shape[2] #Height
input_w = x_train.shape[3] #Width

num_classes = 2

In [6]:
def sound_net(features, labels, mode):

    predictions = None
    loss = None
    train_op = None
    eval_metric_ops = None
    
    # Input Layer
    x = tf.reshape(features["x"], [-1, input_h, input_w, input_d])

    # Convolutional Layer #1
    # input shape [batch_size, input_h, input_w, input_d]
    # output shape [batch_size, input_h, input_w, 80]        
    conv1 = tf.layers.conv2d(inputs=x, filters=80, kernel_size=[57, 6], padding="same", 
                             activation=tf.nn.relu, name='conv1')

    # Pooling Layer #1
    # First max pooling layer with a 2x2 filter and stride of 2
    # input shape [batch_size, input_h, input_w, 80]
    # output shape [batch_size, input_h/2, input_w/2, 80]
    pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], padding="same", strides=2)
    pool1 = tf.layers.dropout(inputs=pool1, rate=FLAGS.dropout, training=mode == tf.estimator.ModeKeys.TRAIN, name='pool1')

    # Convolutional Layer #2
    # input shape [batch_size, input_h/2, input_w/2, 80]
    # output shape [batch_size, input_h/2, input_w/2, 80]
    conv2 = tf.layers.conv2d(inputs=pool1, filters=80, kernel_size=[1, 3], 
                             padding="same", activation=tf.nn.relu, name='conv2')
    
    # Pooling Layer #2
    # Input Tensor Shape: [batch_size, input_h/2, input_w/2, 80]
    # Output Tensor Shape: [batch_size, input_h/16 input_w/14, 80]    
    pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[8, 7], padding="same", 
                                    strides=[8, 7], name='pool1')

    # Fully connected layer #1
    # Reshape conv2 output to fit fully connected layer input
    pool2_flat = tf.reshape(pool2, [-1, int(input_h/16)*int(input_w/14)*80])    
    dense1 = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
    dense1 = tf.layers.dropout(inputs=dense1, rate=FLAGS.dropout, training=mode == tf.estimator.ModeKeys.TRAIN)

    # Fully connected layer #2
    dense2 = tf.layers.dense(inputs=dense1, units=1024, activation=tf.nn.relu)
    dense2 = tf.layers.dropout(inputs=dense2, rate=FLAGS.dropout, training=mode == tf.estimator.ModeKeys.TRAIN)    
    
    # Logits layer
    logits = tf.layers.dense(inputs=dense2, units=num_classes)

    predictions = {
      "classes": tf.argmax(input=logits, axis=1),
      "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
    }
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate Loss (for both TRAIN and EVAL modes)
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels), name='loss_tensor')   
    correct_prediction = tf.equal(tf.argmax(logits, 1), labels) 
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='accuracy_tensor')
    tf.summary.scalar('accuracy', accuracy)
        
    eval_metric_ops = {
      "accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])}
    
    # Configure the Training Op (for TRAIN mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
        train_op = optimizer.minimize(
            loss=loss,
            global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op, eval_metric_ops=eval_metric_ops)

  # Add evaluation metrics (for EVAL mode)
    eval_metric_ops = {
      "accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions["classes"])}
    return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)

In [7]:
def main(_):

    # Create the Estimator or warm-start the last one
    sound_classifier = tf.estimator.Estimator(
      model_fn=sound_net, model_dir=FLAGS.model_folder)
    
    # Set up logging for predictions
    tensors_to_log = {"loss": "loss_tensor", "accuracy": "accuracy_tensor"}
    logging_hook = tf.train.LoggingTensorHook(tensors_to_log, every_n_iter=100)

    # Train the model
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={"x": x_train},
      y=y_train,
      batch_size=32,
      num_epochs=None,
      shuffle=True)
    
    sound_classifier.train(
      input_fn=train_input_fn,
      steps=FLAGS.max_steps,
      hooks=[logging_hook])

    # Evaluate the model and print results
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
      x={"x": x_test},
      y=y_test,
      num_epochs=1,
      shuffle=False)
    eval_results = sound_classifier.evaluate(input_fn=eval_input_fn)
    print(eval_results)

In [9]:
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--max_steps', type=int, default=10000,
                      help='Number of steps to run trainer.')
    parser.add_argument('--learning_rate', type=float, default=0.0001,
                      help='Initial learning rate')
    parser.add_argument('--dropout', type=float, default=0.4,
                      help='Keep probability for training dropout.')
    parser.add_argument('--model_folder', type=str, default="saved_models/estimator_api",
                      help='Folder where the model will be saved.')
    
    FLAGS, unparsed = parser.parse_known_args()    
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'saved_models/estimator_api', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f85244c0cc0>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into saved_models/estimator_api/model.ckpt.
INFO:tensorflow:loss = 5.1302285, accuracy = 0.5
INFO:tensorflow:loss = 5.1302285, step = 1
INFO:tensorflow:global_step/sec: 9.40131
INFO:tensorflow:loss = 6.7936606, accuracy = 0.6875 (10.638 sec)
INFO:tensorflow:loss = 6.7936606, step = 101 (10.638 sec)
INFO:tensorflow:global_ste

INFO:tensorflow:loss = 0.2636202, step = 4501 (10.598 sec)
INFO:tensorflow:global_step/sec: 9.43987
INFO:tensorflow:loss = 0.20664205, accuracy = 0.9375 (10.594 sec)
INFO:tensorflow:loss = 0.20664205, step = 4601 (10.594 sec)
INFO:tensorflow:global_step/sec: 9.43846
INFO:tensorflow:loss = 0.04210343, accuracy = 1.0 (10.594 sec)
INFO:tensorflow:loss = 0.04210343, step = 4701 (10.594 sec)
INFO:tensorflow:global_step/sec: 9.44037
INFO:tensorflow:loss = 0.05871944, accuracy = 1.0 (10.593 sec)
INFO:tensorflow:loss = 0.05871944, step = 4801 (10.593 sec)
INFO:tensorflow:global_step/sec: 9.43934
INFO:tensorflow:loss = 0.22414534, accuracy = 0.9375 (10.594 sec)
INFO:tensorflow:loss = 0.22414534, step = 4901 (10.594 sec)
INFO:tensorflow:global_step/sec: 9.4344
INFO:tensorflow:loss = 0.25695798, accuracy = 0.9375 (10.600 sec)
INFO:tensorflow:loss = 0.25695798, step = 5001 (10.600 sec)
INFO:tensorflow:global_step/sec: 9.42806
INFO:tensorflow:loss = 0.18282278, accuracy = 0.9375 (10.607 sec)
INFO:t

INFO:tensorflow:loss = 0.12891431, step = 9401 (10.619 sec)
INFO:tensorflow:global_step/sec: 9.42919
INFO:tensorflow:loss = 0.17884442, accuracy = 0.9375 (10.605 sec)
INFO:tensorflow:loss = 0.17884442, step = 9501 (10.605 sec)
INFO:tensorflow:global_step/sec: 9.42899
INFO:tensorflow:loss = 0.07327816, accuracy = 0.96875 (10.606 sec)
INFO:tensorflow:loss = 0.07327816, step = 9601 (10.606 sec)
INFO:tensorflow:global_step/sec: 9.42546
INFO:tensorflow:loss = 0.015514433, accuracy = 1.0 (10.610 sec)
INFO:tensorflow:loss = 0.015514433, step = 9701 (10.609 sec)
INFO:tensorflow:global_step/sec: 9.42865
INFO:tensorflow:loss = 0.08425382, accuracy = 0.96875 (10.606 sec)
INFO:tensorflow:loss = 0.08425382, step = 9801 (10.606 sec)
INFO:tensorflow:global_step/sec: 9.4307
INFO:tensorflow:loss = 0.100519076, accuracy = 0.96875 (10.604 sec)
INFO:tensorflow:loss = 0.100519076, step = 9901 (10.604 sec)
INFO:tensorflow:Saving checkpoints for 10000 into saved_models/estimator_api/model.ckpt.
INFO:tensorfl

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [87]:
tf.reset_default_graph()